Diffstat (limited to 'qa/workunits/rbd')
-rwxr-xr-x  qa/workunits/rbd/cli_generic.sh                 933
-rwxr-xr-x  qa/workunits/rbd/concurrent.sh                  375
-rwxr-xr-x  qa/workunits/rbd/diff.sh                         53
-rwxr-xr-x  qa/workunits/rbd/diff_continuous.sh              60
-rwxr-xr-x  qa/workunits/rbd/huge-tickets.sh                 41
-rwxr-xr-x  qa/workunits/rbd/image_read.sh                  680
-rwxr-xr-x  qa/workunits/rbd/import_export.sh               259
-rwxr-xr-x  qa/workunits/rbd/issue-20295.sh                  18
-rwxr-xr-x  qa/workunits/rbd/journal.sh                     326
-rwxr-xr-x  qa/workunits/rbd/kernel.sh                      100
-rwxr-xr-x  qa/workunits/rbd/krbd_data_pool.sh              206
-rwxr-xr-x  qa/workunits/rbd/krbd_exclusive_option.sh       233
-rwxr-xr-x  qa/workunits/rbd/krbd_fallocate.sh              151
-rwxr-xr-x  qa/workunits/rbd/krbd_latest_osdmap_on_map.sh    30
-rwxr-xr-x  qa/workunits/rbd/krbd_namespaces.sh             116
-rwxr-xr-x  qa/workunits/rbd/krbd_stable_writes.sh           18
-rwxr-xr-x  qa/workunits/rbd/krbd_udev_enumerate.sh          66
-rwxr-xr-x  qa/workunits/rbd/krbd_udev_netlink_enobufs.sh    24
-rwxr-xr-x  qa/workunits/rbd/krbd_udev_netns.sh              86
-rwxr-xr-x  qa/workunits/rbd/krbd_udev_symlinks.sh          116
-rwxr-xr-x  qa/workunits/rbd/map-snapshot-io.sh              17
-rwxr-xr-x  qa/workunits/rbd/map-unmap.sh                    45
-rwxr-xr-x  qa/workunits/rbd/merge_diff.sh                  477
-rwxr-xr-x  qa/workunits/rbd/notify_master.sh                 5
-rwxr-xr-x  qa/workunits/rbd/notify_slave.sh                  5
-rwxr-xr-x  qa/workunits/rbd/permissions.sh                 272
-rwxr-xr-x  qa/workunits/rbd/qemu-iotests.sh                 53
-rwxr-xr-x  qa/workunits/rbd/qemu_dynamic_features.sh        46
-rwxr-xr-x  qa/workunits/rbd/qemu_rebuild_object_map.sh      37
-rwxr-xr-x  qa/workunits/rbd/rbd-ggate.sh                   242
-rwxr-xr-x  qa/workunits/rbd/rbd-nbd.sh                     253
-rwxr-xr-x  qa/workunits/rbd/rbd_groups.sh                  209
-rwxr-xr-x  qa/workunits/rbd/rbd_mirror.sh                  516
-rwxr-xr-x  qa/workunits/rbd/rbd_mirror_bootstrap.sh         49
-rwxr-xr-x  qa/workunits/rbd/rbd_mirror_fsx_compare.sh       38
-rwxr-xr-x  qa/workunits/rbd/rbd_mirror_fsx_prepare.sh       10
-rwxr-xr-x  qa/workunits/rbd/rbd_mirror_ha.sh               210
-rwxr-xr-x  qa/workunits/rbd/rbd_mirror_helpers.sh         1211
-rwxr-xr-x  qa/workunits/rbd/rbd_mirror_stress.sh           186
-rwxr-xr-x  qa/workunits/rbd/read-flags.sh                   61
-rwxr-xr-x  qa/workunits/rbd/simple_big.sh                   12
-rwxr-xr-x  qa/workunits/rbd/test_admin_socket.sh           151
-rwxr-xr-x  qa/workunits/rbd/test_librbd.sh                   9
-rwxr-xr-x  qa/workunits/rbd/test_librbd_python.sh           12
-rwxr-xr-x  qa/workunits/rbd/test_lock_fence.sh              48
-rwxr-xr-x  qa/workunits/rbd/test_rbd_mirror.sh               9
-rwxr-xr-x  qa/workunits/rbd/test_rbd_tasks.sh              276
-rwxr-xr-x  qa/workunits/rbd/test_rbdmap_RBDMAPFILE.sh       34
-rwxr-xr-x  qa/workunits/rbd/verify_pool.sh                  27
49 files changed, 8411 insertions, 0 deletions
diff --git a/qa/workunits/rbd/cli_generic.sh b/qa/workunits/rbd/cli_generic.sh
new file mode 100755
index 00000000..7f44d932
--- /dev/null
+++ b/qa/workunits/rbd/cli_generic.sh
@@ -0,0 +1,933 @@
+#!/usr/bin/env bash
+set -ex
+
+. $(dirname $0)/../../standalone/ceph-helpers.sh
+
+export RBD_FORCE_ALLOW_V1=1
+
+# make sure rbd pool is EMPTY.. this is a test script!!
+rbd ls | wc -l | grep -v '^0$' && echo "nonempty rbd pool, aborting! run this script on an empty test cluster only." && exit 1
+
+IMGS="testimg1 testimg2 testimg3 testimg4 testimg5 testimg6 testimg-diff1 testimg-diff2 testimg-diff3 foo foo2 bar bar2 test1 test2 test3 test4 clone2"
+
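+# run a command and invert its exit status: succeed only if it fails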
+expect_fail() {
+ "$@" && return 1 || return 0
+}
+
+tiered=0
+if ceph osd dump | grep ^pool | grep "'rbd'" | grep tier; then
+ tiered=1
+fi
+
+remove_images() {
+ for img in $IMGS
+ do
+ (rbd snap purge $img || true) >/dev/null 2>&1
+ (rbd rm $img || true) >/dev/null 2>&1
+ done
+}
+
+test_others() {
+ echo "testing import, export, resize, and snapshots..."
+ TMP_FILES="/tmp/img1 /tmp/img1.new /tmp/img2 /tmp/img2.new /tmp/img3 /tmp/img3.new /tmp/img-diff1.new /tmp/img-diff2.new /tmp/img-diff3.new /tmp/img1.snap1 /tmp/img1.snap1 /tmp/img-diff1.snap1"
+
+ remove_images
+ rm -f $TMP_FILES
+
+ # create an image
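+ # (scatter small chunks of binary data at increasing offsets, leaving the file sparse)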
+ dd if=/bin/sh of=/tmp/img1 bs=1k count=1 seek=10
+ dd if=/bin/dd of=/tmp/img1 bs=1k count=10 seek=100
+ dd if=/bin/rm of=/tmp/img1 bs=1k count=100 seek=1000
+ dd if=/bin/ls of=/tmp/img1 bs=1k seek=10000
+ dd if=/bin/ln of=/tmp/img1 bs=1k seek=100000
+
+ # import, snapshot
+ rbd import $RBD_CREATE_ARGS /tmp/img1 testimg1
+ rbd resize testimg1 --size=256 --allow-shrink
+ rbd export testimg1 /tmp/img2
+ rbd snap create testimg1 --snap=snap1
+ rbd resize testimg1 --size=128 && exit 1 || true # shrink should fail
+ rbd resize testimg1 --size=128 --allow-shrink
+ rbd export testimg1 /tmp/img3
+
+ # info
+ rbd info testimg1 | grep 'size 128 MiB'
+ rbd info --snap=snap1 testimg1 | grep 'size 256 MiB'
+
+ # export-diff
+ rm -rf /tmp/diff-testimg1-1 /tmp/diff-testimg1-2
+ rbd export-diff testimg1 --snap=snap1 /tmp/diff-testimg1-1
+ rbd export-diff testimg1 --from-snap=snap1 /tmp/diff-testimg1-2
+
+ # import-diff
+ rbd create $RBD_CREATE_ARGS --size=1 testimg-diff1
+ rbd import-diff --sparse-size 8K /tmp/diff-testimg1-1 testimg-diff1
+ rbd import-diff --sparse-size 8K /tmp/diff-testimg1-2 testimg-diff1
+
+ # info
+ rbd info testimg1 | grep 'size 128 MiB'
+ rbd info --snap=snap1 testimg1 | grep 'size 256 MiB'
+ rbd info testimg-diff1 | grep 'size 128 MiB'
+ rbd info --snap=snap1 testimg-diff1 | grep 'size 256 MiB'
+
+ # make copies
+ rbd copy testimg1 --snap=snap1 testimg2
+ rbd copy testimg1 testimg3
+ rbd copy testimg-diff1 --sparse-size 768K --snap=snap1 testimg-diff2
+ rbd copy testimg-diff1 --sparse-size 768K testimg-diff3
+
+ # verify the result
+ rbd info testimg2 | grep 'size 256 MiB'
+ rbd info testimg3 | grep 'size 128 MiB'
+ rbd info testimg-diff2 | grep 'size 256 MiB'
+ rbd info testimg-diff3 | grep 'size 128 MiB'
+
+ # deep copies
+ rbd deep copy testimg1 testimg4
+ rbd deep copy testimg1 --snap=snap1 testimg5
+ rbd info testimg4 | grep 'size 128 MiB'
+ rbd info testimg5 | grep 'size 256 MiB'
+ rbd snap ls testimg4 | grep -v 'SNAPID' | wc -l | grep 1
+ rbd snap ls testimg4 | grep '.*snap1.*'
+
+ rbd export testimg1 /tmp/img1.new
+ rbd export testimg2 /tmp/img2.new
+ rbd export testimg3 /tmp/img3.new
+ rbd export testimg-diff1 /tmp/img-diff1.new
+ rbd export testimg-diff2 /tmp/img-diff2.new
+ rbd export testimg-diff3 /tmp/img-diff3.new
+
+ cmp /tmp/img2 /tmp/img2.new
+ cmp /tmp/img3 /tmp/img3.new
+ cmp /tmp/img2 /tmp/img-diff2.new
+ cmp /tmp/img3 /tmp/img-diff3.new
+
+ # rollback
+ rbd snap rollback --snap=snap1 testimg1
+ rbd snap rollback --snap=snap1 testimg-diff1
+ rbd info testimg1 | grep 'size 256 MiB'
+ rbd info testimg-diff1 | grep 'size 256 MiB'
+ rbd export testimg1 /tmp/img1.snap1
+ rbd export testimg-diff1 /tmp/img-diff1.snap1
+ cmp /tmp/img2 /tmp/img1.snap1
+ cmp /tmp/img2 /tmp/img-diff1.snap1
+
+ # test create, copy of zero-length images
+ rbd rm testimg2
+ rbd rm testimg3
+ rbd create testimg2 -s 0
+ rbd cp testimg2 testimg3
+ rbd deep cp testimg2 testimg6
+
+ # remove snapshots
+ rbd snap rm --snap=snap1 testimg1
+ rbd snap rm --snap=snap1 testimg-diff1
+ rbd info --snap=snap1 testimg1 2>&1 | grep 'error setting snapshot context: (2) No such file or directory'
+ rbd info --snap=snap1 testimg-diff1 2>&1 | grep 'error setting snapshot context: (2) No such file or directory'
+
+ # sparsify
+ rbd sparsify testimg1
+
+ remove_images
+ rm -f $TMP_FILES
+}
+
+test_rename() {
+ echo "testing rename..."
+ remove_images
+
+ rbd create --image-format 1 -s 1 foo
+ rbd create --image-format 2 -s 1 bar
+ rbd rename foo foo2
+ rbd rename foo2 bar 2>&1 | grep exists
+ rbd rename bar bar2
+ rbd rename bar2 foo2 2>&1 | grep exists
+
+ ceph osd pool create rbd2 8
+ rbd pool init rbd2
+ rbd create -p rbd2 -s 1 foo
+ rbd rename rbd2/foo rbd2/bar
+ rbd -p rbd2 ls | grep bar
+ rbd rename rbd2/bar foo
+ rbd rename --pool rbd2 foo bar
+ ! rbd rename rbd2/bar --dest-pool rbd foo
+ rbd rename --pool rbd2 bar --dest-pool rbd2 foo
+ rbd -p rbd2 ls | grep foo
+ ceph osd pool rm rbd2 rbd2 --yes-i-really-really-mean-it
+
+ remove_images
+}
+
+test_ls() {
+ echo "testing ls..."
+ remove_images
+
+ rbd create --image-format 1 -s 1 test1
+ rbd create --image-format 1 -s 1 test2
+ rbd ls | grep test1
+ rbd ls | grep test2
+ rbd ls | wc -l | grep 2
+ # look for fields in output of ls -l without worrying about space
+ rbd ls -l | grep 'test1.*1 MiB.*1'
+ rbd ls -l | grep 'test2.*1 MiB.*1'
+
+ rbd rm test1
+ rbd rm test2
+
+ rbd create --image-format 2 -s 1 test1
+ rbd create --image-format 2 -s 1 test2
+ rbd ls | grep test1
+ rbd ls | grep test2
+ rbd ls | wc -l | grep 2
+ rbd ls -l | grep 'test1.*1 MiB.*2'
+ rbd ls -l | grep 'test2.*1 MiB.*2'
+
+ rbd rm test1
+ rbd rm test2
+
+ rbd create --image-format 2 -s 1 test1
+ rbd create --image-format 1 -s 1 test2
+ rbd ls | grep test1
+ rbd ls | grep test2
+ rbd ls | wc -l | grep 2
+ rbd ls -l | grep 'test1.*1 MiB.*2'
+ rbd ls -l | grep 'test2.*1 MiB.*1'
+ remove_images
+
+ # test that many images can be shown by ls
+ for i in $(seq -w 00 99); do
+ rbd create image.$i -s 1
+ done
+ rbd ls | wc -l | grep 100
+ rbd ls -l | grep image | wc -l | grep 100
+ for i in $(seq -w 00 99); do
+ rbd rm image.$i
+ done
+
+ for i in $(seq -w 00 99); do
+ rbd create image.$i --image-format 2 -s 1
+ done
+ rbd ls | wc -l | grep 100
+ rbd ls -l | grep image | wc -l | grep 100
+ for i in $(seq -w 00 99); do
+ rbd rm image.$i
+ done
+}
+
+test_remove() {
+ echo "testing remove..."
+ remove_images
+
+ rbd remove "NOT_EXIST" && exit 1 || true # remove should fail
+ rbd create --image-format 1 -s 1 test1
+ rbd rm test1
+ rbd ls | wc -l | grep "^0$"
+
+ rbd create --image-format 2 -s 1 test2
+ rbd rm test2
+ rbd ls | wc -l | grep "^0$"
+
+ # check that remove succeeds even if it's
+ # interrupted partway through. simulate this
+ # by removing some objects manually.
+
+ # remove with header missing (old format)
+ rbd create --image-format 1 -s 1 test1
+ rados rm -p rbd test1.rbd
+ rbd rm test1
+ rbd ls | wc -l | grep "^0$"
+
+ if [ $tiered -eq 0 ]; then
+ # remove with header missing
+ rbd create --image-format 2 -s 1 test2
+ HEADER=$(rados -p rbd ls | grep '^rbd_header')
+ rados -p rbd rm $HEADER
+ rbd rm test2
+ rbd ls | wc -l | grep "^0$"
+
+ # remove with id missing
+ rbd create --image-format 2 -s 1 test2
+ rados -p rbd rm rbd_id.test2
+ rbd rm test2
+ rbd ls | wc -l | grep "^0$"
+
+ # remove with header and id missing
+ rbd create --image-format 2 -s 1 test2
+ HEADER=$(rados -p rbd ls | grep '^rbd_header')
+ rados -p rbd rm $HEADER
+ rados -p rbd rm rbd_id.test2
+ rbd rm test2
+ rbd ls | wc -l | grep "^0$"
+ fi
+
+ # remove with rbd_children object missing (and, by extension,
+ # with child not mentioned in rbd_children)
+ rbd create --image-format 2 -s 1 test2
+ rbd snap create test2@snap
+ rbd snap protect test2@snap
+ rbd clone test2@snap clone --rbd-default-clone-format 1
+
+ rados -p rbd rm rbd_children
+ rbd rm clone
+ rbd ls | grep clone | wc -l | grep '^0$'
+
+ rbd snap unprotect test2@snap
+ rbd snap rm test2@snap
+ rbd rm test2
+}
+
+test_locking() {
+ echo "testing locking..."
+ remove_images
+
+ rbd create $RBD_CREATE_ARGS -s 1 test1
+ rbd lock list test1 | wc -l | grep '^0$'
+ rbd lock add test1 id
+ rbd lock list test1 | grep ' 1 '
+ LOCKER=$(rbd lock list test1 | tail -n 1 | awk '{print $1;}')
+ rbd lock remove test1 id $LOCKER
+ rbd lock list test1 | wc -l | grep '^0$'
+
+ rbd lock add test1 id --shared tag
+ rbd lock list test1 | grep ' 1 '
+ rbd lock add test1 id --shared tag
+ rbd lock list test1 | grep ' 2 '
+ rbd lock add test1 id2 --shared tag
+ rbd lock list test1 | grep ' 3 '
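+ # "rbd lock list" prints the locker first and the lock id second;
+ # "rbd lock remove" takes them as "<id> <locker>", hence the awk swap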
+ rbd lock list test1 | tail -n 1 | awk '{print $2, $1;}' | xargs rbd lock remove test1
+ if rbd info test1 | grep -qE "features:.*exclusive"
+ then
+ # new locking functionality requires all locks to be released
+ while [ -n "$(rbd lock list test1)" ]
+ do
+ rbd lock list test1 | tail -n 1 | awk '{print $2, $1;}' | xargs rbd lock remove test1
+ done
+ fi
+ rbd rm test1
+}
+
+test_pool_image_args() {
+ echo "testing pool and image args..."
+ remove_images
+
+ ceph osd pool delete test test --yes-i-really-really-mean-it || true
+ ceph osd pool create test 32
+ rbd pool init test
+ truncate -s 1 /tmp/empty /tmp/empty@snap
+
+ rbd ls | wc -l | grep 0
+ rbd create -s 1 test1
+ rbd ls | grep -q test1
+ rbd import --image test2 /tmp/empty
+ rbd ls | grep -q test2
+ rbd --dest test3 import /tmp/empty
+ rbd ls | grep -q test3
+ rbd import /tmp/empty foo
+ rbd ls | grep -q foo
+
+ # should fail due to "destination snapname specified"
+ rbd import --dest test/empty@snap /tmp/empty && exit 1 || true
+ rbd import /tmp/empty test/empty@snap && exit 1 || true
+ rbd import --image test/empty@snap /tmp/empty && exit 1 || true
+ rbd import /tmp/empty@snap && exit 1 || true
+
+ rbd ls test | wc -l | grep 0
+ rbd import /tmp/empty test/test1
+ rbd ls test | grep -q test1
+ rbd -p test import /tmp/empty test2
+ rbd ls test | grep -q test2
+ rbd --image test3 -p test import /tmp/empty
+ rbd ls test | grep -q test3
+ rbd --image test4 -p test import /tmp/empty
+ rbd ls test | grep -q test4
+ rbd --dest test5 -p test import /tmp/empty
+ rbd ls test | grep -q test5
+ rbd --dest test6 --dest-pool test import /tmp/empty
+ rbd ls test | grep -q test6
+ rbd --image test7 --dest-pool test import /tmp/empty
+ rbd ls test | grep -q test7
+ rbd --image test/test8 import /tmp/empty
+ rbd ls test | grep -q test8
+ rbd --dest test/test9 import /tmp/empty
+ rbd ls test | grep -q test9
+ rbd import --pool test /tmp/empty
+ rbd ls test | grep -q empty
+
+ # copy with no explicit pool goes to pool rbd
+ rbd copy test/test9 test10
+ rbd ls test | grep -qv test10
+ rbd ls | grep -q test10
+ rbd copy test/test9 test/test10
+ rbd ls test | grep -q test10
+ rbd copy --pool test test10 --dest-pool test test11
+ rbd ls test | grep -q test11
+ rbd copy --dest-pool rbd --pool test test11 test12
+ rbd ls | grep test12
+ rbd ls test | grep -qv test12
+
+ rm -f /tmp/empty /tmp/empty@snap
+ ceph osd pool delete test test --yes-i-really-really-mean-it
+
+ for f in foo test1 test10 test12 test2 test3 ; do
+ rbd rm $f
+ done
+}
+
+test_clone() {
+ echo "testing clone..."
+ remove_images
+ rbd create test1 $RBD_CREATE_ARGS -s 1
+ rbd snap create test1@s1
+ rbd snap protect test1@s1
+
+ ceph osd pool create rbd2 8
+ rbd pool init rbd2
+ rbd clone test1@s1 rbd2/clone
+ rbd -p rbd2 ls | grep clone
+ rbd -p rbd2 ls -l | grep clone | grep test1@s1
+ rbd ls | grep -v clone
+ rbd flatten rbd2/clone
+ rbd snap create rbd2/clone@s1
+ rbd snap protect rbd2/clone@s1
+ rbd clone rbd2/clone@s1 clone2
+ rbd ls | grep clone2
+ rbd ls -l | grep clone2 | grep rbd2/clone@s1
+ rbd -p rbd2 ls | grep -v clone2
+
+ rbd rm clone2
+ rbd snap unprotect rbd2/clone@s1
+ rbd snap rm rbd2/clone@s1
+ rbd rm rbd2/clone
+ rbd snap unprotect test1@s1
+ rbd snap rm test1@s1
+ rbd rm test1
+ ceph osd pool rm rbd2 rbd2 --yes-i-really-really-mean-it
+}
+
+test_trash() {
+ echo "testing trash..."
+ remove_images
+
+ rbd create $RBD_CREATE_ARGS -s 1 test1
+ rbd create $RBD_CREATE_ARGS -s 1 test2
+ rbd ls | grep test1
+ rbd ls | grep test2
+ rbd ls | wc -l | grep 2
+ rbd ls -l | grep 'test1.*2.*'
+ rbd ls -l | grep 'test2.*2.*'
+
+ rbd trash mv test1
+ rbd ls | grep test2
+ rbd ls | wc -l | grep 1
+ rbd ls -l | grep 'test2.*2.*'
+
+ rbd trash ls | grep test1
+ rbd trash ls | wc -l | grep 1
+ rbd trash ls -l | grep 'test1.*USER.*'
+ rbd trash ls -l | grep -v 'protected until'
+
+ ID=`rbd trash ls | cut -d ' ' -f 1`
+ rbd trash rm $ID
+
+ rbd trash mv test2
+ ID=`rbd trash ls | cut -d ' ' -f 1`
+ rbd info --image-id $ID | grep "rbd image 'test2'"
+
+ rbd trash restore $ID
+ rbd ls | grep test2
+ rbd ls | wc -l | grep 1
+ rbd ls -l | grep 'test2.*2.*'
+
+ rbd trash mv test2 --expires-at "3600 sec"
+ rbd trash ls | grep test2
+ rbd trash ls | wc -l | grep 1
+ rbd trash ls -l | grep 'test2.*USER.*protected until'
+
+ rbd trash rm $ID 2>&1 | grep 'Deferment time has not expired'
+ rbd trash rm --image-id $ID --force
+
+ rbd create $RBD_CREATE_ARGS -s 1 test1
+ rbd snap create test1@snap1
+ rbd snap protect test1@snap1
+ rbd trash mv test1
+
+ rbd trash ls | grep test1
+ rbd trash ls | wc -l | grep 1
+ rbd trash ls -l | grep 'test1.*USER.*'
+ rbd trash ls -l | grep -v 'protected until'
+
+ ID=`rbd trash ls | cut -d ' ' -f 1`
+ rbd snap ls --image-id $ID | grep -v 'SNAPID' | wc -l | grep 1
+ rbd snap ls --image-id $ID | grep '.*snap1.*'
+
+ rbd snap unprotect --image-id $ID --snap snap1
+ rbd snap rm --image-id $ID --snap snap1
+ rbd snap ls --image-id $ID | grep -v 'SNAPID' | wc -l | grep 0
+
+ rbd trash restore $ID
+ rbd snap create test1@snap1
+ rbd snap create test1@snap2
+ rbd snap ls --image-id $ID | grep -v 'SNAPID' | wc -l | grep 2
+ rbd snap purge --image-id $ID
+ rbd snap ls --image-id $ID | grep -v 'SNAPID' | wc -l | grep 0
+
+ rbd rm --rbd_move_to_trash_on_remove=true --rbd_move_to_trash_on_remove_expire_seconds=3600 test1
+ rbd trash ls | grep test1
+ rbd trash ls | wc -l | grep 1
+ rbd trash ls -l | grep 'test1.*USER.*protected until'
+ rbd trash rm $ID 2>&1 | grep 'Deferment time has not expired'
+ rbd trash rm --image-id $ID --force
+
+ remove_images
+}
+
+test_purge() {
+ echo "testing trash purge..."
+ remove_images
+
+ rbd trash purge
+ rbd trash ls | wc -l | grep 0
+
+ rbd create $RBD_CREATE_ARGS foo -s 1
+ rbd create $RBD_CREATE_ARGS bar -s 1
+
+ rbd trash mv foo --expires-at "10 sec"
+ rbd trash mv bar --expires-at "30 sec"
+
+ rbd trash purge --expired-before "now + 10 sec"
+ rbd trash ls | grep -v foo | wc -l | grep 1
+ rbd trash ls | grep bar
+
+ LAST_IMG=$(rbd trash ls | grep bar | awk '{print $1;}')
+ rbd trash rm $LAST_IMG --force --no-progress | grep -v '.' | wc -l | grep 0
+}
+
+test_deep_copy_clone() {
+ echo "testing deep copy clone..."
+ remove_images
+
+ rbd create testimg1 $RBD_CREATE_ARGS --size 256
+ rbd snap create testimg1 --snap=snap1
+ rbd snap protect testimg1@snap1
+ rbd clone testimg1@snap1 testimg2
+ rbd snap create testimg2@snap2
+ rbd deep copy testimg2 testimg3
+ rbd info testimg3 | grep 'size 256 MiB'
+ rbd info testimg3 | grep 'parent: rbd/testimg1@snap1'
+ rbd snap ls testimg3 | grep -v 'SNAPID' | wc -l | grep 1
+ rbd snap ls testimg3 | grep '.*snap2.*'
+ rbd info testimg2 | grep 'features:.*deep-flatten' || rbd snap rm testimg2@snap2
+ rbd info testimg3 | grep 'features:.*deep-flatten' || rbd snap rm testimg3@snap2
+ rbd flatten testimg2
+ rbd flatten testimg3
+ rbd snap unprotect testimg1@snap1
+ rbd snap purge testimg2
+ rbd snap purge testimg3
+ rbd rm testimg2
+ rbd rm testimg3
+
+ rbd snap protect testimg1@snap1
+ rbd clone testimg1@snap1 testimg2
+ rbd snap create testimg2@snap2
+ rbd deep copy --flatten testimg2 testimg3
+ rbd info testimg3 | grep 'size 256 MiB'
+ rbd info testimg3 | grep -v 'parent:'
+ rbd snap ls testimg3 | grep -v 'SNAPID' | wc -l | grep 1
+ rbd snap ls testimg3 | grep '.*snap2.*'
+ rbd info testimg2 | grep 'features:.*deep-flatten' || rbd snap rm testimg2@snap2
+ rbd flatten testimg2
+ rbd snap unprotect testimg1@snap1
+
+ remove_images
+}
+
+test_clone_v2() {
+ echo "testing clone v2..."
+ remove_images
+
+ rbd create $RBD_CREATE_ARGS -s 1 test1
+ rbd snap create test1@1
+ rbd clone --rbd-default-clone-format=1 test1@1 test2 && exit 1 || true
+ rbd clone --rbd-default-clone-format=2 test1@1 test2
+ rbd clone --rbd-default-clone-format=2 test1@1 test3
+
+ rbd snap protect test1@1
+ rbd clone --rbd-default-clone-format=1 test1@1 test4
+
+ rbd children test1@1 | sort | tr '\n' ' ' | grep -E "test2.*test3.*test4"
+ rbd children --descendants test1 | sort | tr '\n' ' ' | grep -E "test2.*test3.*test4"
+
+ rbd remove test4
+ rbd snap unprotect test1@1
+
+ rbd snap remove test1@1
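+ # the snapshot still has a v2 clone attached, so it moves to the
+ # trash namespace instead of disappearing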
+ rbd snap list --all test1 | grep -E "trash \(1\) *$"
+
+ rbd snap create test1@2
+ rbd rm test1 2>&1 | grep 'image has snapshots'
+
+ rbd snap rm test1@2
+ rbd rm test1 2>&1 | grep 'linked clones'
+
+ rbd rm test3
+ rbd rm test1 2>&1 | grep 'linked clones'
+
+ rbd flatten test2
+ rbd snap list --all test1 | wc -l | grep '^0$'
+ rbd rm test1
+ rbd rm test2
+}
+
+test_thick_provision() {
+ echo "testing thick provision..."
+ remove_images
+
+ # Try to create small and large thick-provisioned images (64M and
+ # 4G) and check their actual sizes.
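+ # "rbd du" can lag behind the provisioning writes, so poll up to
+ # 10 times, 2 seconds apart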
+
+ # Small thick-pro image test
+ rbd create $RBD_CREATE_ARGS --thick-provision -s 64M test1
+ count=0
+ ret=""
+ while [ $count -lt 10 ]
+ do
+ rbd du|grep test1|tr -s " "|cut -d " " -f 4-5|grep '^64 MiB' && ret=$?
+ if [ "$ret" = "0" ]
+ then
+ break;
+ fi
+ count=`expr $count + 1`
+ sleep 2
+ done
+ rbd du
+ if [ "$ret" != "0" ]
+ then
+ exit 1
+ fi
+ rbd rm test1
+ rbd ls | grep test1 | wc -l | grep '^0$'
+
+ # Large thick-pro image test
+ rbd create $RBD_CREATE_ARGS --thick-provision -s 4G test1
+ count=0
+ ret=""
+ while [ $count -lt 10 ]
+ do
+ rbd du|grep test1|tr -s " "|cut -d " " -f 4-5|grep '^4 GiB' && ret=$?
+ if [ "$ret" = "0" ]
+ then
+ break;
+ fi
+ count=`expr $count + 1`
+ sleep 2
+ done
+ rbd du
+ if [ "$ret" != "0" ]
+ then
+ exit 1
+ fi
+ rbd rm test1
+ rbd ls | grep test1 | wc -l | grep '^0$'
+}
+
+test_namespace() {
+ echo "testing namespace..."
+ remove_images
+
+ rbd namespace ls | wc -l | grep '^0$'
+ rbd namespace create rbd/test1
+ rbd namespace create --pool rbd --namespace test2
+ rbd namespace create --namespace test3
+ expect_fail rbd namespace create rbd/test3
+
+ rbd namespace list | grep 'test' | wc -l | grep '^3$'
+
+ expect_fail rbd namespace remove --pool rbd missing
+
+ rbd create $RBD_CREATE_ARGS --size 1G rbd/test1/image1
+
+ # default test1 ns to test2 ns clone
+ rbd bench --io-type write --io-pattern rand --io-total 32M --io-size 4K rbd/test1/image1
+ rbd snap create rbd/test1/image1@1
+ rbd clone --rbd-default-clone-format 2 rbd/test1/image1@1 rbd/test2/image1
+ rbd snap rm rbd/test1/image1@1
+ cmp <(rbd export rbd/test1/image1 -) <(rbd export rbd/test2/image1 -)
+ rbd rm rbd/test2/image1
+
+ # default ns to test1 ns clone
+ rbd create $RBD_CREATE_ARGS --size 1G rbd/image2
+ rbd bench --io-type write --io-pattern rand --io-total 32M --io-size 4K rbd/image2
+ rbd snap create rbd/image2@1
+ rbd clone --rbd-default-clone-format 2 rbd/image2@1 rbd/test2/image2
+ rbd snap rm rbd/image2@1
+ cmp <(rbd export rbd/image2 -) <(rbd export rbd/test2/image2 -)
+ expect_fail rbd rm rbd/image2
+ rbd rm rbd/test2/image2
+ rbd rm rbd/image2
+
+ # v1 clones are supported within the same namespace
+ rbd create $RBD_CREATE_ARGS --size 1G rbd/test1/image3
+ rbd snap create rbd/test1/image3@1
+ rbd snap protect rbd/test1/image3@1
+ rbd clone --rbd-default-clone-format 1 rbd/test1/image3@1 rbd/test1/image4
+ rbd rm rbd/test1/image4
+ rbd snap unprotect rbd/test1/image3@1
+ rbd snap rm rbd/test1/image3@1
+ rbd rm rbd/test1/image3
+
+ rbd create $RBD_CREATE_ARGS --size 1G --namespace test1 image2
+ expect_fail rbd namespace remove rbd/test1
+
+ rbd group create rbd/test1/group1
+ rbd group image add rbd/test1/group1 rbd/test1/image1
+ rbd group rm rbd/test1/group1
+
+ rbd trash move rbd/test1/image1
+ ID=`rbd trash --namespace test1 ls | cut -d ' ' -f 1`
+ rbd trash rm rbd/test1/${ID}
+
+ rbd remove rbd/test1/image2
+
+ rbd namespace remove --pool rbd --namespace test1
+ rbd namespace remove --namespace test3
+
+ rbd namespace list | grep 'test' | wc -l | grep '^1$'
+ rbd namespace remove rbd/test2
+}
+
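+# extract the migration state (e.g. "prepared" or "executed") from the
+# XML output of "rbd status"; $XMLSTARLET is expected to be provided by
+# the sourced ceph-helpers.sh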
+get_migration_state() {
+ local image=$1
+
+ rbd --format xml status $image |
+ $XMLSTARLET sel -t -v '//status/migration/state'
+}
+
+test_migration() {
+ echo "testing migration..."
+ remove_images
+ ceph osd pool create rbd2 8
+ rbd pool init rbd2
+
+ # Convert to new format
+ rbd create --image-format 1 -s 128M test1
+ rbd info test1 | grep 'format: 1'
+ rbd migration prepare test1 --image-format 2
+ test "$(get_migration_state test1)" = prepared
+ rbd info test1 | grep 'format: 2'
+ rbd rm test1 && exit 1 || true
+ rbd migration execute test1
+ test "$(get_migration_state test1)" = executed
+ rbd migration commit test1
+ get_migration_state test1 && exit 1 || true
+
+ # Enable layering (and some other features)
+ rbd info test1 | grep 'features: .*layering' && exit 1 || true
+ rbd migration prepare test1 --image-feature \
+ layering,exclusive-lock,object-map,fast-diff,deep-flatten
+ rbd info test1 | grep 'features: .*layering'
+ rbd migration execute test1
+ rbd migration commit test1
+
+ # Migration to other pool
+ rbd migration prepare test1 rbd2/test1
+ test "$(get_migration_state rbd2/test1)" = prepared
+ rbd ls | wc -l | grep '^0$'
+ rbd -p rbd2 ls | grep test1
+ rbd migration execute test1
+ test "$(get_migration_state rbd2/test1)" = executed
+ rbd rm rbd2/test1 && exit 1 || true
+ rbd migration commit test1
+
+ # Migration to other namespace
+ rbd namespace create rbd2/ns1
+ rbd namespace create rbd2/ns2
+ rbd migration prepare rbd2/test1 rbd2/ns1/test1
+ test "$(get_migration_state rbd2/ns1/test1)" = prepared
+ rbd migration execute rbd2/test1
+ test "$(get_migration_state rbd2/ns1/test1)" = executed
+ rbd migration commit rbd2/test1
+ rbd migration prepare rbd2/ns1/test1 rbd2/ns2/test1
+ rbd migration execute rbd2/ns2/test1
+ rbd migration commit rbd2/ns2/test1
+
+ # Enable data pool
+ rbd create -s 128M test1
+ rbd migration prepare test1 --data-pool rbd2
+ rbd info test1 | grep 'data_pool: rbd2'
+ rbd migration execute test1
+ rbd migration commit test1
+
+ # testing trash
+ rbd migration prepare test1
+ expect_fail rbd trash mv test1
+ ID=`rbd trash ls -a | cut -d ' ' -f 1`
+ expect_fail rbd trash rm $ID
+ expect_fail rbd trash restore $ID
+ rbd migration abort test1
+
+ # Migrate parent
+ rbd remove test1
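+ # seed a fresh image with 1 MiB of random data so clone contents
+ # can be verified by checksum below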
+ dd if=/dev/urandom bs=1M count=1 | rbd --image-format 2 import - test1
+ md5sum=$(rbd export test1 - | md5sum)
+ rbd snap create test1@snap1
+ rbd snap protect test1@snap1
+ rbd snap create test1@snap2
+ rbd clone test1@snap1 clone_v1 --rbd_default_clone_format=1
+ rbd clone test1@snap2 clone_v2 --rbd_default_clone_format=2
+ rbd info clone_v1 | fgrep 'parent: rbd/test1@snap1'
+ rbd info clone_v2 | fgrep 'parent: rbd/test1@snap2'
+ rbd info clone_v2 |grep 'op_features: clone-child'
+ test "$(rbd export clone_v1 - | md5sum)" = "${md5sum}"
+ test "$(rbd export clone_v2 - | md5sum)" = "${md5sum}"
+ test "$(rbd children test1@snap1)" = "rbd/clone_v1"
+ test "$(rbd children test1@snap2)" = "rbd/clone_v2"
+ rbd migration prepare test1 rbd2/test2
+ rbd info clone_v1 | fgrep 'parent: rbd2/test2@snap1'
+ rbd info clone_v2 | fgrep 'parent: rbd2/test2@snap2'
+ rbd info clone_v2 | fgrep 'op_features: clone-child'
+ test "$(rbd children rbd2/test2@snap1)" = "rbd/clone_v1"
+ test "$(rbd children rbd2/test2@snap2)" = "rbd/clone_v2"
+ rbd migration execute test1
+ expect_fail rbd migration commit test1
+ rbd migration commit test1 --force
+ test "$(rbd export clone_v1 - | md5sum)" = "${md5sum}"
+ test "$(rbd export clone_v2 - | md5sum)" = "${md5sum}"
+ rbd migration prepare rbd2/test2 test1
+ rbd info clone_v1 | fgrep 'parent: rbd/test1@snap1'
+ rbd info clone_v2 | fgrep 'parent: rbd/test1@snap2'
+ rbd info clone_v2 | fgrep 'op_features: clone-child'
+ test "$(rbd children test1@snap1)" = "rbd/clone_v1"
+ test "$(rbd children test1@snap2)" = "rbd/clone_v2"
+ rbd migration execute test1
+ expect_fail rbd migration commit test1
+ rbd migration commit test1 --force
+ test "$(rbd export clone_v1 - | md5sum)" = "${md5sum}"
+ test "$(rbd export clone_v2 - | md5sum)" = "${md5sum}"
+ rbd remove clone_v1
+ rbd remove clone_v2
+ rbd snap unprotect test1@snap1
+ rbd snap purge test1
+ rbd rm test1
+
+ for format in 1 2; do
+ # Abort migration after successful prepare
+ rbd create -s 128M --image-format ${format} test2
+ rbd migration prepare test2 --data-pool rbd2
+ rbd bench --io-type write --io-size 1024 --io-total 1024 test2
+ rbd migration abort test2
+ rbd bench --io-type write --io-size 1024 --io-total 1024 test2
+ rbd rm test2
+
+ # Abort migration after successful execute
+ rbd create -s 128M --image-format ${format} test2
+ rbd migration prepare test2 --data-pool rbd2
+ rbd bench --io-type write --io-size 1024 --io-total 1024 test2
+ rbd migration execute test2
+ rbd migration abort test2
+ rbd bench --io-type write --io-size 1024 --io-total 1024 test2
+ rbd rm test2
+
+ # Migration is automatically aborted if prepare failed
+ rbd create -s 128M --image-format ${format} test2
+ rbd migration prepare test2 --data-pool INVALID_DATA_POOL && exit 1 || true
+ rbd bench --io-type write --io-size 1024 --io-total 1024 test2
+ rbd rm test2
+
+ # Abort migration to other pool
+ rbd create -s 128M --image-format ${format} test2
+ rbd migration prepare test2 rbd2/test2
+ rbd bench --io-type write --io-size 1024 --io-total 1024 rbd2/test2
+ rbd migration abort test2
+ rbd bench --io-type write --io-size 1024 --io-total 1024 test2
+ rbd rm test2
+
+ # The same but abort using destination image
+ rbd create -s 128M --image-format ${format} test2
+ rbd migration prepare test2 rbd2/test2
+ rbd migration abort rbd2/test2
+ rbd bench --io-type write --io-size 1024 --io-total 1024 test2
+ rbd rm test2
+
+ test $format = 1 && continue
+
+ # Abort migration to other namespace
+ rbd create -s 128M --image-format ${format} test2
+ rbd migration prepare test2 rbd2/ns1/test3
+ rbd bench --io-type write --io-size 1024 --io-total 1024 rbd2/ns1/test3
+ rbd migration abort test2
+ rbd bench --io-type write --io-size 1024 --io-total 1024 test2
+ rbd rm test2
+ done
+
+ remove_images
+ ceph osd pool rm rbd2 rbd2 --yes-i-really-really-mean-it
+}
+
+test_config() {
+ echo "testing config..."
+ remove_images
+
+ expect_fail rbd config global set osd rbd_cache true
+ expect_fail rbd config global set global debug_ms 10
+ expect_fail rbd config global set global rbd_UNKNOWN false
+ expect_fail rbd config global set global rbd_cache INVALID
+ rbd config global set global rbd_cache false
+ rbd config global set client rbd_cache true
+ rbd config global set client.123 rbd_cache false
+ rbd config global get global rbd_cache | grep '^false$'
+ rbd config global get client rbd_cache | grep '^true$'
+ rbd config global get client.123 rbd_cache | grep '^false$'
+ expect_fail rbd config global get client.UNKNOWN rbd_cache
+ rbd config global list global | grep '^rbd_cache * false * global *$'
+ rbd config global list client | grep '^rbd_cache * true * client *$'
+ rbd config global list client.123 | grep '^rbd_cache * false * client.123 *$'
+ rbd config global list client.UNKNOWN | grep '^rbd_cache * true * client *$'
+ rbd config global rm client rbd_cache
+ expect_fail rbd config global get client rbd_cache
+ rbd config global list client | grep '^rbd_cache * false * global *$'
+ rbd config global rm client.123 rbd_cache
+ rbd config global rm global rbd_cache
+
+ rbd config pool set rbd rbd_cache true
+ rbd config pool list rbd | grep '^rbd_cache * true * pool *$'
+ rbd config pool get rbd rbd_cache | grep '^true$'
+
+ rbd create $RBD_CREATE_ARGS -s 1 test1
+
+ rbd config image list rbd/test1 | grep '^rbd_cache * true * pool *$'
+ rbd config image set rbd/test1 rbd_cache false
+ rbd config image list rbd/test1 | grep '^rbd_cache * false * image *$'
+ rbd config image get rbd/test1 rbd_cache | grep '^false$'
+ rbd config image remove rbd/test1 rbd_cache
+ expect_fail rbd config image get rbd/test1 rbd_cache
+ rbd config image list rbd/test1 | grep '^rbd_cache * true * pool *$'
+
+ rbd config pool remove rbd rbd_cache
+ expect_fail rbd config pool get rbd rbd_cache
+ rbd config pool list rbd | grep '^rbd_cache * true * config *$'
+
+ rbd rm test1
+}
+
+test_pool_image_args
+test_rename
+test_ls
+test_remove
+test_migration
+test_config
+RBD_CREATE_ARGS=""
+test_others
+test_locking
+test_thick_provision
+RBD_CREATE_ARGS="--image-format 2"
+test_others
+test_locking
+test_clone
+test_trash
+test_purge
+test_deep_copy_clone
+test_clone_v2
+test_thick_provision
+test_namespace
+
+echo OK
diff --git a/qa/workunits/rbd/concurrent.sh b/qa/workunits/rbd/concurrent.sh
new file mode 100755
index 00000000..abaad75f
--- /dev/null
+++ b/qa/workunits/rbd/concurrent.sh
@@ -0,0 +1,375 @@
+#!/usr/bin/env bash
+
+# Copyright (C) 2013 Inktank Storage, Inc.
+#
+# This is free software; see the source for copying conditions.
+# There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE.
+#
+# This is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as
+# published by the Free Software Foundation version 2.
+
+# Alex Elder <elder@inktank.com>
+# January 29, 2013
+
+################################################################
+
+# The purpose of this test is to exercise paths through the rbd
+# code, making sure no bad pointer references or invalid reference
+# count operations occur in the face of concurrent activity.
+#
+# Each pass of the test creates an rbd image, maps it, and writes
+# some data into the image. It also reads some data from all of the
+# other images that exist at the time the pass executes. Finally,
+# the image is unmapped and removed. The image removal completes in
+# the background.
+#
+# An iteration of the test consists of performing some number of
+# passes, initiating each pass as a background job, and finally
+# sleeping for a variable delay. The delay is initially a specified
+# value, but each iteration shortens that proportionally, such that
+# the last iteration will not delay at all.
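+# (For example, with ITER=100 and DELAY=5, iteration 1 sleeps about
+# 4.95 seconds and the final iteration sleeps 0 seconds.)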
+#
+# The result exercises concurrent creates and deletes of rbd images,
+# writes to new images, reads from both written and unwritten image
+# data (including reads concurrent with writes), and attempts to
+# unmap images being read.
+
+# Usage: concurrent [-i <iter>] [-c <count>] [-d <delay>]
+#
+# Exit status:
+# 0: success
+# 1: usage error
+# 2: other runtime error
+# 99: argument count error (programming error)
+# 100: getopt error (internal error)
+
+################################################################
+
+set -ex
+
+PROGNAME=$(basename $0)
+
+# Default flag values; RBD_CONCURRENT_ITER names are intended
+# to be used in yaml scripts to pass in alternate values, e.g.:
+# env:
+# RBD_CONCURRENT_ITER: 20
+# RBD_CONCURRENT_COUNT: 5
+# RBD_CONCURRENT_DELAY: 3
+ITER_DEFAULT=${RBD_CONCURRENT_ITER:-100}
+COUNT_DEFAULT=${RBD_CONCURRENT_COUNT:-5}
+DELAY_DEFAULT=${RBD_CONCURRENT_DELAY:-5} # seconds
+
+CEPH_SECRET_FILE=${CEPH_SECRET_FILE:-}
+CEPH_ID=${CEPH_ID:-admin}
+SECRET_ARGS=""
+if [ "${CEPH_SECRET_FILE}" ]; then
+ SECRET_ARGS="--secret $CEPH_SECRET_FILE"
+fi
+
+################################################################
+
+function setup() {
+ ID_MAX_DIR=$(mktemp -d /tmp/image_max_id.XXXXX)
+ ID_COUNT_DIR=$(mktemp -d /tmp/image_ids.XXXXXX)
+ NAMES_DIR=$(mktemp -d /tmp/image_names.XXXXXX)
+ SOURCE_DATA=$(mktemp /tmp/source_data.XXXXXX)
+
+ # Use urandom to generate SOURCE_DATA
+ dd if=/dev/urandom of=${SOURCE_DATA} bs=2048 count=66 \
+ >/dev/null 2>&1
+
+ # List of rbd ids *not* created by this script
+ export INITIAL_RBD_IDS=$(ls /sys/bus/rbd/devices)
+
+ # Set up some environment for normal teuthology test setup.
+ # This really should not be necessary but I found it was.
+
+ export CEPH_ARGS=" --name client.0"
+}
+
+function cleanup() {
+ [ ! "${ID_MAX_DIR}" ] && return
+ local id
+ local image
+
+ # Unmap mapped devices
+ for id in $(rbd_ids); do
+ image=$(cat "/sys/bus/rbd/devices/${id}/name")
+ rbd_unmap_image "${id}"
+ rbd_destroy_image "${image}"
+ done
+ # Get any leftover images
+ for image in $(rbd ls 2>/dev/null); do
+ rbd_destroy_image "${image}"
+ done
+ wait
+ sync
+ rm -f "${SOURCE_DATA}"
+ [ -d "${NAMES_DIR}" ] && rmdir "${NAMES_DIR}"
+ echo "Max concurrent rbd image count was $(get_max "${ID_COUNT_DIR}")"
+ rm -rf "${ID_COUNT_DIR}"
+ echo "Max rbd image id was $(get_max "${ID_MAX_DIR}")"
+ rm -rf "${ID_MAX_DIR}"
+}
+
+function get_max() {
+ [ $# -eq 1 ] || exit 99
+ local dir="$1"
+
+ ls -U "${dir}" | sort -n | tail -1
+}
+
+trap cleanup HUP INT QUIT
+
+# print a usage message and quit
+#
+# if a message is supplied, print that first, and then exit
+# with non-zero status
+function usage() {
+ if [ $# -gt 0 ]; then
+ echo "" >&2
+ echo "$@" >&2
+ fi
+
+ echo "" >&2
+ echo "Usage: ${PROGNAME} <options> <tests>" >&2
+ echo "" >&2
+ echo " options:" >&2
+ echo " -h or --help" >&2
+ echo " show this message" >&2
+ echo " -i or --iterations" >&2
+ echo " iteration count (1 or more)" >&2
+ echo " -c or --count" >&2
+ echo " images created per iteration (1 or more)" >&2
+ echo " -d or --delay" >&2
+ echo " maximum delay between iterations" >&2
+ echo "" >&2
+ echo " defaults:" >&2
+ echo " iterations: ${ITER_DEFAULT}"
+ echo " count: ${COUNT_DEFAULT}"
+ echo " delay: ${DELAY_DEFAULT} (seconds)"
+ echo "" >&2
+
+ [ $# -gt 0 ] && exit 1
+
+ exit 0 # This is used for a --help
+}
+
+# parse command line arguments
+function parseargs() {
+ ITER="${ITER_DEFAULT}"
+ COUNT="${COUNT_DEFAULT}"
+ DELAY="${DELAY_DEFAULT}"
+
+ # Short option flags
+ SHORT_OPTS=""
+ SHORT_OPTS="${SHORT_OPTS},h"
+ SHORT_OPTS="${SHORT_OPTS},i:"
+ SHORT_OPTS="${SHORT_OPTS},c:"
+ SHORT_OPTS="${SHORT_OPTS},d:"
+
+ # Long option flags
+ LONG_OPTS=""
+ LONG_OPTS="${LONG_OPTS},help"
+ LONG_OPTS="${LONG_OPTS},iterations:"
+ LONG_OPTS="${LONG_OPTS},count:"
+ LONG_OPTS="${LONG_OPTS},delay:"
+
+ TEMP=$(getopt --name "${PROGNAME}" \
+ --options "${SHORT_OPTS}" \
+ --longoptions "${LONG_OPTS}" \
+ -- "$@")
+ eval set -- "$TEMP"
+
+ while [ "$1" != "--" ]; do
+ case "$1" in
+ -h|--help)
+ usage
+ ;;
+ -i|--iterations)
+ ITER="$2"
+ [ "${ITER}" -lt 1 ] &&
+ usage "bad iterations value"
+ shift
+ ;;
+ -c|--count)
+ COUNT="$2"
+ [ "${COUNT}" -lt 1 ] &&
+ usage "bad count value"
+ shift
+ ;;
+ -d|--delay)
+ DELAY="$2"
+ shift
+ ;;
+ *)
+ exit 100 # Internal error
+ ;;
+ esac
+ shift
+ done
+ shift
+}
+
+function rbd_ids() {
+ [ $# -eq 0 ] || exit 99
+ local ids
+ local i
+
+ [ -d /sys/bus/rbd ] || return
+ ids=" $(echo $(ls /sys/bus/rbd/devices)) "
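+ # drop any ids that already existed before this test started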
+ for i in ${INITIAL_RBD_IDS}; do
+ ids=${ids/ ${i} / }
+ done
+ echo ${ids}
+}
+
+function update_maxes() {
+ local ids="$@"
+ local last_id
+ # These aren't 100% safe against concurrent updates but it
+ # should be pretty close
+ count=$(echo ${ids} | wc -w)
+ touch "${ID_COUNT_DIR}/${count}"
+ last_id=${ids% }
+ last_id=${last_id##* }
+ touch "${ID_MAX_DIR}/${last_id}"
+}
+
+function rbd_create_image() {
+ [ $# -eq 0 ] || exit 99
+ local image=$(basename $(mktemp "${NAMES_DIR}/image.XXXXXX"))
+
+ rbd create "${image}" --size=1024
+ echo "${image}"
+}
+
+function rbd_image_id() {
+ [ $# -eq 1 ] || exit 99
+ local image="$1"
+
+ grep -l "${image}" /sys/bus/rbd/devices/*/name 2>/dev/null |
+ cut -d / -f 6
+}
+
+function rbd_map_image() {
+ [ $# -eq 1 ] || exit 99
+ local image="$1"
+ local id
+
+ sudo rbd map "${image}" --user "${CEPH_ID}" ${SECRET_ARGS} \
+ > /dev/null 2>&1
+
+ id=$(rbd_image_id "${image}")
+ echo "${id}"
+}
+
+function rbd_write_image() {
+ [ $# -eq 1 ] || exit 99
+ local id="$1"
+
+ # Offset and size here are meant to ensure beginning and end
+ # cross both (4K or 64K) page and (4MB) rbd object boundaries.
+ # It assumes the SOURCE_DATA file has size 66 * 2048 bytes
+ dd if="${SOURCE_DATA}" of="/dev/rbd${id}" bs=2048 seek=2015 \
+ > /dev/null 2>&1
+}
+
+# All starting and ending offsets here are selected so they are not
+# aligned on a (4 KB or 64 KB) page boundary
+function rbd_read_image() {
+ [ $# -eq 1 ] || exit 99
+ local id="$1"
+
+ # First read starting and ending at an offset before any
+ # written data. The osd zero-fills data read from an
+ # existing rbd object at offsets before any previously-written
+ # data.
+ dd if="/dev/rbd${id}" of=/dev/null bs=2048 count=34 skip=3 \
+ > /dev/null 2>&1
+ # Next read starting at an offset before any written data,
+ # but ending at an offset that includes data that's been
+ # written. The osd zero-fills unwritten data at the
+ # beginning of a read.
+ dd if="/dev/rbd${id}" of=/dev/null bs=2048 count=34 skip=1983 \
+ > /dev/null 2>&1
+ # Read the data at offset 2015 * 2048 bytes (where it was
+ # written) and make sure it matches the original data.
+ cmp --quiet "${SOURCE_DATA}" "/dev/rbd${id}" 0 4126720 ||
+ echo "MISMATCH!!!"
+ # Now read starting within the pre-written data, but ending
+ # beyond it. The rbd client zero-fills the unwritten
+ # portion at the end of a read.
+ dd if="/dev/rbd${id}" of=/dev/null bs=2048 count=34 skip=2079 \
+ > /dev/null 2>&1
+ # Now read starting from an unwritten range within a written
+ # rbd object. The rbd client zero-fills this.
+ dd if="/dev/rbd${id}" of=/dev/null bs=2048 count=34 skip=2115 \
+ > /dev/null 2>&1
+ # Finally read from an unwritten region which would reside
+ # in a different (non-existent) osd object. The osd client
+ # zero-fills unwritten data when the target object doesn't
+ # exist.
+ dd if="/dev/rbd${id}" of=/dev/null bs=2048 count=34 skip=4098 \
+ > /dev/null 2>&1
+}
+
+function rbd_unmap_image() {
+ [ $# -eq 1 ] || exit 99
+ local id="$1"
+
+ sudo rbd unmap "/dev/rbd${id}"
+}
+
+function rbd_destroy_image() {
+ [ $# -eq 1 ] || exit 99
+ local image="$1"
+
+ # Don't wait for it to complete, to increase concurrency
+ rbd rm "${image}" >/dev/null 2>&1 &
+ rm -f "${NAMES_DIR}/${image}"
+}
+
+function one_pass() {
+ [ $# -eq 0 ] || exit 99
+ local image
+ local id
+ local ids
+ local i
+
+ image=$(rbd_create_image)
+ id=$(rbd_map_image "${image}")
+ ids=$(rbd_ids)
+ update_maxes "${ids}"
+ for i in ${ids}; do
+ if [ "${i}" -eq "${id}" ]; then
+ rbd_write_image "${i}"
+ else
+ rbd_read_image "${i}"
+ fi
+ done
+ rbd_unmap_image "${id}"
+ rbd_destroy_image "${image}"
+}
+
+################################################################
+
+parseargs "$@"
+
+setup
+
+for iter in $(seq 1 "${ITER}"); do
+ for count in $(seq 1 "${COUNT}"); do
+ one_pass &
+ done
+ # Sleep longer at first, overlap iterations more later.
+ # Use awk to get sub-second granularity (see sleep(1)).
+ sleep $(echo "${DELAY}" "${iter}" "${ITER}" |
+ awk '{ printf("%.2f\n", $1 - $1 * $2 / $3);}')
+
+done
+wait
+
+cleanup
+
+exit 0
diff --git a/qa/workunits/rbd/diff.sh b/qa/workunits/rbd/diff.sh
new file mode 100755
index 00000000..fbd6e064
--- /dev/null
+++ b/qa/workunits/rbd/diff.sh
@@ -0,0 +1,53 @@
+#!/usr/bin/env bash
+set -ex
+
+function cleanup() {
+ rbd snap purge foo || :
+ rbd rm foo || :
+ rbd snap purge foo.copy || :
+ rbd rm foo.copy || :
+ rbd snap purge foo.copy2 || :
+ rbd rm foo.copy2 || :
+ rm -f foo.diff foo.out
+}
+
+cleanup
+
+rbd create foo --size 1000
+rbd bench --io-type write foo --io-size 4096 --io-threads 5 --io-total 4096000 --io-pattern rand
+
+#rbd cp foo foo.copy
+rbd create foo.copy --size 1000
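+# replicate foo by streaming a full-image diff (no --from-snap) straight into import-diff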
+rbd export-diff foo - | rbd import-diff - foo.copy
+
+rbd snap create foo --snap=two
+rbd bench --io-type write foo --io-size 4096 --io-threads 5 --io-total 4096000 --io-pattern rand
+rbd snap create foo --snap=three
+rbd snap create foo.copy --snap=two
+
+rbd export-diff foo@two --from-snap three foo.diff && exit 1 || true # wrong snap order
+rm -f foo.diff
+
+rbd export-diff foo@three --from-snap two foo.diff
+rbd import-diff foo.diff foo.copy
+rbd import-diff foo.diff foo.copy && exit 1 || true # this should fail with EEXIST on the end snap
+rbd snap ls foo.copy | grep three
+
+rbd create foo.copy2 --size 1000
+rbd import-diff foo.diff foo.copy2 && exit 1 || true # this should fail bc the start snap dne
+
+rbd export foo foo.out
+orig=`md5sum foo.out | awk '{print $1}'`
+rm foo.out
+rbd export foo.copy foo.out
+copy=`md5sum foo.out | awk '{print $1}'`
+
+if [ "$orig" != "$copy" ]; then
+ echo does not match
+ exit 1
+fi
+
+cleanup
+
+echo OK
+
diff --git a/qa/workunits/rbd/diff_continuous.sh b/qa/workunits/rbd/diff_continuous.sh
new file mode 100755
index 00000000..b8f7e8b7
--- /dev/null
+++ b/qa/workunits/rbd/diff_continuous.sh
@@ -0,0 +1,60 @@
+#!/usr/bin/env bash
+set -ex
+
+max=20
+size=1500
+
+iosize=16384
+iototal=16384000
+iothreads=16
+
+parent=`uuidgen`"-parent"
+src=`uuidgen`"-src";
+dst=`uuidgen`"-dst";
+
+function cleanup() {
+ rbd snap purge $src || :
+ rbd rm $src || :
+ rbd snap purge $dst || :
+ rbd rm $dst || :
+ rbd snap unprotect $parent --snap parent || :
+ rbd snap purge $parent || :
+ rbd rm $parent || :
+}
+trap cleanup EXIT
+
+# start from a clone
+rbd create $parent --size $size --image-format 2 --stripe-count 8 --stripe-unit 65536
+rbd bench --io-type write $parent --io-size $iosize --io-threads $iothreads --io-total $iototal --io-pattern rand
+rbd snap create $parent --snap parent
+rbd snap protect $parent --snap parent
+rbd clone $parent@parent $src --stripe-count 4 --stripe-unit 262144
+rbd create $dst --size $size --image-format 2 --order 19
+
+# mirror for a while
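+# (each pass snapshots src, ships the delta since the previous snapshot
+# to dst, and overlaps the transfer with a fresh round of writes)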
+for s in `seq 1 $max`; do
+ rbd snap create $src --snap=snap$s
+ rbd export-diff $src@snap$s - $lastsnap | rbd import-diff - $dst &
+ rbd bench --io-type write $src --io-size $iosize --io-threads $iothreads --io-total $iototal --io-pattern rand &
+ wait
+ lastsnap="--from-snap snap$s"
+done
+
+#trap "" EXIT
+#exit 0
+
+# validate
+for s in `seq 1 $max`; do
+ ssum=`rbd export $src@snap$s - | md5sum`
+ dsum=`rbd export $dst@snap$s - | md5sum`
+ if [ "$ssum" != "$dsum" ]; then
+ echo different sum at snap$s
+ exit 1
+ fi
+done
+
+cleanup
+trap "" EXIT
+
+echo OK
+
diff --git a/qa/workunits/rbd/huge-tickets.sh b/qa/workunits/rbd/huge-tickets.sh
new file mode 100755
index 00000000..22853c07
--- /dev/null
+++ b/qa/workunits/rbd/huge-tickets.sh
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+
+# This is a test for http://tracker.ceph.com/issues/8979 and the fallout
+# from triaging it. #8979 itself was random crashes on corrupted memory
+# due to a buffer overflow (for tickets larger than 256 bytes), further
+# inspection showed that vmalloced tickets weren't handled correctly as
+# well.
+#
+# What we are doing here is generating three huge keyrings and feeding
+# them to libceph (through 'rbd map' on a scratch image). Bad kernels
+# will crash reliably either on corrupted memory somewhere or a bad page
+# fault in scatterwalk_pagedone().
+
+set -ex
+
+function generate_keyring() {
+ local user=$1
+ local n=$2
+
+ ceph-authtool -C -n client.$user --cap mon 'allow *' --gen-key /tmp/keyring-$user
+
+ set +x # don't pollute trace with echos
+ echo -en "\tcaps osd = \"allow rwx pool=rbd" >>/tmp/keyring-$user
+ for i in $(seq 1 $n); do
+ echo -n ", allow rwx pool=pool$i" >>/tmp/keyring-$user
+ done
+ echo "\"" >>/tmp/keyring-$user
+ set -x
+}
+
+generate_keyring foo 1000 # ~25K, kmalloc
+generate_keyring bar 20000 # ~500K, vmalloc
+generate_keyring baz 300000 # ~8M, vmalloc + sg chaining
+
+rbd create --size 1 test
+
+for user in {foo,bar,baz}; do
+ ceph auth import -i /tmp/keyring-$user
+ DEV=$(sudo rbd map -n client.$user --keyring /tmp/keyring-$user test)
+ sudo rbd unmap $DEV
+done
diff --git a/qa/workunits/rbd/image_read.sh b/qa/workunits/rbd/image_read.sh
new file mode 100755
index 00000000..ddca8356
--- /dev/null
+++ b/qa/workunits/rbd/image_read.sh
@@ -0,0 +1,680 @@
+#!/usr/bin/env bash
+
+# Copyright (C) 2013 Inktank Storage, Inc.
+#
+# This is free software; see the source for copying conditions.
+# There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE.
+#
+# This is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as
+# published by the Free Software Foundation version 2.
+
+# Alex Elder <elder@inktank.com>
+# April 10, 2013
+
+################################################################
+
+# The purpose of this test is to validate that data read from a
+# mapped rbd image is what it's expected to be.
+#
+# By default it creates an image and fills it with some data. It
+# then reads back the data at a series of offsets known to cover
+# various situations (such as reading the beginning, end, or the
+# entirety of an object, or doing a read that spans multiple
+# objects), and stashes the results in a set of local files.
+#
+# It also creates and maps a snapshot of the original image after
+# it's been filled, and reads back the same ranges of data from the
+# snapshot. It then compares the data read back with what was read
+# back from the original image, verifying they match.
+#
+# Clone functionality is tested as well, in which case a clone is
+# made of the snapshot, and the same ranges of data are again read
+# and compared with the original. In addition, a snapshot of that
+# clone is created, and a clone of *that* snapshot is put through
+# the same set of tests. (Clone testing can be optionally skipped.)
+
+################################################################
+
+# Default parameter values. Environment variables, if set, will
+# supersede these defaults. Such variables have names that begin
+# with "IMAGE_READ_", e.g. use IMAGE_READ_PAGE_SIZE=65536
+# to use 65536 as the page size.
+set -e
+
+DEFAULT_VERBOSE=true
+DEFAULT_TEST_CLONES=true
+DEFAULT_LOCAL_FILES=false
+DEFAULT_FORMAT=2
+DEFAULT_DOUBLE_ORDER=true
+DEFAULT_HALF_ORDER=false
+DEFAULT_PAGE_SIZE=4096
+DEFAULT_OBJECT_ORDER=22
+MIN_OBJECT_ORDER=12 # technically 9, but the rbd CLI enforces 12
+MAX_OBJECT_ORDER=32
+
+export RBD_FORCE_ALLOW_V1=1
+
+PROGNAME=$(basename $0)
+
+ORIGINAL=original-$$
+SNAP1=snap1-$$
+CLONE1=clone1-$$
+SNAP2=snap2-$$
+CLONE2=clone2-$$
+
+function err() {
+ if [ $# -gt 0 ]; then
+ echo "${PROGNAME}: $@" >&2
+ fi
+ exit 2
+}
+
+function usage() {
+ if [ $# -gt 0 ]; then
+ echo "" >&2
+ echo "${PROGNAME}: $@" >&2
+ fi
+ echo "" >&2
+ echo "Usage: ${PROGNAME} [<options>]" >&2
+ echo "" >&2
+ echo "options are:" >&2
+ echo " -o object_order" >&2
+ echo " must be ${MIN_OBJECT_ORDER}..${MAX_OBJECT_ORDER}" >&2
+ echo " -p page_size (in bytes)" >&2
+ echo " note: there must be at least 4 pages per object" >&2
+ echo " -1" >&2
+ echo " test using format 1 rbd images (default)" >&2
+ echo " -2" >&2
+ echo " test using format 2 rbd images" >&2
+ echo " -c" >&2
+ echo " also test rbd clone images (implies format 2)" >&2
+ echo " -d" >&2
+ echo " clone object order double its parent's (format 2)" >&2
+ echo " -h" >&2
+ echo " clone object order half of its parent's (format 2)" >&2
+ echo " -l" >&2
+ echo " use local files rather than rbd images" >&2
+ echo " -v" >&2
+ echo " disable reporting of what's going on" >&2
+ echo "" >&2
+ exit 1
+}
+
+function verbose() {
+ [ "${VERBOSE}" = true ] && echo "$@"
+ true # Don't let the verbose test spoil our return value
+}
+
+function quiet() {
+ "$@" 2> /dev/null
+}
+
+function boolean_toggle() {
+ [ $# -eq 1 ] || exit 99
+ test "$1" = "true" && echo false || echo true
+}
+
+function parseargs() {
+ local opts="o:p:12clv"
+ local lopts="order:,page_size:,local,clone,verbose"
+ local parsed
+ local clone_order_msg
+
+ # use values from environment if available
+ VERBOSE="${IMAGE_READ_VERBOSE:-${DEFAULT_VERBOSE}}"
+ TEST_CLONES="${IMAGE_READ_TEST_CLONES:-${DEFAULT_TEST_CLONES}}"
+ LOCAL_FILES="${IMAGE_READ_LOCAL_FILES:-${DEFAULT_LOCAL_FILES}}"
+ DOUBLE_ORDER="${IMAGE_READ_DOUBLE_ORDER:-${DEFAULT_DOUBLE_ORDER}}"
+ HALF_ORDER="${IMAGE_READ_HALF_ORDER:-${DEFAULT_HALF_ORDER}}"
+ FORMAT="${IMAGE_READ_FORMAT:-${DEFAULT_FORMAT}}"
+ PAGE_SIZE="${IMAGE_READ_PAGE_SIZE:-${DEFAULT_PAGE_SIZE}}"
+ OBJECT_ORDER="${IMAGE_READ_OBJECT_ORDER:-${DEFAULT_OBJECT_ORDER}}"
+
+ parsed=$(getopt -o "${opts}" -l "${lopts}" -n "${PROGNAME}" -- "$@") ||
+ usage
+ eval set -- "${parsed}"
+ while true; do
+ case "$1" in
+ -v|--verbose)
+ VERBOSE=$(boolean_toggle "${VERBOSE}");;
+ -c|--clone)
+ TEST_CLONES=$(boolean_toggle "${TEST_CLONES}");;
+ -d|--double)
+ DOUBLE_ORDER=$(boolean_toggle "${DOUBLE_ORDER}");;
+ -h|--half)
+ HALF_ORDER=$(boolean_toggle "${HALF_ORDER}");;
+ -l|--local)
+ LOCAL_FILES=$(boolean_toggle "${LOCAL_FILES}");;
+ -1|-2)
+ FORMAT="${1:1}";;
+ -p|--page_size)
+ PAGE_SIZE="$2"; shift;;
+ -o|--order)
+ OBJECT_ORDER="$2"; shift;;
+ --)
+ shift; break;;
+ *)
+ err "getopt internal error"
+ esac
+ shift
+ done
+ [ $# -gt 0 ] && usage "excess arguments ($*)"
+
+ if [ "${TEST_CLONES}" = true ]; then
+ # If we're using different object orders for clones,
+ # make sure the limits are updated accordingly. If
+ # both "half" and "double" are specified, just
+ # ignore them both.
+ if [ "${DOUBLE_ORDER}" = true ]; then
+ if [ "${HALF_ORDER}" = true ]; then
+ DOUBLE_ORDER=false
+ HALF_ORDER=false
+ else
+ ((MAX_OBJECT_ORDER -= 2))
+ fi
+ elif [ "${HALF_ORDER}" = true ]; then
+ ((MIN_OBJECT_ORDER += 2))
+ fi
+ fi
+
+ [ "${OBJECT_ORDER}" -lt "${MIN_OBJECT_ORDER}" ] &&
+ usage "object order (${OBJECT_ORDER}) must be" \
+ "at least ${MIN_OBJECT_ORDER}"
+ [ "${OBJECT_ORDER}" -gt "${MAX_OBJECT_ORDER}" ] &&
+ usage "object order (${OBJECT_ORDER}) must be" \
+ "at most ${MAX_OBJECT_ORDER}"
+
+ if [ "${TEST_CLONES}" = true ]; then
+ if [ "${DOUBLE_ORDER}" = true ]; then
+ ((CLONE1_ORDER = OBJECT_ORDER + 1))
+ ((CLONE2_ORDER = OBJECT_ORDER + 2))
+ clone_order_msg="double"
+ elif [ "${HALF_ORDER}" = true ]; then
+ ((CLONE1_ORDER = OBJECT_ORDER - 1))
+ ((CLONE2_ORDER = OBJECT_ORDER - 2))
+ clone_order_msg="half of"
+ else
+ CLONE1_ORDER="${OBJECT_ORDER}"
+ CLONE2_ORDER="${OBJECT_ORDER}"
+ clone_order_msg="the same as"
+ fi
+ fi
+
+ [ "${TEST_CLONES}" != true ] || FORMAT=2
+
+ OBJECT_SIZE=$(echo "2 ^ ${OBJECT_ORDER}" | bc)
+ OBJECT_PAGES=$(echo "${OBJECT_SIZE} / ${PAGE_SIZE}" | bc)
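+ # size the image for 32 objects (twice the 16 objects' worth that
+ # fill_original writes), in MB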
+ IMAGE_SIZE=$((2 * 16 * OBJECT_SIZE / (1024 * 1024)))
+ [ "${IMAGE_SIZE}" -lt 1 ] && IMAGE_SIZE=1
+ IMAGE_OBJECTS=$((IMAGE_SIZE * (1024 * 1024) / OBJECT_SIZE))
+
+ [ "${OBJECT_PAGES}" -lt 4 ] &&
+ usage "object size (${OBJECT_SIZE}) must be" \
+ "at least 4 * page size (${PAGE_SIZE})"
+
+ echo "parameters for this run:"
+ echo " format ${FORMAT} images will be tested"
+ echo " object order is ${OBJECT_ORDER}, so" \
+ "objects are ${OBJECT_SIZE} bytes"
+ echo " page size is ${PAGE_SIZE} bytes, so" \
+ "there are are ${OBJECT_PAGES} pages in an object"
+ echo " derived image size is ${IMAGE_SIZE} MB, so" \
+ "there are ${IMAGE_OBJECTS} objects in an image"
+ if [ "${TEST_CLONES}" = true ]; then
+ echo " clone functionality will be tested"
+ echo " object size for a clone will be ${clone_order_msg}"
+ echo " the object size of its parent image"
+ fi
+
+ true # Don't let the clones test spoil our return value
+}
+
+function image_dev_path() {
+ [ $# -eq 1 ] || exit 99
+ local image_name="$1"
+
+ if [ "${LOCAL_FILES}" = true ]; then
+ echo "${TEMP}/${image_name}"
+ return
+ fi
+
+ echo "/dev/rbd/rbd/${image_name}"
+}
+
+function out_data_dir() {
+ [ $# -lt 2 ] || exit 99
+ local out_data="${TEMP}/data"
+ local image_name
+
+ if [ $# -eq 1 ]; then
+ image_name="$1"
+ echo "${out_data}/${image_name}"
+ else
+ echo "${out_data}"
+ fi
+}
+
+function setup() {
+ verbose "===== setting up ====="
+ TEMP=$(mktemp -d /tmp/rbd_image_read.XXXXX)
+ mkdir -p $(out_data_dir)
+
+ # create and fill the original image with some data
+ create_image "${ORIGINAL}"
+ map_image "${ORIGINAL}"
+ fill_original
+
+ # create a snapshot of the original
+ create_image_snap "${ORIGINAL}" "${SNAP1}"
+ map_image_snap "${ORIGINAL}" "${SNAP1}"
+
+ if [ "${TEST_CLONES}" = true ]; then
+ # create a clone of the original snapshot
+ create_snap_clone "${ORIGINAL}" "${SNAP1}" \
+ "${CLONE1}" "${CLONE1_ORDER}"
+ map_image "${CLONE1}"
+
+ # create a snapshot of that clone
+ create_image_snap "${CLONE1}" "${SNAP2}"
+ map_image_snap "${CLONE1}" "${SNAP2}"
+
+ # create a clone of that clone's snapshot
+ create_snap_clone "${CLONE1}" "${SNAP2}" \
+ "${CLONE2}" "${CLONE2_ORDER}"
+ map_image "${CLONE2}"
+ fi
+}
+
+function teardown() {
+ verbose "===== cleaning up ====="
+ if [ "${TEST_CLONES}" = true ]; then
+ unmap_image "${CLONE2}" || true
+ destroy_snap_clone "${CLONE1}" "${SNAP2}" "${CLONE2}" || true
+
+ unmap_image_snap "${CLONE1}" "${SNAP2}" || true
+ destroy_image_snap "${CLONE1}" "${SNAP2}" || true
+
+ unmap_image "${CLONE1}" || true
+ destroy_snap_clone "${ORIGINAL}" "${SNAP1}" "${CLONE1}" || true
+ fi
+ unmap_image_snap "${ORIGINAL}" "${SNAP1}" || true
+ destroy_image_snap "${ORIGINAL}" "${SNAP1}" || true
+ unmap_image "${ORIGINAL}" || true
+ destroy_image "${ORIGINAL}" || true
+
+ rm -rf $(out_data_dir)
+ rmdir "${TEMP}"
+}
+
+function create_image() {
+ [ $# -eq 1 ] || exit 99
+ local image_name="$1"
+ local image_path
+ local bytes
+
+ verbose "creating image \"${image_name}\""
+ if [ "${LOCAL_FILES}" = true ]; then
+ image_path=$(image_dev_path "${image_name}")
+ bytes=$(echo "${IMAGE_SIZE} * 1024 * 1024 - 1" | bc)
+ quiet dd if=/dev/zero bs=1 count=1 seek="${bytes}" \
+ of="${image_path}"
+ return
+ fi
+
+ rbd create "${image_name}" --image-format "${FORMAT}" \
+ --size "${IMAGE_SIZE}" --order "${OBJECT_ORDER}" \
+ --image-shared
+}
+
+function destroy_image() {
+ [ $# -eq 1 ] || exit 99
+ local image_name="$1"
+ local image_path
+
+ verbose "destroying image \"${image_name}\""
+ if [ "${LOCAL_FILES}" = true ]; then
+ image_path=$(image_dev_path "${image_name}")
+ rm -f "${image_path}"
+ return
+ fi
+
+ rbd rm "${image_name}"
+}
+
+function map_image() {
+ [ $# -eq 1 ] || exit 99
+ local image_name="$1" # can be image@snap too
+
+ if [ "${LOCAL_FILES}" = true ]; then
+ return
+ fi
+
+ sudo rbd map "${image_name}"
+}
+
+function unmap_image() {
+ [ $# -eq 1 ] || exit 99
+ local image_name="$1" # can be image@snap too
+ local image_path
+
+ if [ "${LOCAL_FILES}" = true ]; then
+ return
+ fi
+ image_path=$(image_dev_path "${image_name}")
+
+ if [ -e "${image_path}" ]; then
+ sudo rbd unmap "${image_path}"
+ fi
+}
+
+function map_image_snap() {
+ [ $# -eq 2 ] || exit 99
+ local image_name="$1"
+ local snap_name="$2"
+ local image_snap
+
+ if [ "${LOCAL_FILES}" = true ]; then
+ return
+ fi
+
+ image_snap="${image_name}@${snap_name}"
+ map_image "${image_snap}"
+}
+
+function unmap_image_snap() {
+ [ $# -eq 2 ] || exit 99
+ local image_name="$1"
+ local snap_name="$2"
+ local image_snap
+
+ if [ "${LOCAL_FILES}" = true ]; then
+ return
+ fi
+
+ image_snap="${image_name}@${snap_name}"
+ unmap_image "${image_snap}"
+}
+
+function create_image_snap() {
+ [ $# -eq 2 ] || exit 99
+ local image_name="$1"
+ local snap_name="$2"
+ local image_snap="${image_name}@${snap_name}"
+ local image_path
+ local snap_path
+
+ verbose "creating snapshot \"${snap_name}\"" \
+ "of image \"${image_name}\""
+ if [ "${LOCAL_FILES}" = true ]; then
+ image_path=$(image_dev_path "${image_name}")
+ snap_path=$(image_dev_path "${image_snap}")
+
+ cp "${image_path}" "${snap_path}"
+ return
+ fi
+
+ rbd snap create "${image_snap}"
+}
+
+function destroy_image_snap() {
+ [ $# -eq 2 ] || exit 99
+ local image_name="$1"
+ local snap_name="$2"
+ local image_snap="${image_name}@${snap_name}"
+ local snap_path
+
+ verbose "destroying snapshot \"${snap_name}\"" \
+ "of image \"${image_name}\""
+ if [ "${LOCAL_FILES}" = true ]; then
+ snap_path=$(image_dev_path "${image_snap}")
+ rm -rf "${snap_path}"
+ return
+ fi
+
+ rbd snap rm "${image_snap}"
+}
+
+function create_snap_clone() {
+ [ $# -eq 4 ] || exit 99
+ local image_name="$1"
+ local snap_name="$2"
+ local clone_name="$3"
+ local clone_order="$4"
+ local image_snap="${image_name}@${snap_name}"
+ local snap_path
+ local clone_path
+
+ verbose "creating clone image \"${clone_name}\"" \
+ "of image snapshot \"${image_name}@${snap_name}\""
+ if [ "${LOCAL_FILES}" = true ]; then
+ snap_path=$(image_dev_path "${image_name}@${snap_name}")
+ clone_path=$(image_dev_path "${clone_name}")
+
+ cp "${snap_path}" "${clone_path}"
+ return
+ fi
+
+ rbd snap protect "${image_snap}"
+ rbd clone --order "${clone_order}" --image-shared \
+ "${image_snap}" "${clone_name}"
+}
+
+function destroy_snap_clone() {
+ [ $# -eq 3 ] || exit 99
+ local image_name="$1"
+ local snap_name="$2"
+ local clone_name="$3"
+ local image_snap="${image_name}@${snap_name}"
+ local clone_path
+
+ verbose "destroying clone image \"${clone_name}\""
+ if [ "${LOCAL_FILES}" = true ]; then
+ clone_path=$(image_dev_path "${clone_name}")
+
+ rm -rf "${clone_path}"
+ return
+ fi
+
+ rbd rm "${clone_name}"
+ rbd snap unprotect "${image_snap}"
+}
+
+# function that produces "random" data with which to fill the image
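+# (dd pulls bytes out of /bin/bash starting at a pid-dependent offset,
+# so the data is arbitrary but repeatable within a run; the loop ends
+# once the downstream reader closes the pipe and dd dies with SIGPIPE)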
+function source_data() {
+ while quiet dd if=/bin/bash skip=$(($$ % 199)) bs="${PAGE_SIZE}"; do
+ : # Just do the dd
+ done
+}
+
+function fill_original() {
+ local image_path=$(image_dev_path "${ORIGINAL}")
+
+ verbose "filling original image"
+ # Fill 16 objects worth of "random" data
+ source_data |
+ quiet dd bs="${PAGE_SIZE}" count=$((16 * OBJECT_PAGES)) \
+ of="${image_path}"
+}
+
+function do_read() {
+ [ $# -eq 3 -o $# -eq 4 ] || exit 99
+ local image_name="$1"
+ local offset="$2"
+ local length="$3"
+ [ "${length}" -gt 0 ] || err "do_read: length must be non-zero"
+ local image_path=$(image_dev_path "${image_name}")
+ local out_data=$(out_data_dir "${image_name}")
+ local range=$(printf "%06u~%04u" "${offset}" "${length}")
+ local out_file
+
+ [ $# -eq 4 ] && offset=$((offset + 16 * OBJECT_PAGES))
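+ # a fourth argument shifts the read window past the 16 filled
+ # objects, so the extended pass also covers unwritten regions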
+
+ verbose "reading \"${image_name}\" pages ${range}"
+
+ out_file="${out_data}/pages_${range}"
+
+ quiet dd bs="${PAGE_SIZE}" skip="${offset}" count="${length}" \
+ if="${image_path}" of="${out_file}"
+}
+
+function one_pass() {
+ [ $# -eq 1 -o $# -eq 2 ] || exit 99
+ local image_name="$1"
+ local extended
+ [ $# -eq 2 ] && extended="true"
+ local offset
+ local length
+
+ offset=0
+
+ # +-----------+-----------+---
+ # |X:X:X...X:X| : : ... : | :
+ # +-----------+-----------+---
+ length="${OBJECT_PAGES}"
+ do_read "${image_name}" "${offset}" "${length}" ${extended}
+ offset=$((offset + length))
+
+ # ---+-----------+---
+ # : |X: : ... : | :
+ # ---+-----------+---
+ length=1
+ do_read "${image_name}" "${offset}" "${length}" ${extended}
+ offset=$((offset + length))
+
+ # ---+-----------+---
+ # : | :X: ... : | :
+ # ---+-----------+---
+ length=1
+ do_read "${image_name}" "${offset}" "${length}" ${extended}
+ offset=$((offset + length))
+
+ # ---+-----------+---
+ # : | : :X...X: | :
+ # ---+-----------+---
+ length=$((OBJECT_PAGES - 3))
+ do_read "${image_name}" "${offset}" "${length}" ${extended}
+ offset=$((offset + length))
+
+ # ---+-----------+---
+ # : | : : ... :X| :
+ # ---+-----------+---
+ length=1
+ do_read "${image_name}" "${offset}" "${length}" ${extended}
+ offset=$((offset + length))
+
+ # ---+-----------+---
+ # : |X:X:X...X:X| :
+ # ---+-----------+---
+ length="${OBJECT_PAGES}"
+ do_read "${image_name}" "${offset}" "${length}" ${extended}
+ offset=$((offset + length))
+
+ offset=$((offset + 1)) # skip 1
+
+ # ---+-----------+---
+ # : | :X:X...X:X| :
+ # ---+-----------+---
+ length=$((OBJECT_PAGES - 1))
+ do_read "${image_name}" "${offset}" "${length}" ${extended}
+ offset=$((offset + length))
+
+ # ---+-----------+-----------+---
+ # : |X:X:X...X:X|X: : ... : | :
+ # ---+-----------+-----------+---
+ length=$((OBJECT_PAGES + 1))
+ do_read "${image_name}" "${offset}" "${length}" ${extended}
+ offset=$((offset + length))
+
+ # ---+-----------+-----------+---
+ # : | :X:X...X:X|X: : ... : | :
+ # ---+-----------+-----------+---
+ length="${OBJECT_PAGES}"
+ do_read "${image_name}" "${offset}" "${length}" ${extended}
+ offset=$((offset + length))
+
+ # ---+-----------+-----------+---
+ # : | :X:X...X:X|X:X: ... : | :
+ # ---+-----------+-----------+---
+ length=$((OBJECT_PAGES + 1))
+ do_read "${image_name}" "${offset}" "${length}" ${extended}
+ offset=$((offset + length))
+
+ # ---+-----------+-----------+---
+ # : | : :X...X:X|X:X:X...X:X| :
+ # ---+-----------+-----------+---
+ length=$((2 * OBJECT_PAGES + 2))
+ do_read "${image_name}" "${offset}" "${length}" ${extended}
+ offset=$((offset + length))
+
+ offset=$((offset + 1)) # skip 1
+
+ # ---+-----------+-----------+-----
+ # : | :X:X...X:X|X:X:X...X:X|X: :
+ # ---+-----------+-----------+-----
+ length=$((2 * OBJECT_PAGES))
+ do_read "${image_name}" "${offset}" "${length}" ${extended}
+ offset=$((offset + length))
+
+ # --+-----------+-----------+--------
+ # : | :X:X...X:X|X:X:X...X:X|X:X: :
+ # --+-----------+-----------+--------
+ length=$((2 * OBJECT_PAGES + 1))
+ do_read "${image_name}" "${offset}" "${length}" ${extended}
+ # offset=$((offset + length))
+}
+
+function run_using() {
+ [ $# -eq 1 ] || exit 99
+ local image_name="$1"
+ local out_data=$(out_data_dir "${image_name}")
+
+ verbose "===== running using \"${image_name}\" ====="
+ mkdir -p "${out_data}"
+ one_pass "${image_name}"
+ one_pass "${image_name}" extended
+}
+
+function compare() {
+ [ $# -eq 1 ] || exit 99
+ local image_name="$1"
+ local out_data=$(out_data_dir "${image_name}")
+ local original=$(out_data_dir "${ORIGINAL}")
+
+ verbose "===== comparing \"${image_name}\" ====="
+ for i in $(ls "${original}"); do
+ verbose compare "\"${image_name}\" \"${i}\""
+ cmp "${original}/${i}" "${out_data}/${i}"
+ done
+ [ "${image_name}" = "${ORIGINAL}" ] || rm -rf "${out_data}"
+}
+
+function doit() {
+ [ $# -eq 1 ] || exit 99
+ local image_name="$1"
+
+ run_using "${image_name}"
+ compare "${image_name}"
+}
+
+########## Start
+
+parseargs "$@"
+
+trap teardown EXIT HUP INT
+setup
+
+run_using "${ORIGINAL}"
+doit "${ORIGINAL}@${SNAP1}"
+if [ "${TEST_CLONES}" = true ]; then
+ doit "${CLONE1}"
+ doit "${CLONE1}@${SNAP2}"
+ doit "${CLONE2}"
+fi
+rm -rf $(out_data_dir "${ORIGINAL}")
+
+echo "Success!"
+
+exit 0
diff --git a/qa/workunits/rbd/import_export.sh b/qa/workunits/rbd/import_export.sh
new file mode 100755
index 00000000..89e8d35c
--- /dev/null
+++ b/qa/workunits/rbd/import_export.sh
@@ -0,0 +1,259 @@
+#!/bin/sh -ex
+
+# V1 image unsupported but required for testing purposes
+export RBD_FORCE_ALLOW_V1=1
+
+# returns data pool for a given image
+get_image_data_pool () {
+ image=$1
+ data_pool=$(rbd info $image | grep "data_pool: " | awk -F':' '{ print $NF }')
+ if [ -z "$data_pool" ]; then
+ data_pool='rbd'
+ fi
+
+ echo $data_pool
+}
+
+# return list of object numbers populated in image
+objects () {
+ image=$1
+ prefix=$(rbd info $image | grep block_name_prefix | awk '{print $NF;}')
+
+ # strip off prefix and leading zeros from objects; sort, although
+ # it doesn't necessarily make sense as they're hex, at least it makes
+ # the list repeatable and comparable
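+ # (e.g. a hypothetical rbd_data.1234abcd.000000000000000a would
+ # reduce to "a")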
+ objects=$(rados ls -p $(get_image_data_pool $image) | grep $prefix | \
+ sed -e 's/'$prefix'\.//' -e 's/^0*\([0-9a-f]\)/\1/' | sort -u)
+ echo $objects
+}
+
+# return false if either files don't compare or their ondisk
+# sizes don't compare
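+# (stat %b reports allocated blocks, typically 512 bytes each, so the
+# bound below tolerates a little allocation drift between the files)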
+
+compare_files_and_ondisk_sizes () {
+ cmp -l $1 $2 || return 1
+ origsize=$(stat $1 --format %b)
+ exportsize=$(stat $2 --format %b)
+ difference=$(($exportsize - $origsize))
+ difference=${difference#-} # absolute value
+ test $difference -ge 0 -a $difference -lt 4096
+}
+
+TMPDIR=/tmp/rbd_import_export_$$
+rm -rf $TMPDIR
+mkdir $TMPDIR
+trap "rm -rf $TMPDIR" INT TERM EXIT
+
+# cannot import a dir
+mkdir foo.$$
+rbd import foo.$$ foo.dir && exit 1 || true # should fail
+rmdir foo.$$
+
+# create a sparse file
+dd if=/bin/sh of=${TMPDIR}/img bs=1k count=1 seek=10
+dd if=/bin/dd of=${TMPDIR}/img bs=1k count=10 seek=100
+dd if=/bin/rm of=${TMPDIR}/img bs=1k count=100 seek=1000
+dd if=/bin/ls of=${TMPDIR}/img bs=1k seek=10000
+dd if=/bin/ln of=${TMPDIR}/img bs=1k seek=100000
+dd if=/bin/grep of=${TMPDIR}/img bs=1k seek=1000000
+
+rbd rm testimg || true
+
+rbd import $RBD_CREATE_ARGS ${TMPDIR}/img testimg
+rbd export testimg ${TMPDIR}/img2
+rbd export testimg - > ${TMPDIR}/img3
+rbd rm testimg
+cmp ${TMPDIR}/img ${TMPDIR}/img2
+cmp ${TMPDIR}/img ${TMPDIR}/img3
+rm ${TMPDIR}/img2 ${TMPDIR}/img3
+
+# try again, importing from stdin
+rbd import $RBD_CREATE_ARGS - testimg < ${TMPDIR}/img
+rbd export testimg ${TMPDIR}/img2
+rbd export testimg - > ${TMPDIR}/img3
+rbd rm testimg
+cmp ${TMPDIR}/img ${TMPDIR}/img2
+cmp ${TMPDIR}/img ${TMPDIR}/img3
+
+rm ${TMPDIR}/img ${TMPDIR}/img2 ${TMPDIR}/img3
+
+if rbd help export | grep -q export-format; then
+ # try with --export-format for snapshots
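+ # (a format 2 export stream carries snapshots and image metadata,
+ # so both should survive the export/import round trip)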
+ dd if=/bin/dd of=${TMPDIR}/img bs=1k count=10 seek=100
+ rbd import $RBD_CREATE_ARGS ${TMPDIR}/img testimg
+ rbd snap create testimg@snap
+ rbd image-meta set testimg key1 value1
+ IMAGEMETA_BEFORE=`rbd image-meta list testimg`
+ rbd export --export-format 2 testimg ${TMPDIR}/img_v2
+ rbd import --export-format 2 ${TMPDIR}/img_v2 testimg_import
+ rbd info testimg_import
+ rbd info testimg_import@snap
+ IMAGEMETA_AFTER=`rbd image-meta list testimg_import`
+ [ "$IMAGEMETA_BEFORE" = "$IMAGEMETA_AFTER" ]
+
+ # compare the contents between testimg and testimg_import
+ rbd export testimg_import ${TMPDIR}/img_import
+ compare_files_and_ondisk_sizes ${TMPDIR}/img ${TMPDIR}/img_import
+
+ rbd export testimg@snap ${TMPDIR}/img_snap
+ rbd export testimg_import@snap ${TMPDIR}/img_snap_import
+ compare_files_and_ondisk_sizes ${TMPDIR}/img_snap ${TMPDIR}/img_snap_import
+
+ rm ${TMPDIR}/img_v2
+ rm ${TMPDIR}/img_import
+ rm ${TMPDIR}/img_snap
+ rm ${TMPDIR}/img_snap_import
+
+ rbd snap rm testimg_import@snap
+ rbd remove testimg_import
+ rbd snap rm testimg@snap
+ rbd rm testimg
+
+ # order
+ rbd import --order 20 ${TMPDIR}/img testimg
+ rbd export --export-format 2 testimg ${TMPDIR}/img_v2
+ rbd import --export-format 2 ${TMPDIR}/img_v2 testimg_import
+ rbd info testimg_import|grep order|awk '{print $2}'|grep 20
+
+ rm ${TMPDIR}/img_v2
+
+ rbd remove testimg_import
+ rbd remove testimg
+
+ # features
+ rbd import --image-feature layering ${TMPDIR}/img testimg
+ FEATURES_BEFORE=`rbd info testimg|grep features`
+ rbd export --export-format 2 testimg ${TMPDIR}/img_v2
+ rbd import --export-format 2 ${TMPDIR}/img_v2 testimg_import
+ FEATURES_AFTER=`rbd info testimg_import|grep features`
+ if [ "$FEATURES_BEFORE" != "$FEATURES_AFTER" ]; then
+ false
+ fi
+
+ rm ${TMPDIR}/img_v2
+
+ rbd remove testimg_import
+ rbd remove testimg
+
+ # stripe
+ rbd import --stripe-count 1000 --stripe-unit 4096 ${TMPDIR}/img testimg
+ rbd export --export-format 2 testimg ${TMPDIR}/img_v2
+ rbd import --export-format 2 ${TMPDIR}/img_v2 testimg_import
+ rbd info testimg_import|grep "stripe unit"|grep -Ei '(4 KiB|4096)'
+ rbd info testimg_import|grep "stripe count"|awk '{print $3}'|grep 1000
+
+ rm ${TMPDIR}/img_v2
+
+ rbd remove testimg_import
+ rbd remove testimg
+
+ # snap protect
+ rbd import --image-format=2 ${TMPDIR}/img testimg
+ rbd snap create testimg@snap1
+ rbd snap create testimg@snap2
+ rbd snap protect testimg@snap2
+ rbd export --export-format 2 testimg ${TMPDIR}/snap_protect
+ rbd import --export-format 2 ${TMPDIR}/snap_protect testimg_import
+ rbd info testimg_import@snap1 | grep 'protected: False'
+ rbd info testimg_import@snap2 | grep 'protected: True'
+
+ rm ${TMPDIR}/snap_protect
+
+ rbd snap unprotect testimg@snap2
+ rbd snap unprotect testimg_import@snap2
+ rbd snap purge testimg
+ rbd snap purge testimg_import
+ rbd remove testimg
+ rbd remove testimg_import
+fi
+
+tiered=0
+if ceph osd dump | grep ^pool | grep "'rbd'" | grep tier; then
+ tiered=1
+fi
+
+# create specifically sparse files
+# 1 1M block of sparse, 1 1M block of random
+dd if=/dev/urandom bs=1M seek=1 count=1 of=${TMPDIR}/sparse1
+
+# 1 1M block of random, 1 1M block of sparse
+dd if=/dev/urandom bs=1M count=1 of=${TMPDIR}/sparse2; truncate ${TMPDIR}/sparse2 -s 2M
+
+# 1M-block images; validate resulting blocks
+
+# 1M sparse, 1M data
+rbd rm sparse1 || true
+rbd import $RBD_CREATE_ARGS --order 20 ${TMPDIR}/sparse1
+rbd ls -l | grep sparse1 | grep -Ei '(2 MiB|2048k)'
+[ $tiered -eq 1 -o "$(objects sparse1)" = '1' ]
+
+# export, compare contents and on-disk size
+rbd export sparse1 ${TMPDIR}/sparse1.out
+compare_files_and_ondisk_sizes ${TMPDIR}/sparse1 ${TMPDIR}/sparse1.out
+rm ${TMPDIR}/sparse1.out
+rbd rm sparse1
+
+# 1M data, 1M sparse
+rbd rm sparse2 || true
+rbd import $RBD_CREATE_ARGS --order 20 ${TMPDIR}/sparse2
+rbd ls -l | grep sparse2 | grep -Ei '(2 MiB|2048k)'
+[ $tiered -eq 1 -o "$(objects sparse2)" = '0' ]
+rbd export sparse2 ${TMPDIR}/sparse2.out
+compare_files_and_ondisk_sizes ${TMPDIR}/sparse2 ${TMPDIR}/sparse2.out
+rm ${TMPDIR}/sparse2.out
+rbd rm sparse2
+
+# extend sparse1 to 10 1M blocks, sparse at the end
+truncate ${TMPDIR}/sparse1 -s 10M
+# import from stdin just for fun, verify still sparse
+rbd import $RBD_CREATE_ARGS --order 20 - sparse1 < ${TMPDIR}/sparse1
+rbd ls -l | grep sparse1 | grep -Ei '(10 MiB|10240k)'
+[ $tiered -eq 1 -o "$(objects sparse1)" = '1' ]
+rbd export sparse1 ${TMPDIR}/sparse1.out
+compare_files_and_ondisk_sizes ${TMPDIR}/sparse1 ${TMPDIR}/sparse1.out
+rm ${TMPDIR}/sparse1.out
+rbd rm sparse1
+
+# extend sparse2 to 4M total with two more nonsparse megs
+dd if=/dev/urandom bs=2M count=1 of=${TMPDIR}/sparse2 oflag=append conv=notrunc
+# import from stdin again
+rbd import $RBD_CREATE_ARGS --order 20 - sparse2 < ${TMPDIR}/sparse2
+rbd ls -l | grep sparse2 | grep -Ei '(4 MiB|4096k)'
+[ $tiered -eq 1 -o "$(objects sparse2)" = '0 2 3' ]
+rbd export sparse2 ${TMPDIR}/sparse2.out
+compare_files_and_ondisk_sizes ${TMPDIR}/sparse2 ${TMPDIR}/sparse2.out
+rm ${TMPDIR}/sparse2.out
+rbd rm sparse2
+
+# zeros import to a sparse image. Note: an all-zeros file currently
+# doesn't import as fully sparse due to the way we handle 'empty'
+# fiemaps; the image ends up zero-filled.
+
+echo "partially-sparse file imports to partially-sparse image"
+rbd import $RBD_CREATE_ARGS --order 20 ${TMPDIR}/sparse1 sparse
+[ $tiered -eq 1 -o "$(objects sparse)" = '1' ]
+rbd rm sparse
+
+echo "zeros import through stdin to sparse image"
+# stdin
+dd if=/dev/zero bs=1M count=4 | rbd import $RBD_CREATE_ARGS - sparse
+[ $tiered -eq 1 -o "$(objects sparse)" = '' ]
+rbd rm sparse
+
+echo "zeros export to sparse file"
+# The image must be made "by hand"; import won't create an all-zero image
+rbd create $RBD_CREATE_ARGS sparse --size 4
+prefix=$(rbd info sparse | grep block_name_prefix | awk '{print $NF;}')
+# drop in 0 object directly
+dd if=/dev/zero bs=4M count=1 | rados -p $(get_image_data_pool sparse) \
+ put ${prefix}.000000000000 -
+[ $tiered -eq 1 -o "$(objects sparse)" = '0' ]
+# 1 object full of zeros; export should still create 0-disk-usage file
+rm ${TMPDIR}/sparse || true
+rbd export sparse ${TMPDIR}/sparse
+[ $(stat ${TMPDIR}/sparse --format=%b) = '0' ]
+rbd rm sparse
+
+rm ${TMPDIR}/sparse ${TMPDIR}/sparse1 ${TMPDIR}/sparse2 || true
+
+echo OK
diff --git a/qa/workunits/rbd/issue-20295.sh b/qa/workunits/rbd/issue-20295.sh
new file mode 100755
index 00000000..3d617a06
--- /dev/null
+++ b/qa/workunits/rbd/issue-20295.sh
@@ -0,0 +1,18 @@
+#!/bin/sh -ex
+
+TEST_POOL=ecpool
+TEST_IMAGE=test1
+PGS=12
+
+ceph osd pool create $TEST_POOL $PGS $PGS erasure
+ceph osd pool application enable $TEST_POOL rbd
+ceph osd pool set $TEST_POOL allow_ec_overwrites true
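+# only the data objects land in the EC pool via --data-pool; the image
+# metadata stays in the replicated default pool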
+rbd --data-pool $TEST_POOL create --size 1024G $TEST_IMAGE
+rbd bench \
+ --io-type write \
+ --io-size 4096 \
+ --io-pattern=rand \
+ --io-total 100M \
+ $TEST_IMAGE
+
+echo "OK"
diff --git a/qa/workunits/rbd/journal.sh b/qa/workunits/rbd/journal.sh
new file mode 100755
index 00000000..ba89e75c
--- /dev/null
+++ b/qa/workunits/rbd/journal.sh
@@ -0,0 +1,326 @@
+#!/usr/bin/env bash
+set -e
+
+. $(dirname $0)/../../standalone/ceph-helpers.sh
+
+function list_tests()
+{
+ echo "AVAILABLE TESTS"
+ for i in $TESTS; do
+ echo " $i"
+ done
+}
+
+function usage()
+{
+ echo "usage: $0 [-h|-l|-t <testname> [-t <testname>...] [--no-cleanup]]"
+}
+
+function expect_false()
+{
+ set -x
+ if "$@"; then return 1; else return 0; fi
+}
+
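+# the client's commit position is kept in the "client_" omap key of the
+# journal header object (journal.<journal id>); saving and restoring it
+# lets a test rewind the journal and replay its entries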
+function save_commit_position()
+{
+ local journal=$1
+
+ rados -p rbd getomapval journal.${journal} client_ \
+ $TMPDIR/${journal}.client_.omap
+}
+
+function restore_commit_position()
+{
+ local journal=$1
+
+ rados -p rbd setomapval journal.${journal} client_ \
+ < $TMPDIR/${journal}.client_.omap
+}
+
+test_rbd_journal()
+{
+ local image=testrbdjournal$$
+
+ rbd create --image-feature exclusive-lock --image-feature journaling \
+ --size 128 ${image}
+ local journal=$(rbd info ${image} --format=xml 2>/dev/null |
+ $XMLSTARLET sel -t -v "//image/journal")
+ test -n "${journal}"
+ rbd journal info ${journal}
+ rbd journal info --journal ${journal}
+ rbd journal info --image ${image}
+
+ rbd feature disable ${image} journaling
+
+ rbd info ${image} --format=xml 2>/dev/null |
+ expect_false $XMLSTARLET sel -t -v "//image/journal"
+ expect_false rbd journal info ${journal}
+ expect_false rbd journal info --image ${image}
+
+ rbd feature enable ${image} journaling
+
+ local journal1=$(rbd info ${image} --format=xml 2>/dev/null |
+ $XMLSTARLET sel -t -v "//image/journal")
+ test "${journal}" = "${journal1}"
+
+ rbd journal info ${journal}
+
+ rbd journal status ${journal}
+
+ local count=10
+ save_commit_position ${journal}
+ rbd bench --io-type write ${image} --io-size 4096 --io-threads 1 \
+ --io-total $((4096 * count)) --io-pattern seq
+ rbd journal status --image ${image} | fgrep "tid=$((count - 1))"
+ restore_commit_position ${journal}
+ rbd journal status --image ${image} | fgrep "positions=[]"
+ local count1=$(rbd journal inspect --verbose ${journal} |
+ grep -c 'event_type.*AioWrite')
+ test "${count}" -eq "${count1}"
+
+ rbd journal export ${journal} $TMPDIR/journal.export
+ local size=$(stat -c "%s" $TMPDIR/journal.export)
+ test "${size}" -gt 0
+
+ rbd export ${image} $TMPDIR/${image}.export
+
+ local image1=${image}1
+ rbd create --image-feature exclusive-lock --image-feature journaling \
+ --size 128 ${image1}
+ journal1=$(rbd info ${image1} --format=xml 2>/dev/null |
+ $XMLSTARLET sel -t -v "//image/journal")
+
+ save_commit_position ${journal1}
+ rbd journal import --dest ${image1} $TMPDIR/journal.export
+ rbd snap create ${image1}@test
+ restore_commit_position ${journal1}
+ # check that commit position is properly updated: the journal should contain
+ # 14 entries (2 AioFlush + 10 AioWrite + 1 SnapCreate + 1 OpFinish) and
+ # commit position set to tid=14
+ rbd journal inspect --image ${image1} --verbose | awk '
+ /AioFlush/ {a++} # match: "event_type": "AioFlush",
+ /AioWrite/ {w++} # match: "event_type": "AioWrite",
+ /SnapCreate/ {s++} # match: "event_type": "SnapCreate",
+ /OpFinish/ {f++} # match: "event_type": "OpFinish",
+ /entries inspected/ {t=$1; e=$4} # match: 14 entries inspected, 0 errors
+ {print} # for diagnostic
+ END {
+ if (a != 2 || w != 10 || s != 1 || f != 1 || t != 14 || e != 0) exit(1)
+ }
+ '
+
+ rbd export ${image1}@test $TMPDIR/${image1}.export
+ cmp $TMPDIR/${image}.export $TMPDIR/${image1}.export
+
+ rbd journal reset ${journal}
+
+ rbd journal inspect --verbose ${journal} | expect_false grep 'event_type'
+
+ rbd snap purge ${image1}
+ rbd remove ${image1}
+ rbd remove ${image}
+}
+
+
+rbd_assert_eq() {
+ local image=$1
+ local cmd=$2
+ local param=$3
+ local expected_val=$4
+
+ local val=$(rbd --format xml ${cmd} --image ${image} |
+ $XMLSTARLET sel -t -v "${param}")
+ test "${val}" = "${expected_val}"
+}
+
+test_rbd_create()
+{
+ local image=testrbdcreate$$
+
+ rbd create --image-feature exclusive-lock --image-feature journaling \
+ --journal-pool rbd \
+ --journal-object-size 20M \
+ --journal-splay-width 6 \
+ --size 256 ${image}
+
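+ # the 20M journal object size is rounded up to a power of two,
+ # hence the expected order of 25 (2^25 = 32M)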
+ rbd_assert_eq ${image} 'journal info' '//journal/order' 25
+ rbd_assert_eq ${image} 'journal info' '//journal/splay_width' 6
+ rbd_assert_eq ${image} 'journal info' '//journal/object_pool' rbd
+
+ rbd remove ${image}
+}
+
+test_rbd_copy()
+{
+ local src=testrbdcopys$$
+ rbd create --size 256 ${src}
+
+ local image=testrbdcopy$$
+ rbd copy --image-feature exclusive-lock --image-feature journaling \
+ --journal-pool rbd \
+ --journal-object-size 20M \
+ --journal-splay-width 6 \
+ ${src} ${image}
+
+ rbd remove ${src}
+
+ rbd_assert_eq ${image} 'journal info' '//journal/order' 25
+ rbd_assert_eq ${image} 'journal info' '//journal/splay_width' 6
+ rbd_assert_eq ${image} 'journal info' '//journal/object_pool' rbd
+
+ rbd remove ${image}
+}
+
+test_rbd_deep_copy()
+{
+ local src=testrbdcopys$$
+ rbd create --size 256 ${src}
+ rbd snap create ${src}@snap1
+
+ local dest=testrbdcopy$$
+ rbd deep copy --image-feature exclusive-lock --image-feature journaling \
+ --journal-pool rbd \
+ --journal-object-size 20M \
+ --journal-splay-width 6 \
+ ${src} ${dest}
+
+ rbd snap purge ${src}
+ rbd remove ${src}
+
+ rbd_assert_eq ${dest} 'journal info' '//journal/order' 25
+ rbd_assert_eq ${dest} 'journal info' '//journal/splay_width' 6
+ rbd_assert_eq ${dest} 'journal info' '//journal/object_pool' rbd
+
+ rbd snap purge ${dest}
+ rbd remove ${dest}
+}
+
+test_rbd_clone()
+{
+ local parent=testrbdclonep$$
+ rbd create --image-feature layering --size 256 ${parent}
+ rbd snap create ${parent}@snap
+ rbd snap protect ${parent}@snap
+
+ local image=testrbdclone$$
+ rbd clone --image-feature layering --image-feature exclusive-lock --image-feature journaling \
+ --journal-pool rbd \
+ --journal-object-size 20M \
+ --journal-splay-width 6 \
+ ${parent}@snap ${image}
+
+ rbd_assert_eq ${image} 'journal info' '//journal/order' 25
+ rbd_assert_eq ${image} 'journal info' '//journal/splay_width' 6
+ rbd_assert_eq ${image} 'journal info' '//journal/object_pool' rbd
+
+ rbd remove ${image}
+ rbd snap unprotect ${parent}@snap
+ rbd snap purge ${parent}
+ rbd remove ${parent}
+}
+
+test_rbd_import()
+{
+ local src=testrbdimports$$
+ rbd create --size 256 ${src}
+
+ rbd export ${src} $TMPDIR/${src}.export
+ rbd remove ${src}
+
+ local image=testrbdimport$$
+ rbd import --image-feature exclusive-lock --image-feature journaling \
+ --journal-pool rbd \
+ --journal-object-size 20M \
+ --journal-splay-width 6 \
+ $TMPDIR/${src}.export ${image}
+
+ rbd_assert_eq ${image} 'journal info' '//journal/order' 25
+ rbd_assert_eq ${image} 'journal info' '//journal/splay_width' 6
+ rbd_assert_eq ${image} 'journal info' '//journal/object_pool' rbd
+
+ rbd remove ${image}
+}
+
+test_rbd_feature()
+{
+ local image=testrbdfeature$$
+
+ rbd create --image-feature exclusive-lock --size 256 ${image}
+
+ rbd feature enable ${image} journaling \
+ --journal-pool rbd \
+ --journal-object-size 20M \
+ --journal-splay-width 6
+
+ rbd_assert_eq ${image} 'journal info' '//journal/order' 25
+ rbd_assert_eq ${image} 'journal info' '//journal/splay_width' 6
+ rbd_assert_eq ${image} 'journal info' '//journal/object_pool' rbd
+
+ rbd remove ${image}
+}
+
+TESTS+=" rbd_journal"
+TESTS+=" rbd_create"
+TESTS+=" rbd_copy"
+TESTS+=" rbd_clone"
+TESTS+=" rbd_import"
+TESTS+=" rbd_feature"
+
+#
+# "main" follows
+#
+
+tests_to_run=""
+
+cleanup=true
+
+while [[ $# -gt 0 ]]; do
+ opt=$1
+
+ case "$opt" in
+ "-l" )
+ do_list=1
+ ;;
+ "--no-cleanup" )
+ cleanup=false
+ ;;
+ "-t" )
+ shift
+ if [[ -z "$1" ]]; then
+ echo "missing argument to '-t'"
+ usage ;
+ exit 1
+ fi
+ tests_to_run+=" $1"
+ ;;
+ "-h" )
+ usage ;
+ exit 0
+ ;;
+ esac
+ shift
+done
+
+if [[ $do_list -eq 1 ]]; then
+ list_tests ;
+ exit 0
+fi
+
+TMPDIR=/tmp/rbd_journal$$
+mkdir $TMPDIR
+if $cleanup; then
+ trap "rm -fr $TMPDIR" 0
+fi
+
+if test -z "$tests_to_run" ; then
+ tests_to_run="$TESTS"
+fi
+
+for i in $tests_to_run; do
+ set -x
+ test_${i}
+ set +x
+done
+
+echo OK
diff --git a/qa/workunits/rbd/kernel.sh b/qa/workunits/rbd/kernel.sh
new file mode 100755
index 00000000..faa5760e
--- /dev/null
+++ b/qa/workunits/rbd/kernel.sh
@@ -0,0 +1,100 @@
+#!/usr/bin/env bash
+set -ex
+
+CEPH_SECRET_FILE=${CEPH_SECRET_FILE:-}
+CEPH_ID=${CEPH_ID:-admin}
+SECRET_ARGS=''
+if [ ! -z $CEPH_SECRET_FILE ]; then
+ SECRET_ARGS="--secret $CEPH_SECRET_FILE"
+fi
+
+TMP_FILES="/tmp/img1 /tmp/img1.small /tmp/img1.snap1 /tmp/img1.export /tmp/img1.trunc"
+
+function expect_false() {
+ if "$@"; then return 1; else return 0; fi
+}
+
+function get_device_dir {
+ local POOL=$1
+ local IMAGE=$2
+ local SNAP=$3
+ rbd device list | tail -n +2 | egrep "\s+$POOL\s+$IMAGE\s+$SNAP\s+" |
+ awk '{print $1;}'
+}
+
+function clean_up {
+ [ -e /dev/rbd/rbd/testimg1@snap1 ] &&
+ sudo rbd device unmap /dev/rbd/rbd/testimg1@snap1
+ if [ -e /dev/rbd/rbd/testimg1 ]; then
+ sudo rbd device unmap /dev/rbd/rbd/testimg1
+ rbd snap purge testimg1 || true
+ fi
+ rbd ls | grep testimg1 > /dev/null && rbd rm testimg1 || true
+ sudo rm -f $TMP_FILES
+}
+
+clean_up
+
+trap clean_up INT TERM EXIT
+
+# create an image
+dd if=/bin/sh of=/tmp/img1 bs=1k count=1 seek=10
+dd if=/bin/dd of=/tmp/img1 bs=1k count=10 seek=100
+dd if=/bin/rm of=/tmp/img1 bs=1k count=100 seek=1000
+dd if=/bin/ls of=/tmp/img1 bs=1k seek=10000
+dd if=/bin/ln of=/tmp/img1 bs=1k seek=100000
+dd if=/dev/zero of=/tmp/img1 count=0 seek=150000
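+# with dd's default 512-byte block size, seek=150000 extends the file
+# to 150000 * 512 = 76800000 bytes, the size checked below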
+
+# import
+rbd import /tmp/img1 testimg1
+sudo rbd device map testimg1 --user $CEPH_ID $SECRET_ARGS
+
+DEV_ID1=$(get_device_dir rbd testimg1 -)
+echo "dev_id1 = $DEV_ID1"
+cat /sys/bus/rbd/devices/$DEV_ID1/size
+cat /sys/bus/rbd/devices/$DEV_ID1/size | grep 76800000
+
+sudo dd if=/dev/rbd/rbd/testimg1 of=/tmp/img1.export
+cmp /tmp/img1 /tmp/img1.export
+
+# snapshot
+rbd snap create testimg1 --snap=snap1
+sudo rbd device map --snap=snap1 testimg1 --user $CEPH_ID $SECRET_ARGS
+
+DEV_ID2=$(get_device_dir rbd testimg1 snap1)
+cat /sys/bus/rbd/devices/$DEV_ID2/size | grep 76800000
+
+sudo dd if=/dev/rbd/rbd/testimg1@snap1 of=/tmp/img1.snap1
+cmp /tmp/img1 /tmp/img1.snap1
+
+# resize
+rbd resize testimg1 --size=40 --allow-shrink
+cat /sys/bus/rbd/devices/$DEV_ID1/size | grep 41943040
+cat /sys/bus/rbd/devices/$DEV_ID2/size | grep 76800000
+
+sudo dd if=/dev/rbd/rbd/testimg1 of=/tmp/img1.small
+cp /tmp/img1 /tmp/img1.trunc
+truncate -s 41943040 /tmp/img1.trunc
+cmp /tmp/img1.trunc /tmp/img1.small
+
+# rollback expects an unlocked image
+# (acquire and) release the lock as a side effect
+rbd bench --io-type read --io-size 1 --io-threads 1 --io-total 1 testimg1
+
+# rollback and check data again
+rbd snap rollback --snap=snap1 testimg1
+cat /sys/bus/rbd/devices/$DEV_ID1/size | grep 76800000
+cat /sys/bus/rbd/devices/$DEV_ID2/size | grep 76800000
+sudo rm -f /tmp/img1.snap1 /tmp/img1.export
+
+sudo dd if=/dev/rbd/rbd/testimg1@snap1 of=/tmp/img1.snap1
+cmp /tmp/img1 /tmp/img1.snap1
+sudo dd if=/dev/rbd/rbd/testimg1 of=/tmp/img1.export
+cmp /tmp/img1 /tmp/img1.export
+
+# zeros are returned if an image or a snapshot is removed
+expect_false cmp -n 76800000 /dev/rbd/rbd/testimg1@snap1 /dev/zero
+rbd snap rm --snap=snap1 testimg1
+cmp -n 76800000 /dev/rbd/rbd/testimg1@snap1 /dev/zero
+
+echo OK
diff --git a/qa/workunits/rbd/krbd_data_pool.sh b/qa/workunits/rbd/krbd_data_pool.sh
new file mode 100755
index 00000000..e8fc8348
--- /dev/null
+++ b/qa/workunits/rbd/krbd_data_pool.sh
@@ -0,0 +1,206 @@
+#!/usr/bin/env bash
+
+set -ex
+
+export RBD_FORCE_ALLOW_V1=1
+
+function fill_image() {
+ local spec=$1
+
+ local dev
+ dev=$(sudo rbd map $spec)
+ xfs_io -c "pwrite -b $OBJECT_SIZE -S 0x78 -W 0 $IMAGE_SIZE" $dev
+ sudo rbd unmap $dev
+}
+
+function create_clones() {
+ local spec=$1
+
+ rbd snap create $spec@snap
+ rbd snap protect $spec@snap
+
+ local pool=${spec%/*} # pool/image is assumed
+ local image=${spec#*/}
+ local child_pool
+ for child_pool in $pool clonesonly; do
+ rbd clone $spec@snap $child_pool/$pool-$image-clone1
+ rbd clone $spec@snap --data-pool repdata $child_pool/$pool-$image-clone2
+ rbd clone $spec@snap --data-pool ecdata $child_pool/$pool-$image-clone3
+ done
+}
+
+function trigger_copyup() {
+ local spec=$1
+
+ local dev
+ dev=$(sudo rbd map $spec)
+ local i
+ {
+ for ((i = 0; i < $NUM_OBJECTS; i++)); do
+ echo pwrite -b $OBJECT_SIZE -S 0x59 $((i * OBJECT_SIZE + OBJECT_SIZE / 2)) $((OBJECT_SIZE / 2))
+ done
+ echo fsync
+ echo quit
+ } | xfs_io $dev
+ sudo rbd unmap $dev
+}
+
+function compare() {
+ local spec=$1
+ local object=$2
+
+ local dev
+ dev=$(sudo rbd map $spec)
+ local i
+ for ((i = 0; i < $NUM_OBJECTS; i++)); do
+ dd if=$dev bs=$OBJECT_SIZE count=1 skip=$i | cmp $object -
+ done
+ sudo rbd unmap $dev
+}
+
+function mkfs_and_mount() {
+ local spec=$1
+
+ local dev
+ dev=$(sudo rbd map $spec)
+ blkdiscard $dev
+ mkfs.ext4 -q -E nodiscard $dev
+ sudo mount $dev /mnt
+ sudo umount /mnt
+ sudo rbd unmap $dev
+}
+
+function list_HEADs() {
+ local pool=$1
+
+ rados -p $pool ls | while read obj; do
+ if rados -p $pool stat $obj >/dev/null 2>&1; then
+ echo $obj
+ fi
+ done
+}
+
+function count_data_objects() {
+ local spec=$1
+
+ local pool
+ pool=$(rbd info $spec | grep 'data_pool: ' | awk '{ print $NF }')
+ if [[ -z $pool ]]; then
+ pool=${spec%/*} # pool/image is assumed
+ fi
+
+ local prefix
+ prefix=$(rbd info $spec | grep 'block_name_prefix: ' | awk '{ print $NF }')
+ rados -p $pool ls | grep -c $prefix
+}
+
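+# num_object_clones in "rados df" counts snapshot-induced object
+# clones, i.e. objects that were copied on write after a snapshot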
+function get_num_clones() {
+ local pool=$1
+
+ rados -p $pool --format=json df |
+ python -c 'import sys, json; print json.load(sys.stdin)["pools"][0]["num_object_clones"]'
+}
+
+ceph osd pool create repdata 24 24
+rbd pool init repdata
+ceph osd erasure-code-profile set teuthologyprofile crush-failure-domain=osd m=1 k=2
+ceph osd pool create ecdata 24 24 erasure teuthologyprofile
+rbd pool init ecdata
+ceph osd pool set ecdata allow_ec_overwrites true
+ceph osd pool create rbdnonzero 24 24
+rbd pool init rbdnonzero
+ceph osd pool create clonesonly 24 24
+rbd pool init clonesonly
+
+for pool in rbd rbdnonzero; do
+ rbd create --size 200 --image-format 1 $pool/img0
+ rbd create --size 200 $pool/img1
+ rbd create --size 200 --data-pool repdata $pool/img2
+ rbd create --size 200 --data-pool ecdata $pool/img3
+done
+
+IMAGE_SIZE=$(rbd info --format=json img1 | python -c 'import sys, json; print json.load(sys.stdin)["size"]')
+OBJECT_SIZE=$(rbd info --format=json img1 | python -c 'import sys, json; print json.load(sys.stdin)["object_size"]')
+NUM_OBJECTS=$((IMAGE_SIZE / OBJECT_SIZE))
+[[ $((IMAGE_SIZE % OBJECT_SIZE)) -eq 0 ]]
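+# (with the default 4M object size, each 200M image maps to 50 objects)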
+
+OBJECT_X=$(mktemp) # xxxx
+xfs_io -c "pwrite -b $OBJECT_SIZE -S 0x78 0 $OBJECT_SIZE" $OBJECT_X
+
+OBJECT_XY=$(mktemp) # xxYY
+xfs_io -c "pwrite -b $OBJECT_SIZE -S 0x78 0 $((OBJECT_SIZE / 2))" \
+ -c "pwrite -b $OBJECT_SIZE -S 0x59 $((OBJECT_SIZE / 2)) $((OBJECT_SIZE / 2))" \
+ $OBJECT_XY
+
+for pool in rbd rbdnonzero; do
+ for i in {0..3}; do
+ fill_image $pool/img$i
+ if [[ $i -ne 0 ]]; then
+ create_clones $pool/img$i
+ for child_pool in $pool clonesonly; do
+ for j in {1..3}; do
+ trigger_copyup $child_pool/$pool-img$i-clone$j
+ done
+ done
+ fi
+ done
+done
+
+# rbd_directory, rbd_children, rbd_info + img0 header + ...
+NUM_META_RBDS=$((3 + 1 + 3 * (1*2 + 3*2)))
+# rbd_directory, rbd_children, rbd_info + ...
+NUM_META_CLONESONLY=$((3 + 2 * 3 * (3*2)))
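+# (the "*2" terms: each format 2 image presumably accounts for two
+# metadata objects here, its rbd_id.* and rbd_header.* objects)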
+
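+# data object counts: e.g. the rbd pool carries data for img0, img1 and
+# the three same-pool clone1 children, i.e. 5 images' worth of objects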
+[[ $(rados -p rbd ls | wc -l) -eq $((NUM_META_RBDS + 5 * NUM_OBJECTS)) ]]
+[[ $(rados -p repdata ls | wc -l) -eq $((1 + 14 * NUM_OBJECTS)) ]]
+[[ $(rados -p ecdata ls | wc -l) -eq $((1 + 14 * NUM_OBJECTS)) ]]
+[[ $(rados -p rbdnonzero ls | wc -l) -eq $((NUM_META_RBDS + 5 * NUM_OBJECTS)) ]]
+[[ $(rados -p clonesonly ls | wc -l) -eq $((NUM_META_CLONESONLY + 6 * NUM_OBJECTS)) ]]
+
+for pool in rbd rbdnonzero; do
+ for i in {0..3}; do
+ [[ $(count_data_objects $pool/img$i) -eq $NUM_OBJECTS ]]
+ if [[ $i -ne 0 ]]; then
+ for child_pool in $pool clonesonly; do
+ for j in {1..3}; do
+ [[ $(count_data_objects $child_pool/$pool-img$i-clone$j) -eq $NUM_OBJECTS ]]
+ done
+ done
+ fi
+ done
+done
+
+[[ $(get_num_clones rbd) -eq 0 ]]
+[[ $(get_num_clones repdata) -eq 0 ]]
+[[ $(get_num_clones ecdata) -eq 0 ]]
+[[ $(get_num_clones rbdnonzero) -eq 0 ]]
+[[ $(get_num_clones clonesonly) -eq 0 ]]
+
+for pool in rbd rbdnonzero; do
+ for i in {0..3}; do
+ compare $pool/img$i $OBJECT_X
+ mkfs_and_mount $pool/img$i
+ if [[ $i -ne 0 ]]; then
+ for child_pool in $pool clonesonly; do
+ for j in {1..3}; do
+ compare $child_pool/$pool-img$i-clone$j $OBJECT_XY
+ done
+ done
+ fi
+ done
+done
+
+# mkfs_and_mount should discard some objects everywhere but in clonesonly
+[[ $(list_HEADs rbd | wc -l) -lt $((NUM_META_RBDS + 5 * NUM_OBJECTS)) ]]
+[[ $(list_HEADs repdata | wc -l) -lt $((1 + 14 * NUM_OBJECTS)) ]]
+[[ $(list_HEADs ecdata | wc -l) -lt $((1 + 14 * NUM_OBJECTS)) ]]
+[[ $(list_HEADs rbdnonzero | wc -l) -lt $((NUM_META_RBDS + 5 * NUM_OBJECTS)) ]]
+[[ $(list_HEADs clonesonly | wc -l) -eq $((NUM_META_CLONESONLY + 6 * NUM_OBJECTS)) ]]
+
+[[ $(get_num_clones rbd) -eq $NUM_OBJECTS ]]
+[[ $(get_num_clones repdata) -eq $((2 * NUM_OBJECTS)) ]]
+[[ $(get_num_clones ecdata) -eq $((2 * NUM_OBJECTS)) ]]
+[[ $(get_num_clones rbdnonzero) -eq $NUM_OBJECTS ]]
+[[ $(get_num_clones clonesonly) -eq 0 ]]
+
+echo OK
diff --git a/qa/workunits/rbd/krbd_exclusive_option.sh b/qa/workunits/rbd/krbd_exclusive_option.sh
new file mode 100755
index 00000000..d7bcbb6d
--- /dev/null
+++ b/qa/workunits/rbd/krbd_exclusive_option.sh
@@ -0,0 +1,233 @@
+#!/usr/bin/env bash
+
+set -ex
+
+function expect_false() {
+ if "$@"; then return 1; else return 0; fi
+}
+
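+# verify that the given mapping owns the exclusive lock: krbd locks the
+# header with cookie "auto <watch cookie>", so the registered locker
+# must line up with this device's watch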
+function assert_locked() {
+ local dev_id="${1#/dev/rbd}"
+
+ local client_addr
+ client_addr="$(< $SYSFS_DIR/$dev_id/client_addr)"
+
+ local client_id
+ client_id="$(< $SYSFS_DIR/$dev_id/client_id)"
+ # client4324 -> client.4324
+ client_id="client.${client_id#client}"
+
+ local watch_cookie
+ watch_cookie="$(rados -p rbd listwatchers rbd_header.$IMAGE_ID |
+ grep $client_id | cut -d ' ' -f 3 | cut -d '=' -f 2)"
+ [[ $(echo -n "$watch_cookie" | grep -c '^') -eq 1 ]]
+
+ local actual
+ actual="$(rados -p rbd --format=json lock info rbd_header.$IMAGE_ID rbd_lock |
+ python -m json.tool)"
+
+ local expected
+ expected="$(cat <<EOF | python -m json.tool
+{
+ "lockers": [
+ {
+ "addr": "$client_addr",
+ "cookie": "auto $watch_cookie",
+ "description": "",
+ "expiration": "0.000000",
+ "name": "$client_id"
+ }
+ ],
+ "name": "rbd_lock",
+ "tag": "internal",
+ "type": "exclusive"
+}
+EOF
+ )"
+
+ [ "$actual" = "$expected" ]
+}
+
+function assert_unlocked() {
+ rados -p rbd --format=json lock info rbd_header.$IMAGE_ID rbd_lock |
+ grep '"lockers":\[\]'
+}
+
+function blacklist_add() {
+ local dev_id="${1#/dev/rbd}"
+
+ local client_addr
+ client_addr="$(< $SYSFS_DIR/$dev_id/client_addr)"
+
+ ceph osd blacklist add $client_addr
+}
+
+SYSFS_DIR="/sys/bus/rbd/devices"
+IMAGE_NAME="exclusive-option-test"
+
+rbd create --size 1 --image-feature '' $IMAGE_NAME
+
+IMAGE_ID="$(rbd info --format=json $IMAGE_NAME |
+ python -c "import sys, json; print json.load(sys.stdin)['block_name_prefix'].split('.')[1]")"
+
+DEV=$(sudo rbd map $IMAGE_NAME)
+assert_unlocked
+sudo rbd unmap $DEV
+assert_unlocked
+
+expect_false sudo rbd map -o exclusive $IMAGE_NAME
+assert_unlocked
+
+expect_false sudo rbd map -o lock_on_read $IMAGE_NAME
+assert_unlocked
+
+rbd feature enable $IMAGE_NAME exclusive-lock
+rbd snap create $IMAGE_NAME@snap
+
+DEV=$(sudo rbd map $IMAGE_NAME)
+assert_locked $DEV
+[[ $(blockdev --getro $DEV) -eq 0 ]]
+sudo rbd unmap $DEV
+assert_unlocked
+
+DEV=$(sudo rbd map $IMAGE_NAME@snap)
+assert_unlocked
+[[ $(blockdev --getro $DEV) -eq 1 ]]
+sudo rbd unmap $DEV
+assert_unlocked
+
+DEV=$(sudo rbd map -o ro $IMAGE_NAME)
+assert_unlocked
+[[ $(blockdev --getro $DEV) -eq 1 ]]
+sudo rbd unmap $DEV
+assert_unlocked
+
+DEV=$(sudo rbd map -o exclusive $IMAGE_NAME)
+assert_locked $DEV
+[[ $(blockdev --getro $DEV) -eq 0 ]]
+sudo rbd unmap $DEV
+assert_unlocked
+
+DEV=$(sudo rbd map -o exclusive $IMAGE_NAME@snap)
+assert_unlocked
+[[ $(blockdev --getro $DEV) -eq 1 ]]
+sudo rbd unmap $DEV
+assert_unlocked
+
+DEV=$(sudo rbd map -o exclusive,ro $IMAGE_NAME)
+assert_unlocked
+[[ $(blockdev --getro $DEV) -eq 1 ]]
+sudo rbd unmap $DEV
+assert_unlocked
+
+# alternate syntax
+DEV=$(sudo rbd map --exclusive --read-only $IMAGE_NAME)
+assert_unlocked
+[[ $(blockdev --getro $DEV) -eq 1 ]]
+sudo rbd unmap $DEV
+assert_unlocked
+
+DEV=$(sudo rbd map $IMAGE_NAME)
+assert_locked $DEV
+OTHER_DEV=$(sudo rbd map -o noshare $IMAGE_NAME)
+assert_locked $OTHER_DEV
+dd if=/dev/urandom of=$DEV bs=4k count=10 oflag=direct
+assert_locked $DEV
+dd if=/dev/urandom of=$OTHER_DEV bs=4k count=10 oflag=direct
+assert_locked $OTHER_DEV
+sudo rbd unmap $DEV
+sudo rbd unmap $OTHER_DEV
+assert_unlocked
+
+DEV=$(sudo rbd map $IMAGE_NAME)
+assert_locked $DEV
+OTHER_DEV=$(sudo rbd map -o noshare,exclusive $IMAGE_NAME)
+assert_locked $OTHER_DEV
+dd if=$DEV of=/dev/null bs=4k count=10 iflag=direct
+expect_false dd if=/dev/urandom of=$DEV bs=4k count=10 oflag=direct
+assert_locked $OTHER_DEV
+sudo rbd unmap $OTHER_DEV
+assert_unlocked
+dd if=$DEV of=/dev/null bs=4k count=10 iflag=direct
+assert_unlocked
+dd if=/dev/urandom of=$DEV bs=4k count=10 oflag=direct
+assert_locked $DEV
+sudo rbd unmap $DEV
+assert_unlocked
+
+DEV=$(sudo rbd map -o lock_on_read $IMAGE_NAME)
+assert_locked $DEV
+OTHER_DEV=$(sudo rbd map -o noshare,exclusive $IMAGE_NAME)
+assert_locked $OTHER_DEV
+expect_false dd if=$DEV of=/dev/null bs=4k count=10 iflag=direct
+expect_false dd if=/dev/urandom of=$DEV bs=4k count=10 oflag=direct
+sudo udevadm settle
+assert_locked $OTHER_DEV
+sudo rbd unmap $OTHER_DEV
+assert_unlocked
+dd if=$DEV of=/dev/null bs=4k count=10 iflag=direct
+assert_locked $DEV
+dd if=/dev/urandom of=$DEV bs=4k count=10 oflag=direct
+assert_locked $DEV
+sudo rbd unmap $DEV
+assert_unlocked
+
+DEV=$(sudo rbd map -o exclusive $IMAGE_NAME)
+assert_locked $DEV
+expect_false sudo rbd map -o noshare $IMAGE_NAME
+assert_locked $DEV
+sudo rbd unmap $DEV
+assert_unlocked
+
+DEV=$(sudo rbd map -o exclusive $IMAGE_NAME)
+assert_locked $DEV
+expect_false sudo rbd map -o noshare,exclusive $IMAGE_NAME
+assert_locked $DEV
+sudo rbd unmap $DEV
+assert_unlocked
+
+DEV=$(sudo rbd map $IMAGE_NAME)
+assert_locked $DEV
+rbd resize --size 1G $IMAGE_NAME
+assert_unlocked
+sudo rbd unmap $DEV
+assert_unlocked
+
+DEV=$(sudo rbd map -o exclusive $IMAGE_NAME)
+assert_locked $DEV
+expect_false rbd resize --size 2G $IMAGE_NAME
+assert_locked $DEV
+sudo rbd unmap $DEV
+assert_unlocked
+
+DEV=$(sudo rbd map $IMAGE_NAME)
+assert_locked $DEV
+dd if=/dev/urandom of=$DEV bs=4k count=10 oflag=direct
+{ sleep 10; blacklist_add $DEV; } &
+PID=$!
+expect_false dd if=/dev/urandom of=$DEV bs=4k count=200000 oflag=direct
+wait $PID
+# break lock
+OTHER_DEV=$(sudo rbd map -o noshare $IMAGE_NAME)
+assert_locked $OTHER_DEV
+sudo rbd unmap $DEV
+assert_locked $OTHER_DEV
+sudo rbd unmap $OTHER_DEV
+assert_unlocked
+
+# induce a watch error after 30 seconds
+DEV=$(sudo rbd map -o exclusive,osdkeepalive=60 $IMAGE_NAME)
+assert_locked $DEV
+OLD_WATCHER="$(rados -p rbd listwatchers rbd_header.$IMAGE_ID)"
+sleep 40
+assert_locked $DEV
+NEW_WATCHER="$(rados -p rbd listwatchers rbd_header.$IMAGE_ID)"
+# same client_id, old cookie < new cookie
+[ "$(echo "$OLD_WATCHER" | cut -d ' ' -f 2)" = \
+ "$(echo "$NEW_WATCHER" | cut -d ' ' -f 2)" ]
+[[ $(echo "$OLD_WATCHER" | cut -d ' ' -f 3 | cut -d '=' -f 2) -lt \
+ $(echo "$NEW_WATCHER" | cut -d ' ' -f 3 | cut -d '=' -f 2) ]]
+sudo rbd unmap $DEV
+assert_unlocked
+
+echo OK
diff --git a/qa/workunits/rbd/krbd_fallocate.sh b/qa/workunits/rbd/krbd_fallocate.sh
new file mode 100755
index 00000000..f6e80656
--- /dev/null
+++ b/qa/workunits/rbd/krbd_fallocate.sh
@@ -0,0 +1,151 @@
+#!/usr/bin/env bash
+
+# - fallocate -z deallocates because BLKDEV_ZERO_NOUNMAP hint is ignored by
+# krbd
+#
+# - big unaligned blkdiscard and fallocate -z/-p leave the objects in place
+
+set -ex
+
+# no blkdiscard(8) in trusty
+function py_blkdiscard() {
+ local offset=$1
+
+ python <<EOF
+import fcntl, struct
+BLKDISCARD = 0x1277
+with open('$DEV', 'w') as dev:
+ fcntl.ioctl(dev, BLKDISCARD, struct.pack('QQ', $offset, $IMAGE_SIZE - $offset))
+EOF
+}
+
+# fallocate(1) in trusty doesn't support -z/-p
+function py_fallocate() {
+ local mode=$1
+ local offset=$2
+
+ python <<EOF
+import os, ctypes, ctypes.util
+FALLOC_FL_KEEP_SIZE = 0x01
+FALLOC_FL_PUNCH_HOLE = 0x02
+FALLOC_FL_ZERO_RANGE = 0x10
+libc = ctypes.CDLL(ctypes.util.find_library('c'), use_errno=True)
+with open('$DEV', 'w') as dev:
+ if libc.fallocate(dev.fileno(), ctypes.c_int($mode), ctypes.c_long($offset), ctypes.c_long($IMAGE_SIZE - $offset)):
+ err = ctypes.get_errno()
+ raise OSError(err, os.strerror(err))
+EOF
+}
+
+function allocate() {
+ xfs_io -c "pwrite -b $OBJECT_SIZE -W 0 $IMAGE_SIZE" $DEV
+ assert_allocated
+}
+
+function assert_allocated() {
+ cmp <(od -xAx $DEV) - <<EOF
+000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd
+*
+$(printf %x $IMAGE_SIZE)
+EOF
+ [[ $(rados -p rbd ls | grep -c rbd_data.$IMAGE_ID) -eq $NUM_OBJECTS ]]
+}
+
+function assert_zeroes() {
+ local num_objects_expected=$1
+
+ cmp <(od -xAx $DEV) - <<EOF
+000000 0000 0000 0000 0000 0000 0000 0000 0000
+*
+$(printf %x $IMAGE_SIZE)
+EOF
+ [[ $(rados -p rbd ls | grep -c rbd_data.$IMAGE_ID) -eq $num_objects_expected ]]
+}
+
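+# an unaligned zero/discard starting at OBJECT_SIZE/2 keeps the first
+# half of the pattern in place; all objects remain listed, truncated to
+# OBJECT_SIZE/2 (the boundary object) or to size 0 (the rest)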
+function assert_zeroes_unaligned() {
+ local num_objects_expected=$1
+
+ cmp <(od -xAx $DEV) - <<EOF
+000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd
+*
+$(printf %x $((OBJECT_SIZE / 2))) 0000 0000 0000 0000 0000 0000 0000 0000
+*
+$(printf %x $IMAGE_SIZE)
+EOF
+ [[ $(rados -p rbd ls | grep -c rbd_data.$IMAGE_ID) -eq $num_objects_expected ]]
+ for ((i = 0; i < $num_objects_expected; i++)); do
+ rados -p rbd stat rbd_data.$IMAGE_ID.$(printf %016x $i) | egrep "(size $((OBJECT_SIZE / 2)))|(size 0)"
+ done
+}
+
+IMAGE_NAME="fallocate-test"
+
+rbd create --size 200 $IMAGE_NAME
+
+IMAGE_SIZE=$(rbd info --format=json $IMAGE_NAME | python -c 'import sys, json; print json.load(sys.stdin)["size"]')
+OBJECT_SIZE=$(rbd info --format=json $IMAGE_NAME | python -c 'import sys, json; print json.load(sys.stdin)["object_size"]')
+NUM_OBJECTS=$((IMAGE_SIZE / OBJECT_SIZE))
+[[ $((IMAGE_SIZE % OBJECT_SIZE)) -eq 0 ]]
+
+IMAGE_ID="$(rbd info --format=json $IMAGE_NAME |
+ python -c "import sys, json; print json.load(sys.stdin)['block_name_prefix'].split('.')[1]")"
+
+DEV=$(sudo rbd map $IMAGE_NAME)
+
+# make sure -ENOENT is hidden
+assert_zeroes 0
+py_blkdiscard 0
+assert_zeroes 0
+
+# blkdev_issue_discard
+allocate
+py_blkdiscard 0
+assert_zeroes 0
+
+# blkdev_issue_zeroout w/ BLKDEV_ZERO_NOUNMAP
+allocate
+py_fallocate FALLOC_FL_ZERO_RANGE\|FALLOC_FL_KEEP_SIZE 0
+assert_zeroes 0
+
+# blkdev_issue_zeroout w/ BLKDEV_ZERO_NOFALLBACK
+allocate
+py_fallocate FALLOC_FL_PUNCH_HOLE\|FALLOC_FL_KEEP_SIZE 0
+assert_zeroes 0
+
+# unaligned blkdev_issue_discard
+allocate
+py_blkdiscard $((OBJECT_SIZE / 2))
+assert_zeroes_unaligned $NUM_OBJECTS
+
+# unaligned blkdev_issue_zeroout w/ BLKDEV_ZERO_NOUNMAP
+allocate
+py_fallocate FALLOC_FL_ZERO_RANGE\|FALLOC_FL_KEEP_SIZE $((OBJECT_SIZE / 2))
+assert_zeroes_unaligned $NUM_OBJECTS
+
+# unaligned blkdev_issue_zeroout w/ BLKDEV_ZERO_NOFALLBACK
+allocate
+py_fallocate FALLOC_FL_PUNCH_HOLE\|FALLOC_FL_KEEP_SIZE $((OBJECT_SIZE / 2))
+assert_zeroes_unaligned $NUM_OBJECTS
+
+sudo rbd unmap $DEV
+
+DEV=$(sudo rbd map -o notrim $IMAGE_NAME)
+
+# blkdev_issue_discard
+allocate
+py_blkdiscard 0 |& grep 'Operation not supported'
+assert_allocated
+
+# blkdev_issue_zeroout w/ BLKDEV_ZERO_NOUNMAP
+allocate
+py_fallocate FALLOC_FL_ZERO_RANGE\|FALLOC_FL_KEEP_SIZE 0
+assert_zeroes $NUM_OBJECTS
+
+# blkdev_issue_zeroout w/ BLKDEV_ZERO_NOFALLBACK
+allocate
+py_fallocate FALLOC_FL_PUNCH_HOLE\|FALLOC_FL_KEEP_SIZE 0 |& grep 'Operation not supported'
+assert_allocated
+
+sudo rbd unmap $DEV
+
+echo OK
diff --git a/qa/workunits/rbd/krbd_latest_osdmap_on_map.sh b/qa/workunits/rbd/krbd_latest_osdmap_on_map.sh
new file mode 100755
index 00000000..f70f3863
--- /dev/null
+++ b/qa/workunits/rbd/krbd_latest_osdmap_on_map.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+set -ex
+
+function run_test() {
+ ceph osd pool create foo 12
+ rbd pool init foo
+ rbd create --size 1 foo/img
+
+ local dev
+ dev=$(sudo rbd map foo/img)
+ sudo rbd unmap $dev
+
+ ceph osd pool delete foo foo --yes-i-really-really-mean-it
+}
+
+NUM_ITER=20
+
+for ((i = 0; i < $NUM_ITER; i++)); do
+ run_test
+done
+
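+# repeat with a mapping held open, so the kernel client sticks around
+# across the pool create/delete cycles and has to catch up to the
+# latest osdmap on each new map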
+rbd create --size 1 img
+DEV=$(sudo rbd map img)
+for ((i = 0; i < $NUM_ITER; i++)); do
+ run_test
+done
+sudo rbd unmap $DEV
+
+echo OK
diff --git a/qa/workunits/rbd/krbd_namespaces.sh b/qa/workunits/rbd/krbd_namespaces.sh
new file mode 100755
index 00000000..22b05b50
--- /dev/null
+++ b/qa/workunits/rbd/krbd_namespaces.sh
@@ -0,0 +1,116 @@
+#!/usr/bin/env bash
+
+set -ex
+
+function get_block_name_prefix() {
+ rbd info --format=json $1 | python -c "import sys, json; print json.load(sys.stdin)['block_name_prefix']"
+}
+
+function do_pwrite() {
+ local spec=$1
+ local old_byte=$2
+ local new_byte=$3
+
+ local dev
+ dev=$(sudo rbd map $spec)
+ cmp <(dd if=/dev/zero bs=1M count=10 | tr \\000 \\$old_byte) $dev
+ xfs_io -c "pwrite -b 1M -S $new_byte 0 10M" $dev
+ sudo rbd unmap $dev
+}
+
+function do_cmp() {
+ local spec=$1
+ local byte=$2
+
+ local dev
+ dev=$(sudo rbd map $spec)
+ cmp <(dd if=/dev/zero bs=1M count=10 | tr \\000 \\$byte) $dev
+ sudo rbd unmap $dev
+}
+
+function gen_child_specs() {
+ local i=$1
+
+ local child_specs="foo/img$i-clone1 foo/img$i-clone2 foo/ns1/img$i-clone1 foo/ns1/img$i-clone2"
+ if [[ $i -ge 3 ]]; then
+ child_specs="$child_specs foo/ns2/img$i-clone1 foo/ns2/img$i-clone2"
+ fi
+ echo $child_specs
+}
+
+ceph osd pool create foo 12
+rbd pool init foo
+ceph osd pool create bar 12
+rbd pool init bar
+
+ceph osd set-require-min-compat-client nautilus
+rbd namespace create foo/ns1
+rbd namespace create foo/ns2
+
+SPECS=(foo/img1 foo/img2 foo/ns1/img3 foo/ns1/img4)
+
+COUNT=1
+for spec in "${SPECS[@]}"; do
+ if [[ $spec =~ img1|img3 ]]; then
+ rbd create --size 10 $spec
+ else
+ rbd create --size 10 --data-pool bar $spec
+ fi
+ do_pwrite $spec 000 $(printf %03d $COUNT)
+ rbd snap create $spec@snap
+ COUNT=$((COUNT + 1))
+done
+for i in {1..4}; do
+ for child_spec in $(gen_child_specs $i); do
+ if [[ $child_spec =~ clone1 ]]; then
+ rbd clone ${SPECS[i - 1]}@snap $child_spec
+ else
+ rbd clone --data-pool bar ${SPECS[i - 1]}@snap $child_spec
+ fi
+ do_pwrite $child_spec $(printf %03d $i) $(printf %03d $COUNT)
+ COUNT=$((COUNT + 1))
+ done
+done
+
+[[ $(rados -p foo ls | grep -c $(get_block_name_prefix foo/img1)) -eq 3 ]]
+[[ $(rados -p bar ls | grep -c $(get_block_name_prefix foo/img2)) -eq 3 ]]
+[[ $(rados -p foo -N ns1 ls | grep -c $(get_block_name_prefix foo/ns1/img3)) -eq 3 ]]
+[[ $(rados -p bar -N ns1 ls | grep -c $(get_block_name_prefix foo/ns1/img4)) -eq 3 ]]
+
+[[ $(rados -p foo ls | grep -c $(get_block_name_prefix foo/img1-clone1)) -eq 3 ]]
+[[ $(rados -p bar ls | grep -c $(get_block_name_prefix foo/img1-clone2)) -eq 3 ]]
+[[ $(rados -p foo -N ns1 ls | grep -c $(get_block_name_prefix foo/ns1/img1-clone1)) -eq 3 ]]
+[[ $(rados -p bar -N ns1 ls | grep -c $(get_block_name_prefix foo/ns1/img1-clone2)) -eq 3 ]]
+
+[[ $(rados -p foo ls | grep -c $(get_block_name_prefix foo/img2-clone1)) -eq 3 ]]
+[[ $(rados -p bar ls | grep -c $(get_block_name_prefix foo/img2-clone2)) -eq 3 ]]
+[[ $(rados -p foo -N ns1 ls | grep -c $(get_block_name_prefix foo/ns1/img2-clone1)) -eq 3 ]]
+[[ $(rados -p bar -N ns1 ls | grep -c $(get_block_name_prefix foo/ns1/img2-clone2)) -eq 3 ]]
+
+[[ $(rados -p foo ls | grep -c $(get_block_name_prefix foo/img3-clone1)) -eq 3 ]]
+[[ $(rados -p bar ls | grep -c $(get_block_name_prefix foo/img3-clone2)) -eq 3 ]]
+[[ $(rados -p foo -N ns1 ls | grep -c $(get_block_name_prefix foo/ns1/img3-clone1)) -eq 3 ]]
+[[ $(rados -p bar -N ns1 ls | grep -c $(get_block_name_prefix foo/ns1/img3-clone2)) -eq 3 ]]
+[[ $(rados -p foo -N ns2 ls | grep -c $(get_block_name_prefix foo/ns2/img3-clone1)) -eq 3 ]]
+[[ $(rados -p bar -N ns2 ls | grep -c $(get_block_name_prefix foo/ns2/img3-clone2)) -eq 3 ]]
+
+[[ $(rados -p foo ls | grep -c $(get_block_name_prefix foo/img4-clone1)) -eq 3 ]]
+[[ $(rados -p bar ls | grep -c $(get_block_name_prefix foo/img4-clone2)) -eq 3 ]]
+[[ $(rados -p foo -N ns1 ls | grep -c $(get_block_name_prefix foo/ns1/img4-clone1)) -eq 3 ]]
+[[ $(rados -p bar -N ns1 ls | grep -c $(get_block_name_prefix foo/ns1/img4-clone2)) -eq 3 ]]
+[[ $(rados -p foo -N ns2 ls | grep -c $(get_block_name_prefix foo/ns2/img4-clone1)) -eq 3 ]]
+[[ $(rados -p bar -N ns2 ls | grep -c $(get_block_name_prefix foo/ns2/img4-clone2)) -eq 3 ]]
+
+COUNT=1
+for spec in "${SPECS[@]}"; do
+ do_cmp $spec $(printf %03d $COUNT)
+ COUNT=$((COUNT + 1))
+done
+for i in {1..4}; do
+ for child_spec in $(gen_child_specs $i); do
+ do_cmp $child_spec $(printf %03d $COUNT)
+ COUNT=$((COUNT + 1))
+ done
+done
+
+echo OK
diff --git a/qa/workunits/rbd/krbd_stable_writes.sh b/qa/workunits/rbd/krbd_stable_writes.sh
new file mode 100755
index 00000000..45e04123
--- /dev/null
+++ b/qa/workunits/rbd/krbd_stable_writes.sh
@@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+
+set -ex
+
+IMAGE_NAME="stable-writes-test"
+
+rbd create --size 1 $IMAGE_NAME
+DEV=$(sudo rbd map $IMAGE_NAME)
+[[ $(blockdev --getsize64 $DEV) -eq 1048576 ]]
+grep -q 1 /sys/block/${DEV#/dev/}/queue/stable_writes
+
+rbd resize --size 2 $IMAGE_NAME
+[[ $(blockdev --getsize64 $DEV) -eq 2097152 ]]
+grep -q 1 /sys/block/${DEV#/dev/}/queue/stable_writes
+
+sudo rbd unmap $DEV
+
+echo OK
diff --git a/qa/workunits/rbd/krbd_udev_enumerate.sh b/qa/workunits/rbd/krbd_udev_enumerate.sh
new file mode 100755
index 00000000..494f958f
--- /dev/null
+++ b/qa/workunits/rbd/krbd_udev_enumerate.sh
@@ -0,0 +1,66 @@
+#!/usr/bin/env bash
+
+# This is a test for https://tracker.ceph.com/issues/41036, but it also
+# triggers https://tracker.ceph.com/issues/41404 in some environments.
+
+set -ex
+
+function assert_exit_codes() {
+ declare -a pids=($@)
+
+ for pid in ${pids[@]}; do
+ wait $pid
+ done
+}
+
+function run_map() {
+ declare -a pids
+
+ for i in {1..300}; do
+ sudo rbd map img$i &
+ pids+=($!)
+ done
+
+ assert_exit_codes ${pids[@]}
+ [[ $(rbd showmapped | wc -l) -eq 301 ]]
+}
+
+function run_unmap_by_dev() {
+ declare -a pids
+
+ run_map
+ for i in {0..299}; do
+ sudo rbd unmap /dev/rbd$i &
+ pids+=($!)
+ done
+
+ assert_exit_codes ${pids[@]}
+ [[ $(rbd showmapped | wc -l) -eq 0 ]]
+}
+
+function run_unmap_by_spec() {
+ declare -a pids
+
+ run_map
+ for i in {1..300}; do
+ sudo rbd unmap img$i &
+ pids+=($!)
+ done
+
+ assert_exit_codes ${pids[@]}
+ [[ $(rbd showmapped | wc -l) -eq 0 ]]
+}
+
+# Can't test with exclusive-lock; don't bother enabling deep-flatten.
+# See https://tracker.ceph.com/issues/42492.
+for i in {1..300}; do
+ rbd create --size 1 --image-feature '' img$i
+done
+
+for i in {1..30}; do
+ echo Iteration $i
+ run_unmap_by_dev
+ run_unmap_by_spec
+done
+
+echo OK
diff --git a/qa/workunits/rbd/krbd_udev_netlink_enobufs.sh b/qa/workunits/rbd/krbd_udev_netlink_enobufs.sh
new file mode 100755
index 00000000..7c9c53a2
--- /dev/null
+++ b/qa/workunits/rbd/krbd_udev_netlink_enobufs.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+
+# This is a test for https://tracker.ceph.com/issues/41404, verifying that udev
+# events are properly reaped while the image is being (un)mapped in the kernel.
+# UDEV_BUF_SIZE is 1M (giving us a 2M socket receive buffer), but modprobe +
+# modprobe -r generate ~28M worth of "block" events.
+
+set -ex
+
+rbd create --size 1 img
+
+ceph osd pause
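+# with OSDs paused, "rbd map" stays blocked in the kernel while
+# scsi_debug floods udev with events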
+sudo rbd map img &
+PID=$!
+sudo modprobe scsi_debug max_luns=16 add_host=16 num_parts=1 num_tgts=16
+sudo udevadm settle
+sudo modprobe -r scsi_debug
+[[ $(rbd showmapped | wc -l) -eq 0 ]]
+ceph osd unpause
+wait $PID
+[[ $(rbd showmapped | wc -l) -eq 2 ]]
+sudo rbd unmap img
+
+echo OK
diff --git a/qa/workunits/rbd/krbd_udev_netns.sh b/qa/workunits/rbd/krbd_udev_netns.sh
new file mode 100755
index 00000000..e746a682
--- /dev/null
+++ b/qa/workunits/rbd/krbd_udev_netns.sh
@@ -0,0 +1,86 @@
+#!/usr/bin/env bash
+
+set -ex
+
+sudo ip netns add ns1
+sudo ip link add veth1-ext type veth peer name veth1-int
+sudo ip link set veth1-int netns ns1
+
+sudo ip netns exec ns1 ip link set dev lo up
+sudo ip netns exec ns1 ip addr add 192.168.1.2/24 dev veth1-int
+sudo ip netns exec ns1 ip link set veth1-int up
+sudo ip netns exec ns1 ip route add default via 192.168.1.1
+
+sudo ip addr add 192.168.1.1/24 dev veth1-ext
+sudo ip link set veth1-ext up
+
+# Enable forwarding between the namespace and the default route
+# interface and set up NAT. In case of multiple default routes,
+# just pick the first one.
+if [[ $(sysctl -n net.ipv4.ip_forward) -eq 0 ]]; then
+ sudo iptables -P FORWARD DROP
+ sudo sysctl -w net.ipv4.ip_forward=1
+fi
+IFACE="$(ip route list 0.0.0.0/0 | head -n 1 | cut -d ' ' -f 5)"
+sudo iptables -A FORWARD -i veth1-ext -o "$IFACE" -j ACCEPT
+sudo iptables -A FORWARD -i "$IFACE" -o veth1-ext -j ACCEPT
+sudo iptables -t nat -A POSTROUTING -s 192.168.1.2 -o "$IFACE" -j MASQUERADE
+
+rbd create --size 300 img
+
+DEV="$(sudo rbd map img)"
+mkfs.ext4 "$DEV"
+sudo mount "$DEV" /mnt
+sudo umount /mnt
+sudo rbd unmap "$DEV"
+
+sudo ip netns exec ns1 bash <<'EOF'
+
+set -ex
+
+DEV="/dev/rbd/rbd/img"
+[[ ! -e "$DEV" ]]
+
+# In a network namespace, "rbd map" maps the device and hangs waiting
+# for udev add uevents. udev runs as usual (in particular creating the
+# symlink which is used here because the device node is never printed),
+# but the uevents it sends out never come because they don't cross
+# network namespace boundaries.
+set +e
+timeout 30s rbd map img
+RET=$?
+set -e
+[[ $RET -eq 124 ]]
+[[ -L "$DEV" ]]
+mkfs.ext4 -F "$DEV"
+mount "$DEV" /mnt
+umount /mnt
+
+# In a network namespace, "rbd unmap" unmaps the device and hangs
+# waiting for udev remove uevents. udev runs as usual (removing the
+# symlink), but the uevents it sends out never come because they don't
+# cross network namespace boundaries.
+set +e
+timeout 30s rbd unmap "$DEV"
+RET=$?
+set -e
+[[ $RET -eq 124 ]]
+[[ ! -e "$DEV" ]]
+
+# Skip waiting for udev uevents with "-o noudev".
+DEV="$(rbd map -o noudev img)"
+mkfs.ext4 -F "$DEV"
+mount "$DEV" /mnt
+umount /mnt
+rbd unmap -o noudev "$DEV"
+
+EOF
+
+rbd rm img
+
+sudo iptables -t nat -D POSTROUTING -s 192.168.1.2 -o "$IFACE" -j MASQUERADE
+sudo iptables -D FORWARD -i "$IFACE" -o veth1-ext -j ACCEPT
+sudo iptables -D FORWARD -i veth1-ext -o "$IFACE" -j ACCEPT
+sudo ip netns delete ns1
+
+echo OK
diff --git a/qa/workunits/rbd/krbd_udev_symlinks.sh b/qa/workunits/rbd/krbd_udev_symlinks.sh
new file mode 100755
index 00000000..27147652
--- /dev/null
+++ b/qa/workunits/rbd/krbd_udev_symlinks.sh
@@ -0,0 +1,116 @@
+#!/usr/bin/env bash
+
+set -ex
+
+SPECS=(
+rbd/img1
+rbd/img2
+rbd/img2@snap1
+rbd/img3
+rbd/img3@snap1
+rbd/img3@snap2
+rbd/ns1/img1
+rbd/ns1/img2
+rbd/ns1/img2@snap1
+rbd/ns1/img3
+rbd/ns1/img3@snap1
+rbd/ns1/img3@snap2
+rbd/ns2/img1
+rbd/ns2/img2
+rbd/ns2/img2@snap1
+rbd/ns2/img3
+rbd/ns2/img3@snap1
+rbd/ns2/img3@snap2
+custom/img1
+custom/img1@snap1
+custom/img2
+custom/img2@snap1
+custom/img2@snap2
+custom/img3
+custom/ns1/img1
+custom/ns1/img1@snap1
+custom/ns1/img2
+custom/ns1/img2@snap1
+custom/ns1/img2@snap2
+custom/ns1/img3
+custom/ns2/img1
+custom/ns2/img1@snap1
+custom/ns2/img2
+custom/ns2/img2@snap1
+custom/ns2/img2@snap2
+custom/ns2/img3
+)
+
+ceph osd pool create custom 8
+rbd pool init custom
+
+ceph osd set-require-min-compat-client nautilus
+rbd namespace create rbd/ns1
+rbd namespace create rbd/ns2
+rbd namespace create custom/ns1
+rbd namespace create custom/ns2
+
+# create in order, images before snapshots
+for spec in "${SPECS[@]}"; do
+ if [[ "$spec" =~ snap ]]; then
+ rbd snap create "$spec"
+ else
+ rbd create --size 10 "$spec"
+ DEV="$(sudo rbd map "$spec")"
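+        # partition the image so udev also creates -part1/-part2 symlinks (checked below)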
+ sudo sfdisk "$DEV" <<EOF
+unit: sectors
+${DEV}p1 : start= 2048, size= 2, type=83
+${DEV}p2 : start= 4096, size= 2, type=83
+EOF
+ sudo rbd unmap "$DEV"
+ fi
+done
+
+[[ ! -e /dev/rbd ]]
+
+# map in random order
+COUNT=${#SPECS[@]}
+read -r -a INDEXES < <(python3 <<EOF
+import random
+l = list(range($COUNT))
+random.shuffle(l)
+print(*l)
+EOF
+)
+
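+# mapping in random order verifies that symlink creation doesn't depend on
+# the order in which the images were created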
+DEVS=()
+for idx in "${INDEXES[@]}"; do
+ DEVS+=("$(sudo rbd map "${SPECS[idx]}")")
+done
+
+[[ $(rbd showmapped | wc -l) -eq $((COUNT + 1)) ]]
+
+for ((i = 0; i < COUNT; i++)); do
+ [[ "$(readlink -e "/dev/rbd/${SPECS[INDEXES[i]]}")" == "${DEVS[i]}" ]]
+ [[ "$(readlink -e "/dev/rbd/${SPECS[INDEXES[i]]}-part1")" == "${DEVS[i]}p1" ]]
+ [[ "$(readlink -e "/dev/rbd/${SPECS[INDEXES[i]]}-part2")" == "${DEVS[i]}p2" ]]
+done
+
+for idx in "${INDEXES[@]}"; do
+ sudo rbd unmap "/dev/rbd/${SPECS[idx]}"
+done
+
+[[ ! -e /dev/rbd ]]
+
+# remove in reverse order, snapshots before images
+for ((i = COUNT - 1; i >= 0; i--)); do
+ if [[ "${SPECS[i]}" =~ snap ]]; then
+ rbd snap rm "${SPECS[i]}"
+ else
+ rbd rm "${SPECS[i]}"
+ fi
+done
+
+rbd namespace rm custom/ns2
+rbd namespace rm custom/ns1
+rbd namespace rm rbd/ns2
+rbd namespace rm rbd/ns1
+
+ceph osd pool delete custom custom --yes-i-really-really-mean-it
+
+echo OK
diff --git a/qa/workunits/rbd/map-snapshot-io.sh b/qa/workunits/rbd/map-snapshot-io.sh
new file mode 100755
index 00000000..a69d8482
--- /dev/null
+++ b/qa/workunits/rbd/map-snapshot-io.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+# http://tracker.ceph.com/issues/3964
+
+set -ex
+
+rbd create image -s 100
+DEV=$(sudo rbd map image)
+dd if=/dev/zero of=$DEV oflag=direct count=10
+rbd snap create image@s1
+dd if=/dev/zero of=$DEV oflag=direct count=10 # used to fail
+rbd snap rm image@s1
+dd if=/dev/zero of=$DEV oflag=direct count=10
+sudo rbd unmap $DEV
+rbd rm image
+
+echo OK
diff --git a/qa/workunits/rbd/map-unmap.sh b/qa/workunits/rbd/map-unmap.sh
new file mode 100755
index 00000000..99863849
--- /dev/null
+++ b/qa/workunits/rbd/map-unmap.sh
@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+set -ex
+
+RUN_TIME=300 # approximate duration of run (seconds)
+
+[ $# -eq 1 ] && RUN_TIME="$1"
+
+IMAGE_NAME="image-$$"
+IMAGE_SIZE="1024" # MB
+
+function get_time() {
+ date '+%s'
+}
+
+function times_up() {
+ local end_time="$1"
+
+ test $(get_time) -ge "${end_time}"
+}
+
+function map_unmap() {
+ [ $# -eq 1 ] || exit 99
+ local image_name="$1"
+
+ local dev
+ dev="$(sudo rbd map "${image_name}")"
+ sudo rbd unmap "${dev}"
+}
+
+#### Start
+
+rbd create "${IMAGE_NAME}" --size="${IMAGE_SIZE}"
+
+COUNT=0
+START_TIME=$(get_time)
+END_TIME=$(expr $(get_time) + ${RUN_TIME})
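+# map and unmap in a loop until the deadline passes, counting iterations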
+while ! times_up "${END_TIME}"; do
+ map_unmap "${IMAGE_NAME}"
+ COUNT=$(expr $COUNT + 1)
+done
+ELAPSED=$(expr "$(get_time)" - "${START_TIME}")
+
+rbd rm "${IMAGE_NAME}"
+
+echo "${COUNT} iterations completed in ${ELAPSED} seconds"
diff --git a/qa/workunits/rbd/merge_diff.sh b/qa/workunits/rbd/merge_diff.sh
new file mode 100755
index 00000000..eb859730
--- /dev/null
+++ b/qa/workunits/rbd/merge_diff.sh
@@ -0,0 +1,477 @@
+#!/usr/bin/env bash
+set -ex
+
+export RBD_FORCE_ALLOW_V1=1
+
+pool=rbd
+gen=$pool/gen
+out=$pool/out
+testno=1
+
+mkdir -p merge_diff_test
+pushd merge_diff_test
+
+function expect_false()
+{
+ if "$@"; then return 1; else return 0; fi
+}
+
+function clear_all()
+{
+ fusermount -u mnt || true
+
+ rbd snap purge --no-progress $gen || true
+ rbd rm --no-progress $gen || true
+ rbd snap purge --no-progress $out || true
+ rbd rm --no-progress $out || true
+
+ rm -rf diffs || true
+}
+
+function rebuild()
+{
+ clear_all
+ echo Starting test $testno
+ ((testno++))
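+  # use fancy striping only when stripe-unit < object-size and stripe-count > 1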
+ if [[ "$2" -lt "$1" ]] && [[ "$3" -gt "1" ]]; then
+ rbd create $gen --size 100 --object-size $1 --stripe-unit $2 --stripe-count $3 --image-format $4
+ else
+ rbd create $gen --size 100 --object-size $1 --image-format $4
+ fi
+ rbd create $out --size 1 --object-size 524288
+ mkdir -p mnt diffs
+ # lttng has atexit handlers that need to be fork/clone aware
+ LD_PRELOAD=liblttng-ust-fork.so.0 rbd-fuse -p $pool mnt
+}
+
+function write()
+{
+ dd if=/dev/urandom of=mnt/gen bs=1M conv=notrunc seek=$1 count=$2
+}
+
+function snap()
+{
+ rbd snap create $gen@$1
+}
+
+function resize()
+{
+ rbd resize --no-progress $gen --size $1 --allow-shrink
+}
+
+function export_diff()
+{
+ if [ $2 == "head" ]; then
+ target="$gen"
+ else
+ target="$gen@$2"
+ fi
+ if [ $1 == "null" ]; then
+ rbd export-diff --no-progress $target diffs/$1.$2
+ else
+ rbd export-diff --no-progress $target --from-snap $1 diffs/$1.$2
+ fi
+}
+
+function merge_diff()
+{
+ rbd merge-diff diffs/$1.$2 diffs/$2.$3 diffs/$1.$3
+}
+
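+# check: import the diff into $out and verify that a full export of $out
+# matches a full export of $gen at the given end snapshot (md5 comparison)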
+function check()
+{
+  rbd import-diff --no-progress diffs/$1.$2 $out || return 1
+ if [ "$2" == "head" ]; then
+ sum1=`rbd export $gen - | md5sum`
+ else
+ sum1=`rbd export $gen@$2 - | md5sum`
+ fi
+ sum2=`rbd export $out - | md5sum`
+ if [ "$sum1" != "$sum2" ]; then
+    exit 1
+ fi
+ if [ "$2" != "head" ]; then
+    rbd snap ls $out | awk '{print $2}' | grep "^$2\$" || return 1
+ fi
+}
+
+# test from/to header handling
+rebuild 4194304 4194304 1 2
+write 0 1
+snap a
+write 1 1
+export_diff null a
+export_diff a head
+merge_diff null a head
+check null head
+
+rebuild 4194304 4194304 1 2
+write 0 1
+snap a
+write 1 1
+snap b
+write 2 1
+export_diff null a
+export_diff a b
+export_diff b head
+merge_diff null a b
+check null b
+
+rebuild 4194304 4194304 1 2
+write 0 1
+snap a
+write 1 1
+snap b
+write 2 1
+export_diff null a
+export_diff a b
+export_diff b head
+merge_diff a b head
+check null a
+check a head
+
+rebuild 4194304 4194304 1 2
+write 0 1
+snap a
+write 1 1
+snap b
+write 2 1
+export_diff null a
+export_diff a b
+export_diff b head
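+# merge-diff accepts '-' for its input and output, so merges can be pipelined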
+rbd merge-diff diffs/null.a diffs/a.b - | rbd merge-diff - diffs/b.head - > diffs/null.head
+check null head
+
+# data tests
+rebuild 4194304 4194304 1 2
+write 4 2
+snap s101
+write 0 3
+write 8 2
+snap s102
+export_diff null s101
+export_diff s101 s102
+merge_diff null s101 s102
+check null s102
+
+rebuild 4194304 4194304 1 2
+write 0 3
+write 2 5
+write 8 2
+snap s201
+write 0 2
+write 6 3
+snap s202
+export_diff null s201
+export_diff s201 s202
+merge_diff null s201 s202
+check null s202
+
+rebuild 4194304 4194304 1 2
+write 0 4
+write 12 6
+snap s301
+write 0 6
+write 10 5
+write 16 4
+snap s302
+export_diff null s301
+export_diff s301 s302
+merge_diff null s301 s302
+check null s302
+
+rebuild 4194304 4194304 1 2
+write 0 12
+write 14 2
+write 18 2
+snap s401
+write 1 2
+write 5 6
+write 13 3
+write 18 2
+snap s402
+export_diff null s401
+export_diff s401 s402
+merge_diff null s401 s402
+check null s402
+
+rebuild 4194304 4194304 1 2
+write 2 4
+write 10 12
+write 27 6
+write 36 4
+snap s501
+write 0 24
+write 28 4
+write 36 4
+snap s502
+export_diff null s501
+export_diff s501 s502
+merge_diff null s501 s502
+check null s502
+
+rebuild 4194304 4194304 1 2
+write 0 8
+resize 5
+snap r1
+resize 20
+write 12 8
+snap r2
+resize 8
+write 4 4
+snap r3
+export_diff null r1
+export_diff r1 r2
+export_diff r2 r3
+merge_diff null r1 r2
+merge_diff null r2 r3
+check null r3
+
+rebuild 4194304 4194304 1 2
+write 0 8
+resize 5
+snap r1
+resize 20
+write 12 8
+snap r2
+resize 8
+write 4 4
+snap r3
+resize 10
+snap r4
+export_diff null r1
+export_diff r1 r2
+export_diff r2 r3
+export_diff r3 r4
+merge_diff null r1 r2
+merge_diff null r2 r3
+merge_diff null r3 r4
+check null r4
+
+# merge diff doesn't yet support fancy striping
+# rebuild 4194304 65536 8 2
+# write 0 32
+# snap r1
+# write 16 32
+# snap r2
+# export_diff null r1
+# export_diff r1 r2
+# expect_false merge_diff null r1 r2
+
+rebuild 4194304 4194304 1 2
+write 0 1
+write 2 1
+write 4 1
+write 6 1
+snap s1
+write 1 1
+write 3 1
+write 5 1
+snap s2
+export_diff null s1
+export_diff s1 s2
+merge_diff null s1 s2
+check null s2
+
+rebuild 4194304 4194304 1 2
+write 1 1
+write 3 1
+write 5 1
+snap s1
+write 0 1
+write 2 1
+write 4 1
+write 6 1
+snap s2
+export_diff null s1
+export_diff s1 s2
+merge_diff null s1 s2
+check null s2
+
+rebuild 4194304 4194304 1 2
+write 0 3
+write 6 3
+write 12 3
+snap s1
+write 1 1
+write 7 1
+write 13 1
+snap s2
+export_diff null s1
+export_diff s1 s2
+merge_diff null s1 s2
+check null s2
+
+rebuild 4194304 4194304 1 2
+write 0 3
+write 6 3
+write 12 3
+snap s1
+write 0 1
+write 6 1
+write 12 1
+snap s2
+export_diff null s1
+export_diff s1 s2
+merge_diff null s1 s2
+check null s2
+
+rebuild 4194304 4194304 1 2
+write 0 3
+write 6 3
+write 12 3
+snap s1
+write 2 1
+write 8 1
+write 14 1
+snap s2
+export_diff null s1
+export_diff s1 s2
+merge_diff null s1 s2
+check null s2
+
+rebuild 4194304 4194304 1 2
+write 1 1
+write 7 1
+write 13 1
+snap s1
+write 0 3
+write 6 3
+write 12 3
+snap s2
+export_diff null s1
+export_diff s1 s2
+merge_diff null s1 s2
+check null s2
+
+rebuild 4194304 4194304 1 2
+write 0 1
+write 6 1
+write 12 1
+snap s1
+write 0 3
+write 6 3
+write 12 3
+snap s2
+export_diff null s1
+export_diff s1 s2
+merge_diff null s1 s2
+check null s2
+
+rebuild 4194304 4194304 1 2
+write 2 1
+write 8 1
+write 14 1
+snap s1
+write 0 3
+write 6 3
+write 12 3
+snap s2
+export_diff null s1
+export_diff s1 s2
+merge_diff null s1 s2
+check null s2
+
+rebuild 4194304 4194304 1 2
+write 0 3
+write 6 3
+write 12 3
+snap s1
+write 0 3
+write 6 3
+write 12 3
+snap s2
+export_diff null s1
+export_diff s1 s2
+merge_diff null s1 s2
+check null s2
+
+rebuild 4194304 4194304 1 2
+write 2 4
+write 8 4
+write 14 4
+snap s1
+write 0 3
+write 6 3
+write 12 3
+snap s2
+export_diff null s1
+export_diff s1 s2
+merge_diff null s1 s2
+check null s2
+
+rebuild 4194304 4194304 1 2
+write 0 4
+write 6 4
+write 12 4
+snap s1
+write 0 3
+write 6 3
+write 12 3
+snap s2
+export_diff null s1
+export_diff s1 s2
+merge_diff null s1 s2
+check null s2
+
+rebuild 4194304 4194304 1 2
+write 0 6
+write 6 6
+write 12 6
+snap s1
+write 0 3
+write 6 3
+write 12 3
+snap s2
+export_diff null s1
+export_diff s1 s2
+merge_diff null s1 s2
+check null s2
+
+rebuild 4194304 4194304 1 2
+write 3 6
+write 9 6
+write 15 6
+snap s1
+write 0 3
+write 6 3
+write 12 3
+snap s2
+export_diff null s1
+export_diff s1 s2
+merge_diff null s1 s2
+check null s2
+
+rebuild 4194304 4194304 1 2
+write 0 8
+snap s1
+resize 2
+resize 100
+snap s2
+export_diff null s1
+export_diff s1 s2
+merge_diff null s1 s2
+check null s2
+
+rebuild 4194304 4194304 1 2
+write 0 8
+snap s1
+resize 2
+resize 100
+snap s2
+write 20 2
+snap s3
+export_diff null s1
+export_diff s1 s2
+export_diff s2 s3
+merge_diff s1 s2 s3
+check null s1
+check s1 s3
+
+#addme
+
+clear_all
+popd
+rm -rf merge_diff_test
+
+echo OK
diff --git a/qa/workunits/rbd/notify_master.sh b/qa/workunits/rbd/notify_master.sh
new file mode 100755
index 00000000..6ebea31e
--- /dev/null
+++ b/qa/workunits/rbd/notify_master.sh
@@ -0,0 +1,5 @@
+#!/bin/sh -ex
+
+relpath=$(dirname $0)/../../../src/test/librbd
+python $relpath/test_notify.py master
+exit 0
diff --git a/qa/workunits/rbd/notify_slave.sh b/qa/workunits/rbd/notify_slave.sh
new file mode 100755
index 00000000..ea66161a
--- /dev/null
+++ b/qa/workunits/rbd/notify_slave.sh
@@ -0,0 +1,5 @@
+#!/bin/sh -ex
+
+relpath=$(dirname $0)/../../../src/test/librbd
+python $relpath/test_notify.py slave
+exit 0
diff --git a/qa/workunits/rbd/permissions.sh b/qa/workunits/rbd/permissions.sh
new file mode 100755
index 00000000..cf836848
--- /dev/null
+++ b/qa/workunits/rbd/permissions.sh
@@ -0,0 +1,272 @@
+#!/usr/bin/env bash
+set -ex
+
+IMAGE_FEATURES="layering,exclusive-lock,object-map,fast-diff"
+
+clone_v2_enabled() {
+ image_spec=$1
+ rbd info $image_spec | grep "clone-parent"
+}
+
+create_pools() {
+ ceph osd pool create images 32
+ rbd pool init images
+ ceph osd pool create volumes 32
+ rbd pool init volumes
+}
+
+delete_pools() {
+ (ceph osd pool delete images images --yes-i-really-really-mean-it || true) >/dev/null 2>&1
+ (ceph osd pool delete volumes volumes --yes-i-really-really-mean-it || true) >/dev/null 2>&1
+
+}
+
+recreate_pools() {
+ delete_pools
+ create_pools
+}
+
+delete_users() {
+ (ceph auth del client.volumes || true) >/dev/null 2>&1
+ (ceph auth del client.images || true) >/dev/null 2>&1
+
+ (ceph auth del client.snap_none || true) >/dev/null 2>&1
+ (ceph auth del client.snap_all || true) >/dev/null 2>&1
+ (ceph auth del client.snap_pool || true) >/dev/null 2>&1
+ (ceph auth del client.snap_profile_all || true) >/dev/null 2>&1
+ (ceph auth del client.snap_profile_pool || true) >/dev/null 2>&1
+
+ (ceph auth del client.mon_write || true) >/dev/null 2>&1
+}
+
+create_users() {
+ ceph auth get-or-create client.volumes \
+ mon 'profile rbd' \
+ osd 'profile rbd pool=volumes, profile rbd-read-only pool=images' \
+ mgr 'profile rbd pool=volumes, profile rbd-read-only pool=images' >> $KEYRING
+ ceph auth get-or-create client.images mon 'profile rbd' osd 'profile rbd pool=images' >> $KEYRING
+
+ ceph auth get-or-create client.snap_none mon 'allow r' >> $KEYRING
+ ceph auth get-or-create client.snap_all mon 'allow r' osd 'allow w' >> $KEYRING
+ ceph auth get-or-create client.snap_pool mon 'allow r' osd 'allow w pool=images' >> $KEYRING
+ ceph auth get-or-create client.snap_profile_all mon 'allow r' osd 'profile rbd' >> $KEYRING
+ ceph auth get-or-create client.snap_profile_pool mon 'allow r' osd 'profile rbd pool=images' >> $KEYRING
+
+ ceph auth get-or-create client.mon_write mon 'allow *' >> $KEYRING
+}
+
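+# expect <status> <cmd...>: run the command with errexit temporarily disabled
+# and assert that its exit status equals <status>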
+expect() {
+
+ set +e
+
+ local expected_ret=$1
+ local ret
+
+ shift
+ cmd=$@
+
+ eval $cmd
+ ret=$?
+
+ set -e
+
+ if [[ $ret -ne $expected_ret ]]; then
+        echo "ERROR: running '$cmd': expected $expected_ret got $ret"
+ return 1
+ fi
+
+ return 0
+}
+
+test_images_access() {
+ rbd -k $KEYRING --id images create --image-format 2 --image-feature $IMAGE_FEATURES -s 1 images/foo
+ rbd -k $KEYRING --id images snap create images/foo@snap
+ rbd -k $KEYRING --id images snap protect images/foo@snap
+ rbd -k $KEYRING --id images snap unprotect images/foo@snap
+ rbd -k $KEYRING --id images snap protect images/foo@snap
+ rbd -k $KEYRING --id images export images/foo@snap - >/dev/null
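+    # the snapshot is protected, so removing it fails with EBUSY (16)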
+ expect 16 rbd -k $KEYRING --id images snap rm images/foo@snap
+
+ rbd -k $KEYRING --id volumes clone --image-feature $IMAGE_FEATURES images/foo@snap volumes/child
+
+ if ! clone_v2_enabled images/foo; then
+ expect 16 rbd -k $KEYRING --id images snap unprotect images/foo@snap
+ fi
+
+ expect 1 rbd -k $KEYRING --id volumes snap unprotect images/foo@snap
+ expect 1 rbd -k $KEYRING --id images flatten volumes/child
+ rbd -k $KEYRING --id volumes flatten volumes/child
+ expect 1 rbd -k $KEYRING --id volumes snap unprotect images/foo@snap
+ rbd -k $KEYRING --id images snap unprotect images/foo@snap
+
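+    # the image still has a snapshot, so removing it fails with ENOTEMPTY (39)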
+ expect 39 rbd -k $KEYRING --id images rm images/foo
+ rbd -k $KEYRING --id images snap rm images/foo@snap
+ rbd -k $KEYRING --id images rm images/foo
+ rbd -k $KEYRING --id volumes rm volumes/child
+}
+
+test_volumes_access() {
+ rbd -k $KEYRING --id images create --image-format 2 --image-feature $IMAGE_FEATURES -s 1 images/foo
+ rbd -k $KEYRING --id images snap create images/foo@snap
+ rbd -k $KEYRING --id images snap protect images/foo@snap
+
+ # commands that work with read-only access
+ rbd -k $KEYRING --id volumes info images/foo@snap
+ rbd -k $KEYRING --id volumes snap ls images/foo
+ rbd -k $KEYRING --id volumes export images/foo - >/dev/null
+ rbd -k $KEYRING --id volumes cp images/foo volumes/foo_copy
+ rbd -k $KEYRING --id volumes rm volumes/foo_copy
+ rbd -k $KEYRING --id volumes children images/foo@snap
+ rbd -k $KEYRING --id volumes lock list images/foo
+
+ # commands that fail with read-only access
+ expect 1 rbd -k $KEYRING --id volumes resize -s 2 images/foo --allow-shrink
+ expect 1 rbd -k $KEYRING --id volumes snap create images/foo@2
+ expect 1 rbd -k $KEYRING --id volumes snap rollback images/foo@snap
+ expect 1 rbd -k $KEYRING --id volumes snap remove images/foo@snap
+ expect 1 rbd -k $KEYRING --id volumes snap purge images/foo
+ expect 1 rbd -k $KEYRING --id volumes snap unprotect images/foo@snap
+ expect 1 rbd -k $KEYRING --id volumes flatten images/foo
+ expect 1 rbd -k $KEYRING --id volumes lock add images/foo test
+ expect 1 rbd -k $KEYRING --id volumes lock remove images/foo test locker
+ expect 1 rbd -k $KEYRING --id volumes ls rbd
+
+ # create clone and snapshot
+ rbd -k $KEYRING --id volumes clone --image-feature $IMAGE_FEATURES images/foo@snap volumes/child
+ rbd -k $KEYRING --id volumes snap create volumes/child@snap1
+ rbd -k $KEYRING --id volumes snap protect volumes/child@snap1
+ rbd -k $KEYRING --id volumes snap create volumes/child@snap2
+
+ # make sure original snapshot stays protected
+ if clone_v2_enabled images/foo; then
+ rbd -k $KEYRING --id volumes flatten volumes/child
+ rbd -k $KEYRING --id volumes snap rm volumes/child@snap2
+ rbd -k $KEYRING --id volumes snap unprotect volumes/child@snap1
+ else
+ expect 16 rbd -k $KEYRING --id images snap unprotect images/foo@snap
+ rbd -k $KEYRING --id volumes flatten volumes/child
+ expect 16 rbd -k $KEYRING --id images snap unprotect images/foo@snap
+ rbd -k $KEYRING --id volumes snap rm volumes/child@snap2
+ expect 16 rbd -k $KEYRING --id images snap unprotect images/foo@snap
+ expect 2 rbd -k $KEYRING --id volumes snap rm volumes/child@snap2
+ rbd -k $KEYRING --id volumes snap unprotect volumes/child@snap1
+ expect 16 rbd -k $KEYRING --id images snap unprotect images/foo@snap
+ fi
+
+ # clean up
+ rbd -k $KEYRING --id volumes snap rm volumes/child@snap1
+ rbd -k $KEYRING --id images snap unprotect images/foo@snap
+ rbd -k $KEYRING --id images snap rm images/foo@snap
+ rbd -k $KEYRING --id images rm images/foo
+ rbd -k $KEYRING --id volumes rm volumes/child
+}
+
+create_self_managed_snapshot() {
+ ID=$1
+ POOL=$2
+
+ cat << EOF | CEPH_ARGS="-k $KEYRING" python
+import rados
+
+cluster = rados.Rados(conffile="", rados_id="${ID}")
+cluster.connect()
+ioctx = cluster.open_ioctx("${POOL}")
+
+snap_id = ioctx.create_self_managed_snap()
+print("Created snap id {}".format(snap_id))
+EOF
+}
+
+remove_self_managed_snapshot() {
+ ID=$1
+ POOL=$2
+
+ cat << EOF | CEPH_ARGS="-k $KEYRING" python
+import rados
+
+cluster1 = rados.Rados(conffile="", rados_id="mon_write")
+cluster1.connect()
+ioctx1 = cluster1.open_ioctx("${POOL}")
+
+snap_id = ioctx1.create_self_managed_snap()
+print("Created snap id {}".format(snap_id))
+
+cluster2 = rados.Rados(conffile="", rados_id="${ID}")
+cluster2.connect()
+ioctx2 = cluster2.open_ioctx("${POOL}")
+
+ioctx2.remove_self_managed_snap(snap_id)
+print("Removed snap id {}".format(snap_id))
+EOF
+}
+
+test_remove_self_managed_snapshots() {
+ # Ensure users cannot create self-managed snapshots w/o permissions
+ expect 1 create_self_managed_snapshot snap_none images
+ expect 1 create_self_managed_snapshot snap_none volumes
+
+ create_self_managed_snapshot snap_all images
+ create_self_managed_snapshot snap_all volumes
+
+ create_self_managed_snapshot snap_pool images
+ expect 1 create_self_managed_snapshot snap_pool volumes
+
+ create_self_managed_snapshot snap_profile_all images
+ create_self_managed_snapshot snap_profile_all volumes
+
+ create_self_managed_snapshot snap_profile_pool images
+ expect 1 create_self_managed_snapshot snap_profile_pool volumes
+
+ # Ensure users cannot delete self-managed snapshots w/o permissions
+ expect 1 remove_self_managed_snapshot snap_none images
+ expect 1 remove_self_managed_snapshot snap_none volumes
+
+ remove_self_managed_snapshot snap_all images
+ remove_self_managed_snapshot snap_all volumes
+
+ remove_self_managed_snapshot snap_pool images
+ expect 1 remove_self_managed_snapshot snap_pool volumes
+
+ remove_self_managed_snapshot snap_profile_all images
+ remove_self_managed_snapshot snap_profile_all volumes
+
+ remove_self_managed_snapshot snap_profile_pool images
+ expect 1 remove_self_managed_snapshot snap_profile_pool volumes
+}
+
+test_rbd_support() {
+ # read-only commands should work on both pools
+ ceph -k $KEYRING --id volumes rbd perf image stats volumes
+ ceph -k $KEYRING --id volumes rbd perf image stats images
+
+ # read/write commands should only work on 'volumes'
+ rbd -k $KEYRING --id volumes create --image-format 2 --image-feature $IMAGE_FEATURES -s 1 volumes/foo
+ ceph -k $KEYRING --id volumes rbd task add remove volumes/foo
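+    # the same task on the read-only 'images' pool is denied with EACCES (13)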
+ expect 13 ceph -k $KEYRING --id volumes rbd task add remove images/foo
+}
+
+cleanup() {
+ rm -f $KEYRING
+}
+
+KEYRING=$(mktemp)
+trap cleanup EXIT ERR HUP INT QUIT
+
+delete_users
+create_users
+
+recreate_pools
+test_images_access
+
+recreate_pools
+test_volumes_access
+
+test_remove_self_managed_snapshots
+
+test_rbd_support
+
+delete_pools
+delete_users
+
+echo OK
+exit 0
diff --git a/qa/workunits/rbd/qemu-iotests.sh b/qa/workunits/rbd/qemu-iotests.sh
new file mode 100755
index 00000000..1cd9c4e0
--- /dev/null
+++ b/qa/workunits/rbd/qemu-iotests.sh
@@ -0,0 +1,53 @@
+#!/bin/sh -ex
+
+# Run qemu-iotests against rbd. These are block-level tests that go
+# through qemu but do not involve running a full vm. Note that these
+# require the admin ceph user, as there's no way to pass the ceph user
+# to qemu-iotests currently.
+
+testlist='001 002 003 004 005 008 009 010 011 021 025 032 033 055'
+
+git clone https://github.com/qemu/qemu.git
+cd qemu
+if lsb_release -da 2>&1 | grep -iq 'bionic'; then
+ # Bionic requires a matching test harness
+ git checkout v2.11.0
+elif lsb_release -da 2>&1 | grep -iq 'xenial'; then
+ # Xenial requires a recent test harness
+ git checkout v2.3.0
+else
+    # use v2.2.0-rc3 (last released version that handles all the tests)
+ git checkout 2528043f1f299e0e88cb026f1ca7c40bbb4e1f80
+fi
+
+cd tests/qemu-iotests
+mkdir bin
+# qemu-iotests expects a binary called just 'qemu' to be available
+if [ -x '/usr/bin/qemu-system-x86_64' ]
+then
+ QEMU='/usr/bin/qemu-system-x86_64'
+
+ # Bionic (v2.11.0) tests expect all tools in current directory
+ ln -s $QEMU qemu
+ ln -s /usr/bin/qemu-img
+ ln -s /usr/bin/qemu-io
+ ln -s /usr/bin/qemu-nbd
+else
+ QEMU='/usr/libexec/qemu-kvm'
+
+ # disable test 055 since qemu-kvm (RHEL/CentOS) doesn't support the
+ # required QMP commands
+ testlist=$(echo ${testlist} | sed "s/ 055//g")
+fi
+ln -s $QEMU bin/qemu
+
+# this is normally generated by configure, but has nothing but a python
+# binary definition, which we don't care about. for some reason it is
+# not present on trusty.
+touch common.env
+
+# TEST_DIR is the pool for rbd
+TEST_DIR=rbd PATH="$PATH:$PWD/bin" ./check -rbd $testlist
+
+cd ../../..
+rm -rf qemu
diff --git a/qa/workunits/rbd/qemu_dynamic_features.sh b/qa/workunits/rbd/qemu_dynamic_features.sh
new file mode 100755
index 00000000..70e9fbb3
--- /dev/null
+++ b/qa/workunits/rbd/qemu_dynamic_features.sh
@@ -0,0 +1,46 @@
+#!/usr/bin/env bash
+set -x
+
+if [[ -z "${IMAGE_NAME}" ]]; then
+ echo image name must be provided
+ exit 1
+fi
+
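+# a running QEMU holds a watch on the image, so any watcher other than
+# "none" is treated as QEMU being alive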
+is_qemu_running() {
+ rbd status ${IMAGE_NAME} | grep -v "Watchers: none"
+}
+
+wait_for_qemu() {
+ while ! is_qemu_running ; do
+ echo "*** Waiting for QEMU"
+ sleep 30
+ done
+}
+
+wait_for_qemu
+rbd feature disable ${IMAGE_NAME} journaling
+rbd feature disable ${IMAGE_NAME} object-map
+rbd feature disable ${IMAGE_NAME} exclusive-lock
+
+while is_qemu_running ; do
+ echo "*** Enabling all features"
+ rbd feature enable ${IMAGE_NAME} exclusive-lock || break
+ rbd feature enable ${IMAGE_NAME} journaling || break
+ rbd feature enable ${IMAGE_NAME} object-map || break
+ if is_qemu_running ; then
+ sleep 60
+ fi
+
+ echo "*** Disabling all features"
+ rbd feature disable ${IMAGE_NAME} journaling || break
+ rbd feature disable ${IMAGE_NAME} object-map || break
+ rbd feature disable ${IMAGE_NAME} exclusive-lock || break
+ if is_qemu_running ; then
+ sleep 60
+ fi
+done
+
+if is_qemu_running ; then
+ echo "RBD command failed on alive QEMU"
+ exit 1
+fi
diff --git a/qa/workunits/rbd/qemu_rebuild_object_map.sh b/qa/workunits/rbd/qemu_rebuild_object_map.sh
new file mode 100755
index 00000000..2647dcdc
--- /dev/null
+++ b/qa/workunits/rbd/qemu_rebuild_object_map.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+set -ex
+
+if [[ -z "${IMAGE_NAME}" ]]; then
+ echo image name must be provided
+ exit 1
+fi
+
+is_qemu_running() {
+ rbd status ${IMAGE_NAME} | grep -v "Watchers: none"
+}
+
+wait_for_qemu() {
+ while ! is_qemu_running ; do
+ echo "*** Waiting for QEMU"
+ sleep 30
+ done
+}
+
+wait_for_qemu
+rbd feature disable ${IMAGE_NAME} journaling || true
+rbd feature disable ${IMAGE_NAME} fast-diff || true
+rbd feature disable ${IMAGE_NAME} object-map || true
+rbd feature disable ${IMAGE_NAME} exclusive-lock || true
+
+rbd feature enable ${IMAGE_NAME} exclusive-lock
+rbd feature enable ${IMAGE_NAME} object-map
+
+while is_qemu_running ; do
+ echo "*** Rebuilding object map"
+ rbd object-map rebuild ${IMAGE_NAME}
+
+ if is_qemu_running ; then
+ sleep 60
+ fi
+done
+
diff --git a/qa/workunits/rbd/rbd-ggate.sh b/qa/workunits/rbd/rbd-ggate.sh
new file mode 100755
index 00000000..9b60158d
--- /dev/null
+++ b/qa/workunits/rbd/rbd-ggate.sh
@@ -0,0 +1,242 @@
+#!/bin/sh -ex
+
+POOL=testrbdggate$$
+NS=ns
+IMAGE=test
+SIZE=64
+DATA=
+DEV=
+
+if which xmlstarlet > /dev/null 2>&1; then
+ XMLSTARLET=xmlstarlet
+elif which xml > /dev/null 2>&1; then
+ XMLSTARLET=xml
+else
+ echo "Missing xmlstarlet binary!"
+ exit 1
+fi
+
+if [ `uname -K` -ge 1200078 ] ; then
+ RBD_GGATE_RESIZE_SUPPORTED=1
+fi
+
+_sudo()
+{
+ local cmd
+
+ if [ `id -u` -eq 0 ]
+ then
+ "$@"
+ return $?
+ fi
+
+    # Look for the command in the user's PATH. If that fails, run it as is,
+    # assuming it is in sudo's PATH.
+ cmd=`which $1 2>/dev/null` || cmd=$1
+ shift
+ sudo -nE "${cmd}" "$@"
+}
+
+check_geom_gate()
+{
+    # See if geom_gate is loaded, or can be loaded.
+    # Otherwise the tests cannot run.
+ if ! kldstat -q -n geom_gate ; then
+ # See if we can load it
+ if ! _sudo kldload geom_gate ; then
+            echo "Not able to load geom_gate"
+            echo "Check /var/log/messages for the reason"
+ exit 1
+ fi
+ fi
+}
+
+setup()
+{
+ local ns x
+
+ if [ -e CMakeCache.txt ]; then
+ # running under cmake build dir
+
+ CEPH_SRC=$(readlink -f $(dirname $0)/../../../src)
+ CEPH_ROOT=${PWD}
+ CEPH_BIN=${CEPH_ROOT}/bin
+
+ export LD_LIBRARY_PATH=${CEPH_ROOT}/lib:${LD_LIBRARY_PATH}
+ export PYTHONPATH=${PYTHONPATH}:${CEPH_SRC}/pybind
+ for x in ${CEPH_ROOT}/lib/cython_modules/lib* ; do
+ PYTHONPATH="${PYTHONPATH}:${x}"
+ done
+ PATH=${CEPH_BIN}:${PATH}
+ fi
+
+ _sudo echo test sudo
+ check_geom_gate
+
+ trap cleanup INT TERM EXIT
+ TEMPDIR=`mktemp -d`
+ DATA=${TEMPDIR}/data
+ dd if=/dev/urandom of=${DATA} bs=1M count=${SIZE}
+ ceph osd pool create ${POOL} 32
+
+ rbd namespace create ${POOL}/${NS}
+ for ns in '' ${NS}; do
+ rbd --dest-pool ${POOL} --dest-namespace "${ns}" --no-progress import \
+ ${DATA} ${IMAGE}
+ done
+}
+
+cleanup()
+{
+ local ns s
+
+ set +e
+ rm -Rf ${TEMPDIR}
+ if [ -n "${DEV}" ]
+ then
+ _sudo rbd-ggate unmap ${DEV}
+ fi
+
+ ceph osd pool delete ${POOL} ${POOL} --yes-i-really-really-mean-it
+}
+
+expect_false()
+{
+ if "$@"; then return 1; else return 0; fi
+}
+
+#
+# main
+#
+
+setup
+
+echo exit status test
+expect_false rbd-ggate
+expect_false rbd-ggate INVALIDCMD
+if [ `id -u` -ne 0 ]
+then
+ expect_false rbd-ggate map ${IMAGE}
+fi
+expect_false _sudo rbd-ggate map INVALIDIMAGE
+
+echo map test using the first unused device
+DEV=`_sudo rbd-ggate map ${POOL}/${IMAGE}`
+rbd-ggate list | grep " ${DEV} *$"
+
+echo map test specifying the device
+expect_false _sudo rbd-ggate --device ${DEV} map ${POOL}/${IMAGE}
+dev1=${DEV}
+_sudo rbd-ggate unmap ${DEV}
+rbd-ggate list | expect_false grep " ${DEV} *$"
+DEV=
+# XXX: race possible when the device is reused by another process
+DEV=`_sudo rbd-ggate --device ${dev1} map ${POOL}/${IMAGE}`
+[ "${DEV}" = "${dev1}" ]
+rbd-ggate list | grep " ${DEV} *$"
+
+echo list format test
+expect_false _sudo rbd-ggate --format INVALID list
+rbd-ggate --format json --pretty-format list
+rbd-ggate --format xml list
+
+echo read test
+[ "`dd if=${DATA} bs=1M | md5`" = "`_sudo dd if=${DEV} bs=1M | md5`" ]
+
+echo write test
+dd if=/dev/urandom of=${DATA} bs=1M count=${SIZE}
+_sudo dd if=${DATA} of=${DEV} bs=1M
+_sudo sync
+[ "`dd if=${DATA} bs=1M | md5`" = "`rbd -p ${POOL} --no-progress export ${IMAGE} - | md5`" ]
+
+echo trim test
+provisioned=`rbd -p ${POOL} --format xml du ${IMAGE} |
+ $XMLSTARLET sel -t -m "//stats/images/image/provisioned_size" -v .`
+used=`rbd -p ${POOL} --format xml du ${IMAGE} |
+ $XMLSTARLET sel -t -m "//stats/images/image/used_size" -v .`
+[ "${used}" -eq "${provisioned}" ]
+_sudo newfs -E ${DEV}
+_sudo sync
+provisioned=`rbd -p ${POOL} --format xml du ${IMAGE} |
+ $XMLSTARLET sel -t -m "//stats/images/image/provisioned_size" -v .`
+used=`rbd -p ${POOL} --format xml du ${IMAGE} |
+ $XMLSTARLET sel -t -m "//stats/images/image/used_size" -v .`
+[ "${used}" -lt "${provisioned}" ]
+
+echo resize test
+devname=$(basename ${DEV})
+size=$(geom gate list ${devname} | awk '$1 ~ /Mediasize:/ {print $2}')
+test -n "${size}"
+rbd resize ${POOL}/${IMAGE} --size $((SIZE * 2))M
+rbd info ${POOL}/${IMAGE}
+if [ -z "$RBD_GGATE_RESIZE_SUPPORTED" ]; then
+ # when resizing is not supported:
+ # resizing the underlying image for a GEOM ggate will stop the
+ # ggate process servicing the device. So we can resize and test
+ # the disappearance of the device
+ rbd-ggate list | expect_false grep " ${DEV} *$"
+else
+ rbd-ggate list | grep " ${DEV} *$"
+ size2=$(geom gate list ${devname} | awk '$1 ~ /Mediasize:/ {print $2}')
+ test -n "${size2}"
+ test ${size2} -eq $((size * 2))
+ dd if=/dev/urandom of=${DATA} bs=1M count=$((SIZE * 2))
+ _sudo dd if=${DATA} of=${DEV} bs=1M
+ _sudo sync
+ [ "`dd if=${DATA} bs=1M | md5`" = "`rbd -p ${POOL} --no-progress export ${IMAGE} - | md5`" ]
+ rbd resize ${POOL}/${IMAGE} --allow-shrink --size ${SIZE}M
+ rbd info ${POOL}/${IMAGE}
+ size2=$(geom gate list ${devname} | awk '$1 ~ /Mediasize:/ {print $2}')
+ test -n "${size2}"
+ test ${size2} -eq ${size}
+ truncate -s ${SIZE}M ${DATA}
+ [ "`dd if=${DATA} bs=1M | md5`" = "`rbd -p ${POOL} --no-progress export ${IMAGE} - | md5`" ]
+ _sudo rbd-ggate unmap ${DEV}
+fi
+DEV=
+
+echo read-only option test
+DEV=`_sudo rbd-ggate map --read-only ${POOL}/${IMAGE}`
+devname=$(basename ${DEV})
+rbd-ggate list | grep " ${DEV} *$"
+access=$(geom gate list ${devname} | awk '$1 == "access:" {print $2}')
+test "${access}" = "read-only"
+_sudo dd if=${DEV} of=/dev/null bs=1M
+expect_false _sudo dd if=${DATA} of=${DEV} bs=1M
+_sudo rbd-ggate unmap ${DEV}
+
+echo exclusive option test
+DEV=`_sudo rbd-ggate map --exclusive ${POOL}/${IMAGE}`
+rbd-ggate list | grep " ${DEV} *$"
+_sudo dd if=${DATA} of=${DEV} bs=1M
+_sudo sync
+expect_false timeout 10 \
+ rbd -p ${POOL} bench ${IMAGE} --io-type=write --io-size=1024 --io-total=1024
+_sudo rbd-ggate unmap ${DEV}
+DEV=
+rbd bench -p ${POOL} ${IMAGE} --io-type=write --io-size=1024 --io-total=1024
+
+echo unmap by image name test
+DEV=`_sudo rbd-ggate map ${POOL}/${IMAGE}`
+rbd-ggate list | grep " ${DEV} *$"
+_sudo rbd-ggate unmap "${POOL}/${IMAGE}"
+rbd-ggate list | expect_false grep " ${DEV} *$"
+DEV=
+
+echo map/unmap snap test
+rbd snap create ${POOL}/${IMAGE}@snap
+DEV=`_sudo rbd-ggate map ${POOL}/${IMAGE}@snap`
+rbd-ggate list | grep " ${DEV} *$"
+_sudo rbd-ggate unmap "${POOL}/${IMAGE}@snap"
+rbd-ggate list | expect_false grep " ${DEV} *$"
+DEV=
+
+echo map/unmap namespace test
+rbd snap create ${POOL}/${NS}/${IMAGE}@snap
+DEV=`_sudo rbd-ggate map ${POOL}/${NS}/${IMAGE}@snap`
+rbd-ggate list | grep " ${DEV} *$"
+_sudo rbd-ggate unmap "${POOL}/${NS}/${IMAGE}@snap"
+rbd-ggate list | expect_false grep "${DEV} $"
+DEV=
+
+echo OK
diff --git a/qa/workunits/rbd/rbd-nbd.sh b/qa/workunits/rbd/rbd-nbd.sh
new file mode 100755
index 00000000..63728e06
--- /dev/null
+++ b/qa/workunits/rbd/rbd-nbd.sh
@@ -0,0 +1,253 @@
+#!/usr/bin/env bash
+set -ex
+
+. $(dirname $0)/../../standalone/ceph-helpers.sh
+
+POOL=rbd
+NS=ns
+IMAGE=testrbdnbd$$
+SIZE=64
+DATA=
+DEV=
+
+_sudo()
+{
+ local cmd
+
+ if [ `id -u` -eq 0 ]
+ then
+ "$@"
+ return $?
+ fi
+
+    # Look for the command in the user's PATH. If that fails, run it as is,
+    # assuming it is in sudo's PATH.
+ cmd=`which $1 2>/dev/null` || cmd=$1
+ shift
+ sudo -nE "${cmd}" "$@"
+}
+
+setup()
+{
+ local ns x
+
+ if [ -e CMakeCache.txt ]; then
+ # running under cmake build dir
+
+ CEPH_SRC=$(readlink -f $(dirname $0)/../../../src)
+ CEPH_ROOT=${PWD}
+ CEPH_BIN=${CEPH_ROOT}/bin
+
+ export LD_LIBRARY_PATH=${CEPH_ROOT}/lib:${LD_LIBRARY_PATH}
+ export PYTHONPATH=${PYTHONPATH}:${CEPH_SRC}/pybind
+ for x in ${CEPH_ROOT}/lib/cython_modules/lib* ; do
+ PYTHONPATH="${PYTHONPATH}:${x}"
+ done
+ PATH=${CEPH_BIN}:${PATH}
+ fi
+
+ _sudo echo test sudo
+
+ trap cleanup INT TERM EXIT
+ TEMPDIR=`mktemp -d`
+ DATA=${TEMPDIR}/data
+ dd if=/dev/urandom of=${DATA} bs=1M count=${SIZE}
+
+ rbd namespace create ${POOL}/${NS}
+
+ for ns in '' ${NS}; do
+ rbd --dest-pool ${POOL} --dest-namespace "${ns}" --no-progress import \
+ ${DATA} ${IMAGE}
+ done
+}
+
+function cleanup()
+{
+ local ns s
+
+ set +e
+ rm -Rf ${TEMPDIR}
+ if [ -n "${DEV}" ]
+ then
+ _sudo rbd-nbd unmap ${DEV}
+ fi
+
+ for ns in '' ${NS}; do
+ if rbd -p ${POOL} --namespace "${ns}" status ${IMAGE} 2>/dev/null; then
+ for s in 0.5 1 2 4 8 16 32; do
+ sleep $s
+ rbd -p ${POOL} --namespace "${ns}" status ${IMAGE} |
+ grep 'Watchers: none' && break
+ done
+ rbd -p ${POOL} --namespace "${ns}" snap purge ${IMAGE}
+ rbd -p ${POOL} --namespace "${ns}" remove ${IMAGE}
+ fi
+ done
+ rbd namespace remove ${POOL}/${NS}
+}
+
+function expect_false()
+{
+ if "$@"; then return 1; else return 0; fi
+}
+
+function get_pid()
+{
+ local ns=$1
+
+ PID=$(rbd-nbd --format xml list-mapped | $XMLSTARLET sel -t -v \
+ "//devices/device[pool='${POOL}'][namespace='${ns}'][image='${IMAGE}'][device='${DEV}']/id")
+ test -n "${PID}"
+ ps -p ${PID} -C rbd-nbd
+}
+
+unmap_device()
+{
+ local dev=$1
+ local pid=$2
+ _sudo rbd-nbd unmap ${dev}
+
+ for s in 0.5 1 2 4 8 16 32; do
+ sleep ${s}
+ rbd-nbd list-mapped | expect_false grep "^${pid}\\b" &&
+ ps -C rbd-nbd | expect_false grep "^${pid}\\b" &&
+ return 0
+ done
+ return 1
+}
+
+#
+# main
+#
+
+setup
+
+# exit status test
+expect_false rbd-nbd
+expect_false rbd-nbd INVALIDCMD
+if [ `id -u` -ne 0 ]
+then
+ expect_false rbd-nbd map ${IMAGE}
+fi
+expect_false _sudo rbd-nbd map INVALIDIMAGE
+expect_false _sudo rbd-nbd --device INVALIDDEV map ${IMAGE}
+
+# list format test
+expect_false rbd-nbd --format INVALID list-mapped
+rbd-nbd --format json --pretty-format list-mapped
+rbd-nbd --format xml list-mapped
+
+# map test using the first unused device
+DEV=`_sudo rbd-nbd map ${POOL}/${IMAGE}`
+get_pid
+# map test specifying the device
+expect_false _sudo rbd-nbd --device ${DEV} map ${POOL}/${IMAGE}
+dev1=${DEV}
+unmap_device ${DEV} ${PID}
+DEV=
+# XXX: race possible when the device is reused by another process
+DEV=`_sudo rbd-nbd --device ${dev1} map ${POOL}/${IMAGE}`
+[ "${DEV}" = "${dev1}" ]
+rbd-nbd list-mapped | grep "${IMAGE}"
+get_pid
+
+# read test
+[ "`dd if=${DATA} bs=1M | md5sum`" = "`_sudo dd if=${DEV} bs=1M | md5sum`" ]
+
+# write test
+dd if=/dev/urandom of=${DATA} bs=1M count=${SIZE}
+_sudo dd if=${DATA} of=${DEV} bs=1M oflag=direct
+[ "`dd if=${DATA} bs=1M | md5sum`" = "`rbd -p ${POOL} --no-progress export ${IMAGE} - | md5sum`" ]
+
+# trim test
+provisioned=`rbd -p ${POOL} --format xml du ${IMAGE} |
+ $XMLSTARLET sel -t -m "//stats/images/image/provisioned_size" -v .`
+used=`rbd -p ${POOL} --format xml du ${IMAGE} |
+ $XMLSTARLET sel -t -m "//stats/images/image/used_size" -v .`
+[ "${used}" -eq "${provisioned}" ]
+_sudo mkfs.ext4 -E discard ${DEV} # better idea?
+sync
+provisioned=`rbd -p ${POOL} --format xml du ${IMAGE} |
+ $XMLSTARLET sel -t -m "//stats/images/image/provisioned_size" -v .`
+used=`rbd -p ${POOL} --format xml du ${IMAGE} |
+ $XMLSTARLET sel -t -m "//stats/images/image/used_size" -v .`
+[ "${used}" -lt "${provisioned}" ]
+
+# resize test
+devname=$(basename ${DEV})
+blocks=$(awk -v dev=${devname} '$4 == dev {print $3}' /proc/partitions)
+test -n "${blocks}"
+rbd resize ${POOL}/${IMAGE} --size $((SIZE * 2))M
+rbd info ${POOL}/${IMAGE}
+blocks2=$(awk -v dev=${devname} '$4 == dev {print $3}' /proc/partitions)
+test -n "${blocks2}"
+test ${blocks2} -eq $((blocks * 2))
+rbd resize ${POOL}/${IMAGE} --allow-shrink --size ${SIZE}M
+blocks2=$(awk -v dev=${devname} '$4 == dev {print $3}' /proc/partitions)
+test -n "${blocks2}"
+test ${blocks2} -eq ${blocks}
+
+# read-only option test
+unmap_device ${DEV} ${PID}
+DEV=`_sudo rbd-nbd map --read-only ${POOL}/${IMAGE}`
+PID=$(rbd-nbd list-mapped | awk -v pool=${POOL} -v img=${IMAGE} -v dev=${DEV} \
+ '$2 == pool && $3 == img && $5 == dev {print $1}')
+test -n "${PID}"
+ps -p ${PID} -C rbd-nbd
+
+_sudo dd if=${DEV} of=/dev/null bs=1M
+expect_false _sudo dd if=${DATA} of=${DEV} bs=1M oflag=direct
+unmap_device ${DEV} ${PID}
+
+# exclusive option test
+DEV=`_sudo rbd-nbd map --exclusive ${POOL}/${IMAGE}`
+get_pid
+
+_sudo dd if=${DATA} of=${DEV} bs=1M oflag=direct
+expect_false timeout 10 \
+ rbd bench ${IMAGE} --io-type write --io-size=1024 --io-total=1024
+unmap_device ${DEV} ${PID}
+DEV=
+rbd bench ${IMAGE} --io-type write --io-size=1024 --io-total=1024
+
+# unmap by image name test
+DEV=`_sudo rbd-nbd map ${POOL}/${IMAGE}`
+get_pid
+unmap_device ${IMAGE} ${PID}
+DEV=
+
+# map/unmap snap test
+rbd snap create ${POOL}/${IMAGE}@snap
+DEV=`_sudo rbd-nbd map ${POOL}/${IMAGE}@snap`
+get_pid
+unmap_device "${IMAGE}@snap" ${PID}
+DEV=
+
+# map/unmap namespace test
+rbd snap create ${POOL}/${NS}/${IMAGE}@snap
+DEV=`_sudo rbd-nbd map ${POOL}/${NS}/${IMAGE}@snap`
+get_pid ${NS}
+unmap_device "${POOL}/${NS}/${IMAGE}@snap" ${PID}
+DEV=
+
+# unmap by image name test 2
+DEV=`_sudo rbd-nbd map ${POOL}/${IMAGE}`
+get_pid
+pid=$PID
+DEV=`_sudo rbd-nbd map ${POOL}/${NS}/${IMAGE}`
+get_pid ${NS}
+unmap_device ${POOL}/${NS}/${IMAGE} ${PID}
+DEV=
+unmap_device ${POOL}/${IMAGE} ${pid}
+
+# auto unmap test
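+# killing the rbd-nbd daemon should make the kernel mapping go away on its own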
+DEV=`_sudo rbd-nbd map ${POOL}/${IMAGE}`
+get_pid
+_sudo kill ${PID}
+for i in `seq 10`; do
+ rbd-nbd list-mapped | expect_false grep "^${PID} *${POOL} *${IMAGE}" && break
+ sleep 1
+done
+rbd-nbd list-mapped | expect_false grep "^${PID} *${POOL} *${IMAGE}"
+
+echo OK
diff --git a/qa/workunits/rbd/rbd_groups.sh b/qa/workunits/rbd/rbd_groups.sh
new file mode 100755
index 00000000..2c9fd318
--- /dev/null
+++ b/qa/workunits/rbd/rbd_groups.sh
@@ -0,0 +1,209 @@
+#!/bin/sh -ex
+
+#
+# rbd_groups.sh - test consistency group CLI commands
+#
+
+#
+# Functions
+#
+
+create_group()
+{
+ local group_name=$1
+
+ rbd group create $group_name
+}
+
+list_groups()
+{
+ rbd group list
+}
+
+check_group_exists()
+{
+ local group_name=$1
+ list_groups | grep $group_name
+}
+
+remove_group()
+{
+ local group_name=$1
+
+ rbd group remove $group_name
+}
+
+rename_group()
+{
+ local src_name=$1
+ local dest_name=$2
+
+ rbd group rename $src_name $dest_name
+}
+
+check_group_does_not_exist()
+{
+ local group_name=$1
+ for v in $(list_groups); do
+ if [ "$v" == "$group_name" ]; then
+ return 1
+ fi
+ done
+ return 0
+}
+
+create_image()
+{
+ local image_name=$1
+ rbd create --size 10M $image_name
+}
+
+remove_image()
+{
+ local image_name=$1
+ rbd remove $image_name
+}
+
+add_image_to_group()
+{
+ local image_name=$1
+ local group_name=$2
+ rbd group image add $group_name $image_name
+}
+
+remove_image_from_group()
+{
+ local image_name=$1
+ local group_name=$2
+ rbd group image remove $group_name $image_name
+}
+
+check_image_in_group()
+{
+ local image_name=$1
+ local group_name=$2
+ for v in $(rbd group image list $group_name); do
+ local vtrimmed=${v#*/}
+ if [ "$vtrimmed" = "$image_name" ]; then
+ return 0
+ fi
+ done
+ return 1
+}
+
+check_image_not_in_group()
+{
+ local image_name=$1
+ local group_name=$2
+ for v in $(rbd group image list $group_name); do
+ local vtrimmed=${v#*/}
+ if [ "$vtrimmed" = "$image_name" ]; then
+ return 1
+ fi
+ done
+ return 0
+}
+
+create_snapshot()
+{
+ local group_name=$1
+ local snap_name=$2
+ rbd group snap create $group_name@$snap_name
+}
+
+remove_snapshot()
+{
+ local group_name=$1
+ local snap_name=$2
+ rbd group snap remove $group_name@$snap_name
+}
+
+rename_snapshot()
+{
+ local group_name=$1
+ local snap_name=$2
+ local new_snap_name=$3
+ rbd group snap rename $group_name@$snap_name $new_snap_name
+}
+
+list_snapshots()
+{
+ local group_name=$1
+ rbd group snap list $group_name
+}
+
+rollback_snapshot()
+{
+ local group_name=$1
+ local snap_name=$2
+ rbd group snap rollback $group_name@$snap_name
+}
+
+check_snapshot_in_group()
+{
+ local group_name=$1
+ local snap_name=$2
+ list_snapshots $group_name | grep $snap_name
+}
+
+check_snapshot_not_in_group()
+{
+ local group_name=$1
+ local snap_name=$2
+ for v in $(list_snapshots $group_name | awk '{print $1}'); do
+ if [ "$v" = "$snap_name" ]; then
+ return 1
+ fi
+ done
+ return 0
+}
+
+echo "TEST: create remove consistency group"
+group="test_consistency_group"
+new_group="test_new_consistency_group"
+create_group $group
+check_group_exists $group
+rename_group $group $new_group
+check_group_exists $new_group
+remove_group $new_group
+check_group_does_not_exist $new_group
+echo "PASSED"
+
+echo "TEST: add remove images to consistency group"
+image="test_image"
+group="test_consistency_group"
+create_image $image
+create_group $group
+add_image_to_group $image $group
+check_image_in_group $image $group
+remove_image_from_group $image $group
+check_image_not_in_group $image $group
+remove_group $group
+remove_image $image
+echo "PASSED"
+
+echo "TEST: create remove snapshots of consistency group"
+image="test_image"
+group="test_consistency_group"
+snap="group_snap"
+new_snap="new_group_snap"
+sec_snap="group_snap2"
+create_image $image
+create_group $group
+add_image_to_group $image $group
+create_snapshot $group $snap
+check_snapshot_in_group $group $snap
+rename_snapshot $group $snap $new_snap
+check_snapshot_not_in_group $group $snap
+create_snapshot $group $sec_snap
+check_snapshot_in_group $group $sec_snap
+rollback_snapshot $group $new_snap
+remove_snapshot $group $new_snap
+check_snapshot_not_in_group $group $new_snap
+remove_snapshot $group $sec_snap
+check_snapshot_not_in_group $group $sec_snap
+remove_group $group
+remove_image $image
+echo "PASSED"
+
+echo "OK"
diff --git a/qa/workunits/rbd/rbd_mirror.sh b/qa/workunits/rbd/rbd_mirror.sh
new file mode 100755
index 00000000..45e93e65
--- /dev/null
+++ b/qa/workunits/rbd/rbd_mirror.sh
@@ -0,0 +1,516 @@
+#!/bin/sh -ex
+#
+# rbd_mirror.sh - test rbd-mirror daemon
+#
+# The script starts two ("local" and "remote") clusters using the mstart.sh
+# script, creates a temporary directory used for cluster configs, daemon logs,
+# admin sockets, and temporary files, and launches the rbd-mirror daemon.
+#
+
+. $(dirname $0)/rbd_mirror_helpers.sh
+
+setup
+
+testlog "TEST: add image and test replay"
+start_mirrors ${CLUSTER1}
+image=test
+create_image ${CLUSTER2} ${POOL} ${image}
+set_image_meta ${CLUSTER2} ${POOL} ${image} "key1" "value1"
+set_image_meta ${CLUSTER2} ${POOL} ${image} "key2" "value2"
+wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
+write_image ${CLUSTER2} ${POOL} ${image} 100
+wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image}
+wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'master_position'
+if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then
+ wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'down+unknown'
+fi
+compare_images ${POOL} ${image}
+compare_image_meta ${CLUSTER1} ${POOL} ${image} "key1" "value1"
+compare_image_meta ${CLUSTER1} ${POOL} ${image} "key2" "value2"
+
+testlog "TEST: stop mirror, add image, start mirror and test replay"
+stop_mirrors ${CLUSTER1}
+image1=test1
+create_image ${CLUSTER2} ${POOL} ${image1}
+write_image ${CLUSTER2} ${POOL} ${image1} 100
+start_mirrors ${CLUSTER1}
+wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1}
+wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image1}
+wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' 'master_position'
+if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then
+ wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image1} 'down+unknown'
+fi
+compare_images ${POOL} ${image1}
+
+testlog "TEST: test the first image is replaying after restart"
+write_image ${CLUSTER2} ${POOL} ${image} 100
+wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
+wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image}
+wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'master_position'
+compare_images ${POOL} ${image}
+
+if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then
+ testlog "TEST: stop/start/restart mirror via admin socket"
+ all_admin_daemons ${CLUSTER1} rbd mirror stop
+ wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image}
+ wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image1}
+ wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped'
+ wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+stopped'
+
+ all_admin_daemons ${CLUSTER1} rbd mirror start
+ wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
+ wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1}
+ wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying'
+ wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying'
+
+ all_admin_daemons ${CLUSTER1} rbd mirror restart
+ wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
+ wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1}
+ wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying'
+ wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying'
+
+ all_admin_daemons ${CLUSTER1} rbd mirror stop
+ wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image}
+ wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image1}
+ wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped'
+ wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+stopped'
+
+ all_admin_daemons ${CLUSTER1} rbd mirror restart
+ wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
+ wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1}
+ wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying'
+ wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying'
+
+ all_admin_daemons ${CLUSTER1} rbd mirror stop ${POOL} ${CLUSTER2}${PEER_CLUSTER_SUFFIX}
+ wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image}
+ wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image1}
+ wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped'
+ wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+stopped'
+
+ admin_daemons ${CLUSTER1} rbd mirror start ${POOL}/${image}
+ wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
+ wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying'
+
+ all_admin_daemons ${CLUSTER1} rbd mirror start ${POOL} ${CLUSTER2}${PEER_CLUSTER_SUFFIX}
+ wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1}
+ wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying'
+
+ admin_daemons ${CLUSTER1} rbd mirror restart ${POOL}/${image}
+ wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
+ wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying'
+
+ all_admin_daemons ${CLUSTER1} rbd mirror restart ${POOL} ${CLUSTER2}${PEER_CLUSTER_SUFFIX}
+ wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
+ wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1}
+
+ all_admin_daemons ${CLUSTER1} rbd mirror stop ${POOL} ${CLUSTER2}${PEER_CLUSTER_SUFFIX}
+ wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image}
+ wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image1}
+ wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped'
+ wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+stopped'
+
+ all_admin_daemons ${CLUSTER1} rbd mirror restart ${POOL} ${CLUSTER2}${PEER_CLUSTER_SUFFIX}
+ wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
+ wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1}
+ wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying'
+ wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying'
+
+ flush ${CLUSTER1}
+ all_admin_daemons ${CLUSTER1} rbd mirror status
+fi
+
+testlog "TEST: test image rename"
+new_name="${image}_RENAMED"
+rename_image ${CLUSTER2} ${POOL} ${image} ${new_name}
+wait_for_image_replay_started ${CLUSTER1} ${POOL} ${new_name}
+wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${new_name} 'up+replaying'
+admin_daemons ${CLUSTER1} rbd mirror status ${POOL}/${new_name}
+admin_daemons ${CLUSTER1} rbd mirror restart ${POOL}/${new_name}
+wait_for_image_replay_started ${CLUSTER1} ${POOL} ${new_name}
+wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${new_name} 'up+replaying'
+rename_image ${CLUSTER2} ${POOL} ${new_name} ${image}
+wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
+
+testlog "TEST: test trash move restore"
+image_id=$(get_image_id ${CLUSTER2} ${POOL} ${image})
+trash_move ${CLUSTER2} ${POOL} ${image}
+wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted'
+trash_restore ${CLUSTER2} ${POOL} ${image_id}
+wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
+
+testlog "TEST: failover and failback"
+start_mirrors ${CLUSTER2}
+
+# demote and promote same cluster
+demote_image ${CLUSTER2} ${POOL} ${image}
+wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image}
+wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown'
+wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown'
+promote_image ${CLUSTER2} ${POOL} ${image}
+wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
+write_image ${CLUSTER2} ${POOL} ${image} 100
+wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image}
+wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+stopped'
+wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'master_position'
+compare_images ${POOL} ${image}
+
+# failover (unmodified)
+demote_image ${CLUSTER2} ${POOL} ${image}
+wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image}
+wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown'
+wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown'
+promote_image ${CLUSTER1} ${POOL} ${image}
+wait_for_image_replay_started ${CLUSTER2} ${POOL} ${image}
+
+# failback (unmodified)
+demote_image ${CLUSTER1} ${POOL} ${image}
+wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${image}
+wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown'
+wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown'
+promote_image ${CLUSTER2} ${POOL} ${image}
+wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
+wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image}
+wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'master_position'
+wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+stopped'
+compare_images ${POOL} ${image}
+
+# failover
+demote_image ${CLUSTER2} ${POOL} ${image}
+wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image}
+wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown'
+wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown'
+promote_image ${CLUSTER1} ${POOL} ${image}
+wait_for_image_replay_started ${CLUSTER2} ${POOL} ${image}
+write_image ${CLUSTER1} ${POOL} ${image} 100
+wait_for_replay_complete ${CLUSTER2} ${CLUSTER1} ${POOL} ${image}
+wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped'
+wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+replaying' 'master_position'
+compare_images ${POOL} ${image}
+
+# failback
+demote_image ${CLUSTER1} ${POOL} ${image}
+wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${image}
+wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown'
+wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown'
+promote_image ${CLUSTER2} ${POOL} ${image}
+wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
+write_image ${CLUSTER2} ${POOL} ${image} 100
+wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image}
+wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'master_position'
+wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+stopped'
+compare_images ${POOL} ${image}
+
+# force promote
+force_promote_image=test_force_promote
+create_image ${CLUSTER2} ${POOL} ${force_promote_image}
+write_image ${CLUSTER2} ${POOL} ${force_promote_image} 100
+wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${force_promote_image}
+wait_for_image_replay_started ${CLUSTER1} ${POOL} ${force_promote_image}
+wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${force_promote_image}
+wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${force_promote_image} 'up+replaying' 'master_position'
+wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${force_promote_image} 'up+stopped'
+promote_image ${CLUSTER1} ${POOL} ${force_promote_image} '--force'
+wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${force_promote_image}
+wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${force_promote_image}
+wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${force_promote_image} 'up+stopped'
+wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${force_promote_image} 'up+stopped'
+write_image ${CLUSTER1} ${POOL} ${force_promote_image} 100
+write_image ${CLUSTER2} ${POOL} ${force_promote_image} 100
+
+testlog "TEST: cloned images"
+testlog " - default"
+parent_image=test_parent
+parent_snap=snap
+create_image ${CLUSTER2} ${PARENT_POOL} ${parent_image}
+write_image ${CLUSTER2} ${PARENT_POOL} ${parent_image} 100
+create_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap}
+protect_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap}
+
+clone_image=test_clone
+clone_image ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} ${POOL} ${clone_image}
+write_image ${CLUSTER2} ${POOL} ${clone_image} 100
+
+enable_mirror ${CLUSTER2} ${PARENT_POOL} ${parent_image}
+wait_for_image_replay_started ${CLUSTER1} ${PARENT_POOL} ${parent_image}
+wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${PARENT_POOL} ${parent_image}
+wait_for_status_in_pool_dir ${CLUSTER1} ${PARENT_POOL} ${parent_image} 'up+replaying' 'master_position'
+compare_images ${PARENT_POOL} ${parent_image}
+
+wait_for_image_replay_started ${CLUSTER1} ${POOL} ${clone_image}
+wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${clone_image}
+wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${clone_image} 'up+replaying' 'master_position'
+compare_images ${POOL} ${clone_image}
+
+testlog " - clone v1"
+clone_image ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} ${POOL} ${clone_image}1
+
+clone_image ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} ${POOL} \
+ ${clone_image}_v1 --rbd-default-clone-format 1
+test $(get_clone_format ${CLUSTER2} ${POOL} ${clone_image}_v1) = 1
+wait_for_image_replay_started ${CLUSTER1} ${POOL} ${clone_image}_v1
+test $(get_clone_format ${CLUSTER1} ${POOL} ${clone_image}_v1) = 1
+
+testlog " - clone v2"
+parent_snap=snap_v2
+create_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap}
+clone_image ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} ${POOL} \
+ ${clone_image}_v2 --rbd-default-clone-format 2
+test $(get_clone_format ${CLUSTER2} ${POOL} ${clone_image}_v2) = 2
+wait_for_image_replay_started ${CLUSTER1} ${POOL} ${clone_image}_v2
+test $(get_clone_format ${CLUSTER1} ${POOL} ${clone_image}_v2) = 2
+
+remove_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap}
+test_snap_moved_to_trash ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap}
+wait_for_snap_moved_to_trash ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap}
+remove_image_retry ${CLUSTER2} ${POOL} ${clone_image}_v2
+wait_for_image_present ${CLUSTER1} ${POOL} ${clone_image}_v2 'deleted'
+test_snap_removed_from_trash ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap}
+wait_for_snap_removed_from_trash ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap}
+
+testlog " - clone v2 non-primary"
+create_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap}
+wait_for_snap_present ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap}
+clone_image ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} ${POOL} \
+ ${clone_image}_v2 --rbd-default-clone-format 2
+remove_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap}
+test_snap_removed_from_trash ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap}
+wait_for_snap_moved_to_trash ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap}
+remove_image_retry ${CLUSTER1} ${POOL} ${clone_image}_v2
+wait_for_snap_removed_from_trash ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap}
+
+testlog "TEST: data pool"
+dp_image=test_data_pool
+create_image ${CLUSTER2} ${POOL} ${dp_image} 128 --data-pool ${PARENT_POOL}
+data_pool=$(get_image_data_pool ${CLUSTER2} ${POOL} ${dp_image})
+test "${data_pool}" = "${PARENT_POOL}"
+wait_for_image_replay_started ${CLUSTER1} ${POOL} ${dp_image}
+data_pool=$(get_image_data_pool ${CLUSTER1} ${POOL} ${dp_image})
+test "${data_pool}" = "${PARENT_POOL}"
+create_snapshot ${CLUSTER2} ${POOL} ${dp_image} 'snap1'
+write_image ${CLUSTER2} ${POOL} ${dp_image} 100
+create_snapshot ${CLUSTER2} ${POOL} ${dp_image} 'snap2'
+write_image ${CLUSTER2} ${POOL} ${dp_image} 100
+wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${dp_image}
+wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${dp_image} 'up+replaying' 'master_position'
+compare_images ${POOL} ${dp_image}@snap1
+compare_images ${POOL} ${dp_image}@snap2
+compare_images ${POOL} ${dp_image}
+
+testlog "TEST: disable mirroring / delete non-primary image"
+image2=test2
+image3=test3
+image4=test4
+image5=test5
+for i in ${image2} ${image3} ${image4} ${image5}; do
+ create_image ${CLUSTER2} ${POOL} ${i}
+ write_image ${CLUSTER2} ${POOL} ${i} 100
+ create_snapshot ${CLUSTER2} ${POOL} ${i} 'snap1'
+ create_snapshot ${CLUSTER2} ${POOL} ${i} 'snap2'
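+    # protect snapshots on two of the images so the non-primary removal
+    # path must also handle protected snapshots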
+ if [ "${i}" = "${image4}" ] || [ "${i}" = "${image5}" ]; then
+ protect_snapshot ${CLUSTER2} ${POOL} ${i} 'snap1'
+ protect_snapshot ${CLUSTER2} ${POOL} ${i} 'snap2'
+ fi
+ write_image ${CLUSTER2} ${POOL} ${i} 100
+ wait_for_image_present ${CLUSTER1} ${POOL} ${i} 'present'
+ wait_for_snap_present ${CLUSTER1} ${POOL} ${i} 'snap2'
+done
+
+set_pool_mirror_mode ${CLUSTER2} ${POOL} 'image'
+for i in ${image2} ${image4}; do
+ disable_mirror ${CLUSTER2} ${POOL} ${i}
+done
+
+unprotect_snapshot ${CLUSTER2} ${POOL} ${image5} 'snap1'
+unprotect_snapshot ${CLUSTER2} ${POOL} ${image5} 'snap2'
+for i in ${image3} ${image5}; do
+ remove_snapshot ${CLUSTER2} ${POOL} ${i} 'snap1'
+ remove_snapshot ${CLUSTER2} ${POOL} ${i} 'snap2'
+ remove_image_retry ${CLUSTER2} ${POOL} ${i}
+done
+
+for i in ${image2} ${image3} ${image4} ${image5}; do
+ wait_for_image_present ${CLUSTER1} ${POOL} ${i} 'deleted'
+done
+
+set_pool_mirror_mode ${CLUSTER2} ${POOL} 'pool'
+for i in ${image2} ${image4}; do
+ wait_for_image_present ${CLUSTER1} ${POOL} ${i} 'present'
+ wait_for_snap_present ${CLUSTER1} ${POOL} ${i} 'snap2'
+ wait_for_image_replay_started ${CLUSTER1} ${POOL} ${i}
+ wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${i}
+ compare_images ${POOL} ${i}
+done
+
+testlog "TEST: remove mirroring pool"
+pool=pool_to_remove
+for cluster in ${CLUSTER1} ${CLUSTER2}; do
+ CEPH_ARGS='' ceph --cluster ${cluster} osd pool create ${pool} 16 16
+ CEPH_ARGS='' rbd --cluster ${cluster} pool init ${pool}
+ rbd --cluster ${cluster} mirror pool enable ${pool} pool
+done
+rbd --cluster ${CLUSTER1} mirror pool peer add ${pool} ${CLUSTER2}
+rbd --cluster ${CLUSTER2} mirror pool peer add ${pool} ${CLUSTER1}
+rdp_image=test_remove_data_pool
+create_image ${CLUSTER2} ${pool} ${image} 128
+create_image ${CLUSTER2} ${POOL} ${rdp_image} 128 --data-pool ${pool}
+write_image ${CLUSTER2} ${pool} ${image} 100
+write_image ${CLUSTER2} ${POOL} ${rdp_image} 100
+wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${pool} ${image}
+wait_for_status_in_pool_dir ${CLUSTER1} ${pool} ${image} 'up+replaying' 'master_position'
+wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${rdp_image}
+wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${rdp_image} 'up+replaying' 'master_position'
+for cluster in ${CLUSTER1} ${CLUSTER2}; do
+ CEPH_ARGS='' ceph --cluster ${cluster} osd pool rm ${pool} ${pool} --yes-i-really-really-mean-it
+done
+remove_image_retry ${CLUSTER2} ${POOL} ${rdp_image}
+wait_for_image_present ${CLUSTER1} ${POOL} ${rdp_image} 'deleted'
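+# with the pool gone, the daemon should eventually drop the per-image
+# status; poll with backoff until the status query finally fails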
+for i in 0 1 2 4 8 8 8 8 16 16; do
+ sleep $i
+ admin_daemons "${CLUSTER2}" rbd mirror status ${pool}/${image} || break
+done
+admin_daemons "${CLUSTER2}" rbd mirror status ${pool}/${image} && false
+
+testlog "TEST: snapshot rename"
+snap_name='snap_rename'
+create_snapshot ${CLUSTER2} ${POOL} ${image2} "${snap_name}_0"
+for i in `seq 1 20`; do
+ rename_snapshot ${CLUSTER2} ${POOL} ${image2} "${snap_name}_$(expr ${i} - 1)" "${snap_name}_${i}"
+done
+wait_for_snap_present ${CLUSTER1} ${POOL} ${image2} "${snap_name}_${i}"
+
+testlog "TEST: disable mirror while daemon is stopped"
+stop_mirrors ${CLUSTER1}
+stop_mirrors ${CLUSTER2}
+set_pool_mirror_mode ${CLUSTER2} ${POOL} 'image'
+disable_mirror ${CLUSTER2} ${POOL} ${image}
+if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then
+ test_image_present ${CLUSTER1} ${POOL} ${image} 'present'
+fi
+start_mirrors ${CLUSTER1}
+wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted'
+set_pool_mirror_mode ${CLUSTER2} ${POOL} 'pool'
+wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present'
+wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
+
+testlog "TEST: simple image resync"
+request_resync_image ${CLUSTER1} ${POOL} ${image} image_id
+wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id}
+wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present'
+wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
+wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'master_position'
+compare_images ${POOL} ${image}
+
+testlog "TEST: image resync while replayer is stopped"
+if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then
+ admin_daemons ${CLUSTER1} rbd mirror stop ${POOL}/${image}
+ wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image}
+ request_resync_image ${CLUSTER1} ${POOL} ${image} image_id
+ admin_daemons ${CLUSTER1} rbd mirror start ${POOL}/${image}
+ wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id}
+ admin_daemons ${CLUSTER1} rbd mirror start ${POOL}/${image}
+ wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present'
+ wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
+ wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'master_position'
+ compare_images ${POOL} ${image}
+fi
+
+testlog "TEST: request image resync while daemon is offline"
+stop_mirrors ${CLUSTER1}
+request_resync_image ${CLUSTER1} ${POOL} ${image} image_id
+start_mirrors ${CLUSTER1}
+wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id}
+wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present'
+wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
+wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'master_position'
+compare_images ${POOL} ${image}
+
+testlog "TEST: client disconnect"
+image=laggy
+create_image ${CLUSTER2} ${POOL} ${image} 128 --journal-object-size 64K
+write_image ${CLUSTER2} ${POOL} ${image} 10
+
+testlog " - replay stopped after disconnect"
+wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
+wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image}
+test -n "$(get_mirror_position ${CLUSTER2} ${POOL} ${image})"
+disconnect_image ${CLUSTER2} ${POOL} ${image}
+test -z "$(get_mirror_position ${CLUSTER2} ${POOL} ${image})"
+wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image}
+wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+error' 'disconnected'
+
+testlog " - replay started after resync requested"
+request_resync_image ${CLUSTER1} ${POOL} ${image} image_id
+wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id}
+wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present'
+wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
+wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image}
+test -n "$(get_mirror_position ${CLUSTER2} ${POOL} ${image})"
+compare_images ${POOL} ${image}
+
+testlog " - disconnected after max_concurrent_object_sets reached"
+if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then
+ admin_daemons ${CLUSTER1} rbd mirror stop ${POOL}/${image}
+ wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image}
+ test -n "$(get_mirror_position ${CLUSTER2} ${POOL} ${image})"
+ set_image_meta ${CLUSTER2} ${POOL} ${image} \
+ conf_rbd_journal_max_concurrent_object_sets 1
+ write_image ${CLUSTER2} ${POOL} ${image} 20 16384
+ write_image ${CLUSTER2} ${POOL} ${image} 20 16384
+ test -z "$(get_mirror_position ${CLUSTER2} ${POOL} ${image})"
+ set_image_meta ${CLUSTER2} ${POOL} ${image} \
+ conf_rbd_journal_max_concurrent_object_sets 0
+
+ testlog " - replay is still stopped (disconnected) after restart"
+ admin_daemons ${CLUSTER1} rbd mirror start ${POOL}/${image}
+ wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image}
+ wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+error' 'disconnected'
+fi
+
+testlog " - replay started after resync requested"
+request_resync_image ${CLUSTER1} ${POOL} ${image} image_id
+wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id}
+wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present'
+wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
+wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image}
+test -n "$(get_mirror_position ${CLUSTER2} ${POOL} ${image})"
+compare_images ${POOL} ${image}
+
+testlog " - rbd_mirroring_resync_after_disconnect config option"
+set_image_meta ${CLUSTER2} ${POOL} ${image} \
+ conf_rbd_mirroring_resync_after_disconnect true
+wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image}
+image_id=$(get_image_id ${CLUSTER1} ${POOL} ${image})
+disconnect_image ${CLUSTER2} ${POOL} ${image}
+wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id}
+wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present'
+wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
+wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image}
+test -n "$(get_mirror_position ${CLUSTER2} ${POOL} ${image})"
+compare_images ${POOL} ${image}
+set_image_meta ${CLUSTER2} ${POOL} ${image} \
+ conf_rbd_mirroring_resync_after_disconnect false
+wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image}
+disconnect_image ${CLUSTER2} ${POOL} ${image}
+test -z "$(get_mirror_position ${CLUSTER2} ${POOL} ${image})"
+wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image}
+wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+error' 'disconnected'
+
+testlog "TEST: split-brain"
+image=split-brain
+create_image ${CLUSTER2} ${POOL} ${image}
+wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'master_position'
+promote_image ${CLUSTER1} ${POOL} ${image} --force
+wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image}
+wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped'
+write_image ${CLUSTER1} ${POOL} ${image} 10
+demote_image ${CLUSTER1} ${POOL} ${image}
+wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+error' 'split-brain'
+request_resync_image ${CLUSTER1} ${POOL} ${image} image_id
+wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'master_position'
+
+if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then
+ # teuthology will trash the daemon
+ testlog "TEST: no blacklists"
+ CEPH_ARGS='--id admin' ceph --cluster ${CLUSTER1} osd blacklist ls 2>&1 | grep -q "listed 0 entries"
+ CEPH_ARGS='--id admin' ceph --cluster ${CLUSTER2} osd blacklist ls 2>&1 | grep -q "listed 0 entries"
+fi
diff --git a/qa/workunits/rbd/rbd_mirror_bootstrap.sh b/qa/workunits/rbd/rbd_mirror_bootstrap.sh
new file mode 100755
index 00000000..e0a096ee
--- /dev/null
+++ b/qa/workunits/rbd/rbd_mirror_bootstrap.sh
@@ -0,0 +1,49 @@
+#!/bin/sh -ex
+#
+# rbd_mirror_bootstrap.sh - test peer bootstrap create/import
+#
+
+RBD_MIRROR_MANUAL_PEERS=1
+RBD_MIRROR_INSTANCES=${RBD_MIRROR_INSTANCES:-1}
+. $(dirname $0)/rbd_mirror_helpers.sh
+
+setup
+
+testlog "TEST: bootstrap cluster2 from cluster1"
+# create token on cluster1 and import to cluster2
+TOKEN=${TEMPDIR}/peer-token
+TOKEN_2=${TEMPDIR}/peer-token-2
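+# a bootstrap token carries cluster-wide connection details rather than
+# per-pool state, so tokens created against different pools should be
+# identical (asserted by the cmp below)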
+CEPH_ARGS='' rbd --cluster ${CLUSTER1} mirror pool peer bootstrap create ${POOL} > ${TOKEN}
+CEPH_ARGS='' rbd --cluster ${CLUSTER1} mirror pool peer bootstrap create ${PARENT_POOL} > ${TOKEN_2}
+cmp ${TOKEN} ${TOKEN_2}
+
+CEPH_ARGS='' rbd --cluster ${CLUSTER2} --pool ${POOL} mirror pool peer bootstrap import ${TOKEN} --direction rx-only
+CEPH_ARGS='' rbd --cluster ${CLUSTER2} --pool ${PARENT_POOL} mirror pool peer bootstrap import ${TOKEN} --direction rx-tx
+
+start_mirrors ${CLUSTER1}
+start_mirrors ${CLUSTER2}
+
+testlog "TEST: verify rx-only direction"
+[ "$(rbd --cluster ${CLUSTER1} --pool ${POOL} mirror pool info --format xml |
+ ${XMLSTARLET} sel -t -v '//mirror/peers/peer[1]/uuid')" = "" ]
+
+create_image ${CLUSTER1} ${POOL} image1
+
+wait_for_image_replay_started ${CLUSTER2} ${POOL} image1
+write_image ${CLUSTER1} ${POOL} image1 100
+wait_for_replay_complete ${CLUSTER2} ${CLUSTER1} ${POOL} image1
+
+testlog "TEST: verify rx-tx direction"
+create_image ${CLUSTER1} ${PARENT_POOL} image1
+create_image ${CLUSTER2} ${PARENT_POOL} image2
+
+enable_mirror ${CLUSTER1} ${PARENT_POOL} image1
+enable_mirror ${CLUSTER2} ${PARENT_POOL} image2
+
+wait_for_image_replay_started ${CLUSTER2} ${PARENT_POOL} image1
+write_image ${CLUSTER1} ${PARENT_POOL} image1 100
+wait_for_replay_complete ${CLUSTER2} ${CLUSTER1} ${PARENT_POOL} image1
+
+wait_for_image_replay_started ${CLUSTER1} ${PARENT_POOL} image2
+write_image ${CLUSTER2} ${PARENT_POOL} image2 100
+wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${PARENT_POOL} image2
diff --git a/qa/workunits/rbd/rbd_mirror_fsx_compare.sh b/qa/workunits/rbd/rbd_mirror_fsx_compare.sh
new file mode 100755
index 00000000..0ba3c97d
--- /dev/null
+++ b/qa/workunits/rbd/rbd_mirror_fsx_compare.sh
@@ -0,0 +1,38 @@
+#!/bin/sh -ex
+#
+# rbd_mirror_fsx_compare.sh - test rbd-mirror daemon under FSX workload
+#
+# The script is used to compare FSX-generated images between two clusters.
+#
+
+. $(dirname $0)/rbd_mirror_helpers.sh
+
+trap 'cleanup $?' INT TERM EXIT
+
+setup_tempdir
+
+testlog "TEST: wait for all images"
+image_count=$(rbd --cluster ${CLUSTER1} --pool ${POOL} ls | wc -l)
+retrying_seconds=0
+sleep_seconds=10
+while [ ${retrying_seconds} -le 7200 ]; do
+ [ $(rbd --cluster ${CLUSTER2} --pool ${POOL} ls | wc -l) -ge ${image_count} ] && break
+ sleep ${sleep_seconds}
+ retrying_seconds=$(($retrying_seconds+${sleep_seconds}))
+done
+
+testlog "TEST: snapshot all pool images"
+snap_id=`uuidgen`
+for image in $(rbd --cluster ${CLUSTER1} --pool ${POOL} ls); do
+ create_snapshot ${CLUSTER1} ${POOL} ${image} ${snap_id}
+done
+
+testlog "TEST: wait for snapshots"
+for image in $(rbd --cluster ${CLUSTER1} --pool ${POOL} ls); do
+ wait_for_snap_present ${CLUSTER2} ${POOL} ${image} ${snap_id}
+done
+
+testlog "TEST: compare image snapshots"
+for image in $(rbd --cluster ${CLUSTER1} --pool ${POOL} ls); do
+ compare_image_snapshots ${POOL} ${image}
+done
diff --git a/qa/workunits/rbd/rbd_mirror_fsx_prepare.sh b/qa/workunits/rbd/rbd_mirror_fsx_prepare.sh
new file mode 100755
index 00000000..d988987b
--- /dev/null
+++ b/qa/workunits/rbd/rbd_mirror_fsx_prepare.sh
@@ -0,0 +1,10 @@
+#!/bin/sh -ex
+#
+# rbd_mirror_fsx_prepare.sh - test rbd-mirror daemon under FSX workload
+#
+# The script sets up the environment for comparing FSX-generated images
+# between two clusters.
+#
+
+. $(dirname $0)/rbd_mirror_helpers.sh
+
+setup
diff --git a/qa/workunits/rbd/rbd_mirror_ha.sh b/qa/workunits/rbd/rbd_mirror_ha.sh
new file mode 100755
index 00000000..171ab569
--- /dev/null
+++ b/qa/workunits/rbd/rbd_mirror_ha.sh
@@ -0,0 +1,210 @@
+#!/bin/sh -ex
+#
+# rbd_mirror_ha.sh - test rbd-mirror daemons in HA mode
+#
+
+RBD_MIRROR_INSTANCES=${RBD_MIRROR_INSTANCES:-7}
+
+. $(dirname $0)/rbd_mirror_helpers.sh
+
+setup
+
+is_leader()
+{
+ local instance=$1
+ local pool=$2
+
+ test -n "${pool}" || pool=${POOL}
+
+ admin_daemon "${CLUSTER1}:${instance}" \
+ rbd mirror status ${pool} ${CLUSTER2}${PEER_CLUSTER_SUFFIX} |
+ grep '"leader": true'
+}
+
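+# poll every instance with exponential backoff; on success the global
+# LEADER variable is set to the instance holding the leader lock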
+wait_for_leader()
+{
+ local s instance
+
+ for s in 1 1 2 4 4 4 4 4 8 8 8 8 16 16 32 64; do
+ sleep $s
+ for instance in `seq 0 ${LAST_MIRROR_INSTANCE}`; do
+ is_leader ${instance} || continue
+ LEADER=${instance}
+ return 0
+ done
+ done
+
+ LEADER=
+ return 1
+}
+
+release_leader()
+{
+ local pool=$1
+ local cmd="rbd mirror leader release"
+
+ test -n "${pool}" && cmd="${cmd} ${pool} ${CLUSTER2}"
+
+ admin_daemon "${CLUSTER1}:${LEADER}" ${cmd}
+}
+
+wait_for_leader_released()
+{
+ local i
+
+ test -n "${LEADER}"
+ for i in `seq 10`; do
+ is_leader ${LEADER} || return 0
+ sleep 1
+ done
+
+ return 1
+}
+
+test_replay()
+{
+ local image
+
+ for image; do
+ wait_for_image_replay_started ${CLUSTER1}:${LEADER} ${POOL} ${image}
+ write_image ${CLUSTER2} ${POOL} ${image} 100
+ wait_for_replay_complete ${CLUSTER1}:${LEADER} ${CLUSTER2} ${POOL} \
+ ${image}
+ wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' \
+ 'master_position' \
+ "${MIRROR_USER_ID_PREFIX}${LEADER} on $(hostname -s)"
+ if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then
+ wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} \
+ 'down+unknown'
+ fi
+ compare_images ${POOL} ${image}
+ done
+}
+
+testlog "TEST: start first daemon instance and test replay"
+start_mirror ${CLUSTER1}:0
+image1=test1
+create_image ${CLUSTER2} ${POOL} ${image1}
+LEADER=0
+test_replay ${image1}
+
+testlog "TEST: release leader and wait it is reacquired"
+is_leader 0 ${POOL}
+is_leader 0 ${PARENT_POOL}
+release_leader ${POOL}
+wait_for_leader_released
+is_leader 0 ${PARENT_POOL}
+wait_for_leader
+release_leader
+wait_for_leader_released
+expect_failure "" is_leader 0 ${PARENT_POOL}
+wait_for_leader
+
+testlog "TEST: start second daemon instance and test replay"
+start_mirror ${CLUSTER1}:1
+image2=test2
+create_image ${CLUSTER2} ${POOL} ${image2}
+test_replay ${image1} ${image2}
+
+testlog "TEST: release leader and test it is acquired by secondary"
+is_leader 0 ${POOL}
+is_leader 0 ${PARENT_POOL}
+release_leader ${POOL}
+wait_for_leader_released
+wait_for_leader
+test_replay ${image1} ${image2}
+release_leader
+wait_for_leader_released
+wait_for_leader
+test "${LEADER}" = 0
+
+testlog "TEST: stop first daemon instance and test replay"
+stop_mirror ${CLUSTER1}:0
+image3=test3
+create_image ${CLUSTER2} ${POOL} ${image3}
+LEADER=1
+test_replay ${image1} ${image2} ${image3}
+
+testlog "TEST: start first daemon instance and test replay"
+start_mirror ${CLUSTER1}:0
+image4=test4
+create_image ${CLUSTER2} ${POOL} ${image4}
+test_replay ${image3} ${image4}
+
+testlog "TEST: crash leader and test replay"
+stop_mirror ${CLUSTER1}:1 -KILL
+image5=test5
+create_image ${CLUSTER2} ${POOL} ${image5}
+LEADER=0
+test_replay ${image1} ${image4} ${image5}
+
+testlog "TEST: start crashed leader and test replay"
+start_mirror ${CLUSTER1}:1
+image6=test6
+create_image ${CLUSTER2} ${POOL} ${image6}
+test_replay ${image1} ${image6}
+
+testlog "TEST: start yet another daemon instance and test replay"
+start_mirror ${CLUSTER1}:2
+image7=test7
+create_image ${CLUSTER2} ${POOL} ${image7}
+test_replay ${image1} ${image7}
+
+testlog "TEST: release leader and test it is acquired by secondary"
+is_leader 0
+release_leader
+wait_for_leader_released
+wait_for_leader
+test_replay ${image1} ${image2}
+
+testlog "TEST: stop leader and test replay"
+stop_mirror ${CLUSTER1}:${LEADER}
+image8=test8
+create_image ${CLUSTER2} ${POOL} ${image8}
+prev_leader=${LEADER}
+wait_for_leader
+test_replay ${image1} ${image8}
+
+testlog "TEST: start previous leader and test replay"
+start_mirror ${CLUSTER1}:${prev_leader}
+image9=test9
+create_image ${CLUSTER2} ${POOL} ${image9}
+test_replay ${image1} ${image9}
+
+testlog "TEST: crash leader and test replay"
+stop_mirror ${CLUSTER1}:${LEADER} -KILL
+image10=test10
+create_image ${CLUSTER2} ${POOL} ${image10}
+prev_leader=${LEADER}
+wait_for_leader
+test_replay ${image1} ${image10}
+
+testlog "TEST: start previous leader and test replay"
+start_mirror ${CLUSTER1}:${prev_leader}
+image11=test11
+create_image ${CLUSTER2} ${POOL} ${image11}
+test_replay ${image1} ${image11}
+
+testlog "TEST: start some more daemon instances and test replay"
+start_mirror ${CLUSTER1}:3
+start_mirror ${CLUSTER1}:4
+start_mirror ${CLUSTER1}:5
+start_mirror ${CLUSTER1}:6
+image13=test13
+create_image ${CLUSTER2} ${POOL} ${image13}
+test_replay ${image1} ${image13}
+
+testlog "TEST: release leader and test it is acquired by secondary"
+release_leader
+wait_for_leader_released
+wait_for_leader
+test_replay ${image1} ${image2}
+
+testlog "TEST: in loop: stop leader and test replay"
+for i in 0 1 2 3 4 5; do
+ stop_mirror ${CLUSTER1}:${LEADER}
+ wait_for_leader
+ test_replay ${image1}
+done
+
+stop_mirror ${CLUSTER1}:${LEADER}
diff --git a/qa/workunits/rbd/rbd_mirror_helpers.sh b/qa/workunits/rbd/rbd_mirror_helpers.sh
new file mode 100755
index 00000000..563e256c
--- /dev/null
+++ b/qa/workunits/rbd/rbd_mirror_helpers.sh
@@ -0,0 +1,1211 @@
+#!/bin/sh
+#
+# rbd_mirror_helpers.sh - shared rbd-mirror daemon helper functions
+#
+# The script starts two ("local" and "remote") clusters using the mstart.sh
+# script, creates a temporary directory used for cluster configs, daemon logs,
+# admin sockets, and temporary files, and launches the rbd-mirror daemons.
+#
+# There are several env variables useful when troubleshooting a test failure:
+#
+# RBD_MIRROR_NOCLEANUP - if not empty, don't run the cleanup (stop processes,
+# destroy the clusters and remove the temp directory)
+# on exit, so it is possible to check the test state
+# after failure.
+# RBD_MIRROR_TEMDIR - use this path when creating the temporary directory
+# (should not exist) instead of running mktemp(1).
+# RBD_MIRROR_ARGS - use this to pass additional arguments to started
+# rbd-mirror daemons.
+# RBD_MIRROR_VARGS - use this to pass additional arguments to vstart.sh
+# when starting clusters.
+# RBD_MIRROR_INSTANCES - number of daemons to start per cluster
+# RBD_MIRROR_CONFIG_KEY - if not empty, use config-key for remote cluster
+# secrets
+# The cleanup can be run as a separate step by invoking the script with the
+# `cleanup' argument and RBD_MIRROR_TEMDIR set (see the example below).
+#
+# Note that, like other workunit tests, rbd_mirror.sh expects to find the
+# ceph binaries in PATH.
+#
+# Thus a typical troubleshooting session:
+#
+# From Ceph src dir (CEPH_SRC_PATH), start the test in NOCLEANUP mode and with
+# TEMPDIR pointing to a known location:
+#
+# cd $CEPH_SRC_PATH
+# PATH=$CEPH_SRC_PATH:$PATH
+# RBD_MIRROR_NOCLEANUP=1 RBD_MIRROR_TEMDIR=/tmp/tmp.rbd_mirror \
+# ../qa/workunits/rbd/rbd_mirror.sh
+#
+# After the test failure cd to TEMPDIR and check the current state:
+#
+# cd /tmp/tmp.rbd_mirror
+# ls
+# less rbd-mirror.cluster1_daemon.$pid.log
+# ceph --cluster cluster1 -s
+# ceph --cluster cluster2 -s
+# rbd --cluster cluster2 -p mirror ls
+# rbd --cluster cluster2 -p mirror journal status --image test
+# ceph --admin-daemon rbd-mirror.cluster1_daemon.cluster1.$pid.asok help
+# ...
+#
+# Also you can execute commands (functions) from the script:
+#
+# cd $CEPH_SRC_PATH
+# export RBD_MIRROR_TEMDIR=/tmp/tmp.rbd_mirror
+# ../qa/workunits/rbd/rbd_mirror.sh status
+# ../qa/workunits/rbd/rbd_mirror.sh stop_mirror cluster1
+# ../qa/workunits/rbd/rbd_mirror.sh start_mirror cluster2
+# ../qa/workunits/rbd/rbd_mirror.sh flush cluster2
+# ...
+#
+# Eventually, run the cleanup:
+#
+# cd $CEPH_SRC_PATH
+# RBD_MIRROR_TEMDIR=/tmp/tmp.rbd_mirror \
+# ../qa/workunits/rbd/rbd_mirror.sh cleanup
+#
+
+if type xmlstarlet > /dev/null 2>&1; then
+ XMLSTARLET=xmlstarlet
+elif type xml > /dev/null 2>&1; then
+ XMLSTARLET=xml
+else
+ echo "Missing xmlstarlet binary!"
+ exit 1
+fi
+
+RBD_MIRROR_INSTANCES=${RBD_MIRROR_INSTANCES:-2}
+
+CLUSTER1=cluster1
+CLUSTER2=cluster2
+PEER_CLUSTER_SUFFIX=
+POOL=mirror
+PARENT_POOL=mirror_parent
+TEMPDIR=
+CEPH_ID=${CEPH_ID:-mirror}
+MIRROR_USER_ID_PREFIX=${MIRROR_USER_ID_PREFIX:-${CEPH_ID}.}
+export CEPH_ARGS="--id ${CEPH_ID}"
+
+LAST_MIRROR_INSTANCE=$((${RBD_MIRROR_INSTANCES} - 1))
+
+CEPH_ROOT=$(readlink -f $(dirname $0)/../../../src)
+CEPH_BIN=.
+CEPH_SRC=.
+if [ -e CMakeCache.txt ]; then
+ CEPH_SRC=${CEPH_ROOT}
+ CEPH_ROOT=${PWD}
+ CEPH_BIN=./bin
+
+ # needed for ceph CLI under cmake
+ export LD_LIBRARY_PATH=${CEPH_ROOT}/lib:${LD_LIBRARY_PATH}
+ export PYTHONPATH=${PYTHONPATH}:${CEPH_SRC}/pybind
+ for x in ${CEPH_ROOT}/lib/cython_modules/lib* ; do
+ export PYTHONPATH="${PYTHONPATH}:${x}"
+ done
+fi
+
+# These vars facilitate running this script in an environment with
+# ceph installed from packages, like teuthology. These are not defined
+# by default.
+#
+# RBD_MIRROR_USE_EXISTING_CLUSTER - if set, do not start and stop ceph clusters
+# RBD_MIRROR_USE_RBD_MIRROR - if set, use an existing instance of rbd-mirror
+# running as ceph client $CEPH_ID. If empty,
+# this script will start and stop rbd-mirror
+
+#
+# Functions
+#
+
+# Parse a value in format cluster[:instance] and set cluster and instance vars.
+set_cluster_instance()
+{
+ local val=$1
+ local cluster_var_name=$2
+ local instance_var_name=$3
+
+ cluster=${val%:*}
+ instance=${val##*:}
+
+ if [ "${instance}" = "${val}" ]; then
+ # instance was not specified, use default
+ instance=0
+ fi
+
+ eval ${cluster_var_name}=${cluster}
+ eval ${instance_var_name}=${instance}
+}
+
+daemon_asok_file()
+{
+ local local_cluster=$1
+ local cluster=$2
+ local instance
+
+ set_cluster_instance "${local_cluster}" local_cluster instance
+
+ echo $(ceph-conf --cluster $local_cluster --name "client.${MIRROR_USER_ID_PREFIX}${instance}" 'admin socket')
+}
+
+daemon_pid_file()
+{
+ local cluster=$1
+ local instance
+
+ set_cluster_instance "${cluster}" cluster instance
+
+ echo $(ceph-conf --cluster $cluster --name "client.${MIRROR_USER_ID_PREFIX}${instance}" 'pid file')
+}
+
+testlog()
+{
+ echo $(date '+%F %T') $@ | tee -a "${TEMPDIR}/rbd-mirror.test.log" >&2
+}
+
+expect_failure()
+{
+ local expected="$1" ; shift
+ local out=${TEMPDIR}/expect_failure.out
+
+ if "$@" > ${out} 2>&1 ; then
+ cat ${out} >&2
+ return 1
+ fi
+
+ if [ -z "${expected}" ]; then
+ return 0
+ fi
+
+ if ! grep -q "${expected}" ${out} ; then
+ cat ${out} >&2
+ return 1
+ fi
+
+ return 0
+}
+
+create_users()
+{
+ local cluster=$1
+
+ CEPH_ARGS='' ceph --cluster "${cluster}" \
+ auth get-or-create client.${CEPH_ID} \
+ mon 'profile rbd' osd 'profile rbd' mgr 'profile rbd' >> \
+ ${CEPH_ROOT}/run/${cluster}/keyring
+ for instance in `seq 0 ${LAST_MIRROR_INSTANCE}`; do
+ CEPH_ARGS='' ceph --cluster "${cluster}" \
+ auth get-or-create client.${MIRROR_USER_ID_PREFIX}${instance} \
+ mon 'profile rbd-mirror' osd 'profile rbd' mgr 'profile rbd' >> \
+ ${CEPH_ROOT}/run/${cluster}/keyring
+ done
+}
+
+update_users()
+{
+ local cluster=$1
+
+ CEPH_ARGS='' ceph --cluster "${cluster}" \
+ auth caps client.${CEPH_ID} \
+ mon 'profile rbd' osd 'profile rbd'
+ for instance in `seq 0 ${LAST_MIRROR_INSTANCE}`; do
+ CEPH_ARGS='' ceph --cluster "${cluster}" \
+ auth caps client.${MIRROR_USER_ID_PREFIX}${instance} \
+ mon 'profile rbd-mirror' osd 'profile rbd'
+ done
+}
+
+setup_cluster()
+{
+ local cluster=$1
+
+ CEPH_ARGS='' ${CEPH_SRC}/mstart.sh ${cluster} -n ${RBD_MIRROR_VARGS}
+
+ cd ${CEPH_ROOT}
+ rm -f ${TEMPDIR}/${cluster}.conf
+ ln -s $(readlink -f run/${cluster}/ceph.conf) \
+ ${TEMPDIR}/${cluster}.conf
+
+ cd ${TEMPDIR}
+ create_users "${cluster}"
+
+ for instance in `seq 0 ${LAST_MIRROR_INSTANCE}`; do
+ cat<<EOF >> ${TEMPDIR}/${cluster}.conf
+[client.${MIRROR_USER_ID_PREFIX}${instance}]
+ admin socket = ${TEMPDIR}/rbd-mirror.\$cluster-\$name.asok
+ pid file = ${TEMPDIR}/rbd-mirror.\$cluster-\$name.pid
+ log file = ${TEMPDIR}/rbd-mirror.${cluster}_daemon.${instance}.log
+EOF
+ done
+}
+
+setup_pools()
+{
+ local cluster=$1
+ local remote_cluster=$2
+ local mon_map_file
+ local mon_addr
+ local admin_key_file
+ local uuid
+
+ CEPH_ARGS='' ceph --cluster ${cluster} osd pool create ${POOL} 64 64
+ CEPH_ARGS='' ceph --cluster ${cluster} osd pool create ${PARENT_POOL} 64 64
+
+ CEPH_ARGS='' rbd --cluster ${cluster} pool init ${POOL}
+ CEPH_ARGS='' rbd --cluster ${cluster} pool init ${PARENT_POOL}
+
+ CEPH_ARGS='' rbd --cluster ${cluster} mirror pool enable ${POOL} pool
+ rbd --cluster ${cluster} mirror pool enable ${PARENT_POOL} image
+
+  if [ -z "${RBD_MIRROR_MANUAL_PEERS}" ]; then
+    if [ -z "${RBD_MIRROR_CONFIG_KEY}" ]; then
+ rbd --cluster ${cluster} mirror pool peer add ${POOL} ${remote_cluster}
+ rbd --cluster ${cluster} mirror pool peer add ${PARENT_POOL} ${remote_cluster}
+ else
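+      # register the peer under a "${remote_cluster}-DNE" cluster name that
+      # has no local config, passing the mon host and key explicitly to
+      # exercise the config-key based secret path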
+ mon_map_file=${TEMPDIR}/${remote_cluster}.monmap
+ CEPH_ARGS='' ceph --cluster ${remote_cluster} mon getmap > ${mon_map_file}
+ mon_addr=$(monmaptool --print ${mon_map_file} | grep -E 'mon\.' |
+ head -n 1 | sed -E 's/^[0-9]+: ([^ ]+).+$/\1/' | sed -E 's/\/[0-9]+//g')
+
+ admin_key_file=${TEMPDIR}/${remote_cluster}.client.${CEPH_ID}.key
+ CEPH_ARGS='' ceph --cluster ${remote_cluster} auth get-key client.${CEPH_ID} > ${admin_key_file}
+
+ CEPH_ARGS='' rbd --cluster ${cluster} mirror pool peer add ${POOL} \
+ client.${CEPH_ID}@${remote_cluster}-DNE \
+ --remote-mon-host "${mon_addr}" --remote-key-file ${admin_key_file}
+
+ uuid=$(rbd --cluster ${cluster} mirror pool peer add ${PARENT_POOL} client.${CEPH_ID}@${remote_cluster}-DNE)
+ CEPH_ARGS='' rbd --cluster ${cluster} mirror pool peer set ${PARENT_POOL} ${uuid} mon-host ${mon_addr}
+ CEPH_ARGS='' rbd --cluster ${cluster} mirror pool peer set ${PARENT_POOL} ${uuid} key-file ${admin_key_file}
+
+ PEER_CLUSTER_SUFFIX=-DNE
+ fi
+ fi
+}
+
+setup_tempdir()
+{
+ if [ -n "${RBD_MIRROR_TEMDIR}" ]; then
+ test -d "${RBD_MIRROR_TEMDIR}" ||
+ mkdir "${RBD_MIRROR_TEMDIR}"
+ TEMPDIR="${RBD_MIRROR_TEMDIR}"
+ cd ${TEMPDIR}
+ else
+ TEMPDIR=`mktemp -d`
+ fi
+}
+
+setup()
+{
+ local c
+ trap 'cleanup $?' INT TERM EXIT
+
+ setup_tempdir
+ if [ -z "${RBD_MIRROR_USE_EXISTING_CLUSTER}" ]; then
+ setup_cluster "${CLUSTER1}"
+ setup_cluster "${CLUSTER2}"
+ else
+ update_users "${CLUSTER1}"
+ update_users "${CLUSTER2}"
+ fi
+
+ setup_pools "${CLUSTER1}" "${CLUSTER2}"
+ setup_pools "${CLUSTER2}" "${CLUSTER1}"
+}
+
+cleanup()
+{
+ local error_code=$1
+
+ set +e
+
+ if [ "${error_code}" -ne 0 ]; then
+ status
+ fi
+
+ if [ -z "${RBD_MIRROR_NOCLEANUP}" ]; then
+ local cluster instance
+
+ CEPH_ARGS='' ceph --cluster ${CLUSTER1} osd pool rm ${POOL} ${POOL} --yes-i-really-really-mean-it
+ CEPH_ARGS='' ceph --cluster ${CLUSTER2} osd pool rm ${POOL} ${POOL} --yes-i-really-really-mean-it
+ CEPH_ARGS='' ceph --cluster ${CLUSTER1} osd pool rm ${PARENT_POOL} ${PARENT_POOL} --yes-i-really-really-mean-it
+ CEPH_ARGS='' ceph --cluster ${CLUSTER2} osd pool rm ${PARENT_POOL} ${PARENT_POOL} --yes-i-really-really-mean-it
+
+ for cluster in "${CLUSTER1}" "${CLUSTER2}"; do
+ stop_mirrors "${cluster}"
+ done
+
+ if [ -z "${RBD_MIRROR_USE_EXISTING_CLUSTER}" ]; then
+ cd ${CEPH_ROOT}
+ CEPH_ARGS='' ${CEPH_SRC}/mstop.sh ${CLUSTER1}
+ CEPH_ARGS='' ${CEPH_SRC}/mstop.sh ${CLUSTER2}
+ fi
+ test "${RBD_MIRROR_TEMDIR}" = "${TEMPDIR}" || rm -Rf ${TEMPDIR}
+ fi
+
+ if [ "${error_code}" -eq 0 ]; then
+ echo "OK"
+ else
+ echo "FAIL"
+ fi
+
+ exit ${error_code}
+}
+
+start_mirror()
+{
+ local cluster=$1
+ local instance
+
+ set_cluster_instance "${cluster}" cluster instance
+
+ test -n "${RBD_MIRROR_USE_RBD_MIRROR}" && return
+
+ rbd-mirror \
+ --cluster ${cluster} \
+ --id ${MIRROR_USER_ID_PREFIX}${instance} \
+ --rbd-mirror-delete-retry-interval=5 \
+ --rbd-mirror-image-state-check-interval=5 \
+ --rbd-mirror-journal-poll-age=1 \
+ --rbd-mirror-pool-replayers-refresh-interval=5 \
+ --debug-rbd=30 --debug-journaler=30 \
+ --debug-rbd_mirror=30 \
+ --daemonize=true \
+ ${RBD_MIRROR_ARGS}
+}
+
+start_mirrors()
+{
+ local cluster=$1
+
+ for instance in `seq 0 ${LAST_MIRROR_INSTANCE}`; do
+ start_mirror "${cluster}:${instance}"
+ done
+}
+
+stop_mirror()
+{
+ local cluster=$1
+ local sig=$2
+
+ test -n "${RBD_MIRROR_USE_RBD_MIRROR}" && return
+
+ local pid
+ pid=$(cat $(daemon_pid_file "${cluster}") 2>/dev/null) || :
+ if [ -n "${pid}" ]
+ then
+ kill ${sig} ${pid}
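+        # awk exits non-zero while the pid is still listed, so the loop
+        # breaks only once the daemon is gone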
+ for s in 1 2 4 8 16 32; do
+ sleep $s
+ ps auxww | awk -v pid=${pid} '$2 == pid {print; exit 1}' && break
+ done
+ ps auxww | awk -v pid=${pid} '$2 == pid {print; exit 1}'
+ fi
+ rm -f $(daemon_asok_file "${cluster}" "${CLUSTER1}")
+ rm -f $(daemon_asok_file "${cluster}" "${CLUSTER2}")
+ rm -f $(daemon_pid_file "${cluster}")
+}
+
+stop_mirrors()
+{
+ local cluster=$1
+ local sig=$2
+
+ for instance in `seq 0 ${LAST_MIRROR_INSTANCE}`; do
+ stop_mirror "${cluster}:${instance}" "${sig}"
+ done
+}
+
+admin_daemon()
+{
+ local cluster=$1 ; shift
+ local instance
+
+ set_cluster_instance "${cluster}" cluster instance
+
+ local asok_file=$(daemon_asok_file "${cluster}:${instance}" "${cluster}")
+ test -S "${asok_file}"
+
+ ceph --admin-daemon ${asok_file} $@
+}
+
+admin_daemons()
+{
+ local cluster_instance=$1 ; shift
+ local cluster="${cluster_instance%:*}"
+ local instance="${cluster_instance##*:}"
+ local loop_instance
+
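+
+    # a "cluster:instance" argument targets a single daemon; a bare
+    # cluster name retries across all instances until one responds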
+ for s in 0 1 2 4 8 8 8 8 8 8 8 8 16 16; do
+ sleep ${s}
+ if [ "${instance}" != "${cluster_instance}" ]; then
+ admin_daemon "${cluster}:${instance}" $@ && return 0
+ else
+ for loop_instance in `seq 0 ${LAST_MIRROR_INSTANCE}`; do
+ admin_daemon "${cluster}:${loop_instance}" $@ && return 0
+ done
+ fi
+ done
+ return 1
+}
+
+all_admin_daemons()
+{
+ local cluster=$1 ; shift
+
+ for instance in `seq 0 ${LAST_MIRROR_INSTANCE}`; do
+ admin_daemon "${cluster}:${instance}" $@
+ done
+}
+
+status()
+{
+ local cluster daemon image_pool image
+
+ for cluster in ${CLUSTER1} ${CLUSTER2}
+ do
+ echo "${cluster} status"
+ CEPH_ARGS='' ceph --cluster ${cluster} -s
+ CEPH_ARGS='' ceph --cluster ${cluster} service dump
+ CEPH_ARGS='' ceph --cluster ${cluster} service status
+ echo
+
+ for image_pool in ${POOL} ${PARENT_POOL}
+ do
+ echo "${cluster} ${image_pool} images"
+ rbd --cluster ${cluster} -p ${image_pool} ls -l
+ echo
+
+ echo "${cluster} ${image_pool} mirror pool status"
+ CEPH_ARGS='' rbd --cluster ${cluster} -p ${image_pool} mirror pool status --verbose
+ echo
+
+ for image in `rbd --cluster ${cluster} -p ${image_pool} ls 2>/dev/null`
+ do
+ echo "image ${image} info"
+ rbd --cluster ${cluster} -p ${image_pool} info ${image}
+ echo
+ echo "image ${image} journal status"
+ rbd --cluster ${cluster} -p ${image_pool} journal status --image ${image}
+ echo
+ done
+ done
+ done
+
+    local ret=0
+
+ for cluster in "${CLUSTER1}" "${CLUSTER2}"
+ do
+ for instance in `seq 0 ${LAST_MIRROR_INSTANCE}`; do
+ local pid_file=$(daemon_pid_file ${cluster}:${instance})
+ if [ ! -e ${pid_file} ]
+ then
+ echo "${cluster} rbd-mirror not running or unknown" \
+                 "(${pid_file} does not exist)"
+ continue
+ fi
+
+ local pid
+ pid=$(cat ${pid_file} 2>/dev/null) || :
+ if [ -z "${pid}" ]
+ then
+ echo "${cluster} rbd-mirror not running or unknown" \
+ "(can't find pid using ${pid_file})"
+ ret=1
+ continue
+ fi
+
+            echo "${cluster} rbd-mirror process in ps output:"
+ if ps auxww |
+ awk -v pid=${pid} 'NR == 1 {print} $2 == pid {print; exit 1}'
+ then
+ echo
+ echo "${cluster} rbd-mirror not running" \
+ "(can't find pid $pid in ps output)"
+ ret=1
+ continue
+ fi
+ echo
+
+ local asok_file=$(daemon_asok_file ${cluster}:${instance} ${cluster})
+ if [ ! -S "${asok_file}" ]
+ then
+                echo "${cluster} rbd-mirror asok is unknown (${asok_file} does not exist)"
+ ret=1
+ continue
+ fi
+
+ echo "${cluster} rbd-mirror status"
+ ceph --admin-daemon ${asok_file} rbd mirror status
+ echo
+ done
+ done
+
+ return ${ret}
+}
+
+flush()
+{
+ local cluster=$1
+ local pool=$2
+ local image=$3
+ local cmd="rbd mirror flush"
+
+ if [ -n "${image}" ]
+ then
+ cmd="${cmd} ${pool}/${image}"
+ fi
+
+ admin_daemons "${cluster}" ${cmd}
+}
+
+test_image_replay_state()
+{
+ local cluster=$1
+ local pool=$2
+ local image=$3
+ local test_state=$4
+ local status_result
+ local current_state=stopped
+
+ status_result=$(admin_daemons "${cluster}" rbd mirror status ${pool}/${image} | grep -i 'state') || return 1
+ echo "${status_result}" | grep -i 'Replaying' && current_state=started
+ test "${test_state}" = "${current_state}"
+}
+
+wait_for_image_replay_state()
+{
+ local cluster=$1
+ local pool=$2
+ local image=$3
+ local state=$4
+ local s
+
+ # TODO: add a way to force rbd-mirror to update replayers
+ for s in 1 2 4 8 8 8 8 8 8 8 8 16 16; do
+ sleep ${s}
+ test_image_replay_state "${cluster}" "${pool}" "${image}" "${state}" && return 0
+ done
+ return 1
+}
+
+wait_for_image_replay_started()
+{
+ local cluster=$1
+ local pool=$2
+ local image=$3
+
+ wait_for_image_replay_state "${cluster}" "${pool}" "${image}" started
+}
+
+wait_for_image_replay_stopped()
+{
+ local cluster=$1
+ local pool=$2
+ local image=$3
+
+ wait_for_image_replay_state "${cluster}" "${pool}" "${image}" stopped
+}
+
+get_position()
+{
+ local cluster=$1
+ local pool=$2
+ local image=$3
+ local id_regexp=$4
+
+ # Parse line like below, looking for the first position
+ # [id=, commit_position=[positions=[[object_number=1, tag_tid=3, entry_tid=9], [object_number=0, tag_tid=3, entry_tid=8], [object_number=3, tag_tid=3, entry_tid=7], [object_number=2, tag_tid=3, entry_tid=6]]]]
+
+ local status_log=${TEMPDIR}/${CLUSTER2}-${pool}-${image}.status
+ rbd --cluster ${cluster} -p ${pool} journal status --image ${image} |
+ tee ${status_log} >&2
+ sed -nEe 's/^.*\[id='"${id_regexp}"',.*positions=\[\[([^]]*)\],.*state=connected.*$/\1/p' \
+ ${status_log}
+}
+
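+# the master journal client registers with an empty client id, while
+# mirror peers get non-empty ids -- hence the id regexps below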
+get_master_position()
+{
+ local cluster=$1
+ local pool=$2
+ local image=$3
+
+ get_position "${cluster}" "${pool}" "${image}" ''
+}
+
+get_mirror_position()
+{
+ local cluster=$1
+ local pool=$2
+ local image=$3
+
+ get_position "${cluster}" "${pool}" "${image}" '..*'
+}
+
+wait_for_replay_complete()
+{
+ local local_cluster=$1
+ local cluster=$2
+ local pool=$3
+ local image=$4
+ local s master_pos mirror_pos last_mirror_pos
+ local master_tag master_entry mirror_tag mirror_entry
+
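+    # keep retrying for as long as the mirror position advances; give up
+    # only after a full backoff cycle with no forward progress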
+ while true; do
+ for s in 0.2 0.4 0.8 1.6 2 2 4 4 8 8 16 16 32 32; do
+ sleep ${s}
+ flush "${local_cluster}" "${pool}" "${image}"
+ master_pos=$(get_master_position "${cluster}" "${pool}" "${image}")
+ mirror_pos=$(get_mirror_position "${cluster}" "${pool}" "${image}")
+ test -n "${master_pos}" -a "${master_pos}" = "${mirror_pos}" && return 0
+ test "${mirror_pos}" != "${last_mirror_pos}" && break
+ done
+
+ test "${mirror_pos}" = "${last_mirror_pos}" && return 1
+ last_mirror_pos="${mirror_pos}"
+
+ # handle the case where the mirror is ahead of the master
+ master_tag=$(echo "${master_pos}" | grep -Eo "tag_tid=[0-9]*" | cut -d'=' -f 2)
+ mirror_tag=$(echo "${mirror_pos}" | grep -Eo "tag_tid=[0-9]*" | cut -d'=' -f 2)
+ master_entry=$(echo "${master_pos}" | grep -Eo "entry_tid=[0-9]*" | cut -d'=' -f 2)
+ mirror_entry=$(echo "${mirror_pos}" | grep -Eo "entry_tid=[0-9]*" | cut -d'=' -f 2)
+ test "${master_tag}" = "${mirror_tag}" -a ${master_entry} -le ${mirror_entry} && return 0
+ done
+ return 1
+}
+
+test_status_in_pool_dir()
+{
+ local cluster=$1
+ local pool=$2
+ local image=$3
+ local state_pattern="$4"
+ local description_pattern="$5"
+ local service_pattern="$6"
+
+ local status_log=${TEMPDIR}/${cluster}-${pool}-${image}.mirror_status
+ CEPH_ARGS='' rbd --cluster ${cluster} -p ${pool} mirror image status ${image} |
+ tee ${status_log} >&2
+ grep "state: .*${state_pattern}" ${status_log} || return 1
+ grep "description: .*${description_pattern}" ${status_log} || return 1
+
+ if [ -n "${service_pattern}" ]; then
+ grep "service: *${service_pattern}" ${status_log} || return 1
+ elif echo ${state_pattern} | grep '^up+'; then
+ grep "service: *${MIRROR_USER_ID_PREFIX}.* on " ${status_log} || return 1
+ else
+ grep "service: " ${status_log} && return 1
+ fi
+
+ # recheck using `mirror pool status` command to stress test it.
+
+ local last_update="$(sed -nEe 's/^ *last_update: *(.*) *$/\1/p' ${status_log})"
+ test_mirror_pool_status_verbose \
+ ${cluster} ${pool} ${image} "${state_pattern}" "${last_update}" &&
+ return 0
+
+ echo "'mirror pool status' test failed" >&2
+ exit 1
+}
+
+test_mirror_pool_status_verbose()
+{
+ local cluster=$1
+ local pool=$2
+ local image=$3
+ local state_pattern="$4"
+ local prev_last_update="$5"
+
+ local status_log=${TEMPDIR}/${cluster}-${pool}.mirror_status
+
+ rbd --cluster ${cluster} mirror pool status ${pool} --verbose --format xml \
+ > ${status_log}
+
+ local last_update state
+ last_update=$($XMLSTARLET sel -t -v \
+ "//images/image[name='${image}']/last_update" < ${status_log})
+ state=$($XMLSTARLET sel -t -v \
+ "//images/image[name='${image}']/state" < ${status_log})
+
+ echo "${state}" | grep "${state_pattern}" ||
+ test "${last_update}" '>' "${prev_last_update}"
+}
+
+wait_for_status_in_pool_dir()
+{
+ local cluster=$1
+ local pool=$2
+ local image=$3
+ local state_pattern="$4"
+ local description_pattern="$5"
+ local service_pattern="$6"
+
+ for s in 1 2 4 8 8 8 8 8 8 8 8 16 16; do
+ sleep ${s}
+ test_status_in_pool_dir ${cluster} ${pool} ${image} "${state_pattern}" \
+ "${description_pattern}" "${service_pattern}" &&
+ return 0
+ done
+ return 1
+}
+
+create_image()
+{
+ local cluster=$1 ; shift
+ local pool=$1 ; shift
+ local image=$1 ; shift
+ local size=128
+
+ if [ -n "$1" ]; then
+ size=$1
+ shift
+ fi
+
+ rbd --cluster ${cluster} -p ${pool} create --size ${size} \
+ --image-feature layering,exclusive-lock,journaling $@ ${image}
+}
+
+set_image_meta()
+{
+ local cluster=$1
+ local pool=$2
+ local image=$3
+ local key=$4
+ local val=$5
+
+ rbd --cluster ${cluster} -p ${pool} image-meta set ${image} $key $val
+}
+
+compare_image_meta()
+{
+ local cluster=$1
+ local pool=$2
+ local image=$3
+ local key=$4
+ local value=$5
+
+ test `rbd --cluster ${cluster} -p ${pool} image-meta get ${image} ${key}` = "${value}"
+}
+
+rename_image()
+{
+ local cluster=$1
+ local pool=$2
+ local image=$3
+ local new_name=$4
+
+ rbd --cluster=${cluster} -p ${pool} rename ${image} ${new_name}
+}
+
+remove_image()
+{
+ local cluster=$1
+ local pool=$2
+ local image=$3
+
+ rbd --cluster=${cluster} -p ${pool} snap purge ${image}
+ rbd --cluster=${cluster} -p ${pool} rm ${image}
+}
+
+remove_image_retry()
+{
+ local cluster=$1
+ local pool=$2
+ local image=$3
+
+ for s in 0 1 2 4 8 16 32; do
+ sleep ${s}
+ remove_image ${cluster} ${pool} ${image} && return 0
+ done
+ return 1
+}
+
+trash_move() {
+ local cluster=$1
+ local pool=$2
+ local image=$3
+
+ rbd --cluster=${cluster} -p ${pool} trash move ${image}
+}
+
+trash_restore() {
+ local cluster=$1
+ local pool=$2
+ local image_id=$3
+
+ rbd --cluster=${cluster} -p ${pool} trash restore ${image_id}
+}
+
+clone_image()
+{
+ local cluster=$1
+ local parent_pool=$2
+ local parent_image=$3
+ local parent_snap=$4
+ local clone_pool=$5
+ local clone_image=$6
+
+ shift 6
+
+ rbd --cluster ${cluster} clone \
+ ${parent_pool}/${parent_image}@${parent_snap} \
+ ${clone_pool}/${clone_image} \
+ --image-feature layering,exclusive-lock,journaling $@
+}
+
+disconnect_image()
+{
+ local cluster=$1
+ local pool=$2
+ local image=$3
+
+ rbd --cluster ${cluster} -p ${pool} journal client disconnect \
+ --image ${image}
+}
+
+create_snapshot()
+{
+ local cluster=$1
+ local pool=$2
+ local image=$3
+ local snap=$4
+
+ rbd --cluster ${cluster} -p ${pool} snap create ${image}@${snap}
+}
+
+remove_snapshot()
+{
+ local cluster=$1
+ local pool=$2
+ local image=$3
+ local snap=$4
+
+ rbd --cluster ${cluster} -p ${pool} snap rm ${image}@${snap}
+}
+
+rename_snapshot()
+{
+ local cluster=$1
+ local pool=$2
+ local image=$3
+ local snap=$4
+ local new_snap=$5
+
+ rbd --cluster ${cluster} -p ${pool} snap rename ${image}@${snap} ${image}@${new_snap}
+}
+
+purge_snapshots()
+{
+ local cluster=$1
+ local pool=$2
+ local image=$3
+
+ rbd --cluster ${cluster} -p ${pool} snap purge ${image}
+}
+
+protect_snapshot()
+{
+ local cluster=$1
+ local pool=$2
+ local image=$3
+ local snap=$4
+
+ rbd --cluster ${cluster} -p ${pool} snap protect ${image}@${snap}
+}
+
+unprotect_snapshot()
+{
+ local cluster=$1
+ local pool=$2
+ local image=$3
+ local snap=$4
+
+ rbd --cluster ${cluster} -p ${pool} snap unprotect ${image}@${snap}
+}
+
+wait_for_snap_present()
+{
+ local cluster=$1
+ local pool=$2
+ local image=$3
+ local snap_name=$4
+ local s
+
+ for s in 1 2 4 8 8 8 8 8 8 8 8 16 16 16 16 32 32 32 32; do
+ sleep ${s}
+ rbd --cluster ${cluster} -p ${pool} info ${image}@${snap_name} || continue
+ return 0
+ done
+ return 1
+}
+
+test_snap_moved_to_trash()
+{
+ local cluster=$1
+ local pool=$2
+ local image=$3
+ local snap_name=$4
+
+ rbd --cluster ${cluster} snap ls ${pool}/${image} --all |
+ grep -F " trash (${snap_name})"
+}
+
+wait_for_snap_moved_to_trash()
+{
+ local s
+
+ for s in 1 2 4 8 8 8 8 8 8 8 8 16 16 16 16 32 32 32 32; do
+ sleep ${s}
+ test_snap_moved_to_trash $@ || continue
+ return 0
+ done
+ return 1
+}
+
+test_snap_removed_from_trash()
+{
+ test_snap_moved_to_trash $@ && return 1
+ return 0
+}
+
+wait_for_snap_removed_from_trash()
+{
+ local s
+
+ for s in 1 2 4 8 8 8 8 8 8 8 8 16 16 16 16 32 32 32 32; do
+ sleep ${s}
+ test_snap_removed_from_trash $@ || continue
+ return 0
+ done
+ return 1
+}
+
+write_image()
+{
+ local cluster=$1
+ local pool=$2
+ local image=$3
+ local count=$4
+ local size=$5
+
+ test -n "${size}" || size=4096
+
+ rbd --cluster ${cluster} -p ${pool} bench ${image} --io-type write \
+ --io-size ${size} --io-threads 1 --io-total $((size * count)) \
+ --io-pattern rand
+}
+
+stress_write_image()
+{
+ local cluster=$1
+ local pool=$2
+ local image=$3
+ local duration=$(awk 'BEGIN {srand(); print int(10 * rand()) + 5}')
+
+ set +e
+ timeout ${duration}s ceph_test_rbd_mirror_random_write \
+ --cluster ${cluster} ${pool} ${image} \
+ --debug-rbd=20 --debug-journaler=20 \
+ 2> ${TEMPDIR}/rbd-mirror-random-write.log
+ error_code=$?
+ set -e
+
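+    # timeout(1) exits with status 124 when it kills the still-running
+    # workload on expiry, which is the expected outcome here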
+ if [ $error_code -eq 124 ]; then
+ return 0
+ fi
+ return 1
+}
+
+compare_images()
+{
+ local pool=$1
+ local image=$2
+
+ local rmt_export=${TEMPDIR}/${CLUSTER2}-${pool}-${image}.export
+ local loc_export=${TEMPDIR}/${CLUSTER1}-${pool}-${image}.export
+
+ rm -f ${rmt_export} ${loc_export}
+ rbd --cluster ${CLUSTER2} -p ${pool} export ${image} - | xxd > ${rmt_export}
+ rbd --cluster ${CLUSTER1} -p ${pool} export ${image} - | xxd > ${loc_export}
+ sdiff -s ${rmt_export} ${loc_export} | head -n 64
+ rm -f ${rmt_export} ${loc_export}
+}
+
+compare_image_snapshots()
+{
+ local pool=$1
+ local image=$2
+
+ local rmt_export=${TEMPDIR}/${CLUSTER2}-${pool}-${image}.export
+ local loc_export=${TEMPDIR}/${CLUSTER1}-${pool}-${image}.export
+
+ for snap_name in $(rbd --cluster ${CLUSTER1} -p ${pool} --format xml \
+ snap list ${image} | \
+ $XMLSTARLET sel -t -v "//snapshot/name" | \
+ grep -E -v "^\.rbd-mirror\."); do
+ rm -f ${rmt_export} ${loc_export}
+ rbd --cluster ${CLUSTER2} -p ${pool} export ${image}@${snap_name} - | xxd > ${rmt_export}
+ rbd --cluster ${CLUSTER1} -p ${pool} export ${image}@${snap_name} - | xxd > ${loc_export}
+ sdiff -s ${rmt_export} ${loc_export} | head -n 64
+ done
+ rm -f ${rmt_export} ${loc_export}
+}
+
+demote_image()
+{
+ local cluster=$1
+ local pool=$2
+ local image=$3
+
+ rbd --cluster=${cluster} mirror image demote ${pool}/${image}
+}
+
+promote_image()
+{
+ local cluster=$1
+ local pool=$2
+ local image=$3
+ local force=$4
+
+ rbd --cluster=${cluster} mirror image promote ${pool}/${image} ${force}
+}
+
+set_pool_mirror_mode()
+{
+ local cluster=$1
+ local pool=$2
+ local mode=$3
+
+ rbd --cluster=${cluster} -p ${pool} mirror pool enable ${mode}
+}
+
+disable_mirror()
+{
+ local cluster=$1
+ local pool=$2
+ local image=$3
+
+ rbd --cluster=${cluster} mirror image disable ${pool}/${image}
+}
+
+enable_mirror()
+{
+ local cluster=$1
+ local pool=$2
+ local image=$3
+
+ rbd --cluster=${cluster} mirror image enable ${pool}/${image}
+}
+
+test_image_present()
+{
+ local cluster=$1
+ local pool=$2
+ local image=$3
+ local test_state=$4
+ local image_id=$5
+ local current_state=deleted
+ local current_image_id
+
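+    # an optional image id lets callers distinguish a recreated image
+    # (same name, new id) from the original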
+ current_image_id=$(get_image_id ${cluster} ${pool} ${image})
+ test -n "${current_image_id}" &&
+ test -z "${image_id}" -o "${image_id}" = "${current_image_id}" &&
+ current_state=present
+
+ test "${test_state}" = "${current_state}"
+}
+
+wait_for_image_present()
+{
+ local cluster=$1
+ local pool=$2
+ local image=$3
+ local state=$4
+ local image_id=$5
+ local s
+
+ test -n "${image_id}" ||
+ image_id=$(get_image_id ${cluster} ${pool} ${image})
+
+ # TODO: add a way to force rbd-mirror to update replayers
+ for s in 0.1 1 2 4 8 8 8 8 8 8 8 8 16 16 32 32; do
+ sleep ${s}
+ test_image_present \
+ "${cluster}" "${pool}" "${image}" "${state}" "${image_id}" &&
+ return 0
+ done
+ return 1
+}
+
+get_image_id()
+{
+ local cluster=$1
+ local pool=$2
+ local image=$3
+
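+    # for v2 images the block_name_prefix is "rbd_data.<image id>", so
+    # its suffix doubles as the image id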
+ rbd --cluster=${cluster} -p ${pool} info ${image} |
+ sed -ne 's/^.*block_name_prefix: rbd_data\.//p'
+}
+
+request_resync_image()
+{
+ local cluster=$1
+ local pool=$2
+ local image=$3
+ local image_id_var_name=$4
+
+ eval "${image_id_var_name}='$(get_image_id ${cluster} ${pool} ${image})'"
+ eval 'test -n "$'${image_id_var_name}'"'
+
+ rbd --cluster=${cluster} -p ${pool} mirror image resync ${image}
+}
+
+get_image_data_pool()
+{
+ local cluster=$1
+ local pool=$2
+ local image=$3
+
+ rbd --cluster ${cluster} -p ${pool} info ${image} |
+ awk '$1 == "data_pool:" {print $2}'
+}
+
+get_clone_format()
+{
+ local cluster=$1
+ local pool=$2
+ local image=$3
+
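+    # v2 clones advertise the clone-child op feature; an image that has
+    # a parent but lacks that feature is a v1 clone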
+ rbd --cluster ${cluster} info ${pool}/${image} |
+ awk 'BEGIN {
+ format = 1
+ }
+ $1 == "parent:" {
+ parent = $2
+ }
+ /op_features: .*clone-child/ {
+ format = 2
+ }
+ END {
+ if (!parent) exit 1
+ print format
+ }'
+}
+
+#
+# Main
+#
+
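+# when invoked with arguments, run a single helper function against an
+# existing TEMPDIR (see the troubleshooting examples in the header)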
+if [ "$#" -gt 0 ]
+then
+ if [ -z "${RBD_MIRROR_TEMDIR}" ]
+ then
+ echo "RBD_MIRROR_TEMDIR is not set" >&2
+ exit 1
+ fi
+
+ TEMPDIR="${RBD_MIRROR_TEMDIR}"
+ cd ${TEMPDIR}
+ $@
+ exit $?
+fi
diff --git a/qa/workunits/rbd/rbd_mirror_stress.sh b/qa/workunits/rbd/rbd_mirror_stress.sh
new file mode 100755
index 00000000..a1d7d034
--- /dev/null
+++ b/qa/workunits/rbd/rbd_mirror_stress.sh
@@ -0,0 +1,186 @@
+#!/bin/sh -ex
+#
+# rbd_mirror_stress.sh - stress test rbd-mirror daemon
+#
+# The following additional environment variables affect the test:
+#
+# RBD_MIRROR_REDUCE_WRITES - if not empty, don't run the stress bench write
+# tool during the many image test
+#
+
+IMAGE_COUNT=50
+export LOCKDEP=0
+
+. $(dirname $0)/rbd_mirror_helpers.sh
+
+setup
+
+create_snap()
+{
+ local cluster=$1
+ local pool=$2
+ local image=$3
+ local snap_name=$4
+
+ rbd --cluster ${cluster} -p ${pool} snap create ${image}@${snap_name} \
+ --debug-rbd=20 --debug-journaler=20 2> ${TEMPDIR}/rbd-snap-create.log
+}
+
+compare_image_snaps()
+{
+ local pool=$1
+ local image=$2
+ local snap_name=$3
+
+ local rmt_export=${TEMPDIR}/${CLUSTER2}-${pool}-${image}.export
+ local loc_export=${TEMPDIR}/${CLUSTER1}-${pool}-${image}.export
+
+ rm -f ${rmt_export} ${loc_export}
+ rbd --cluster ${CLUSTER2} -p ${pool} export ${image}@${snap_name} ${rmt_export}
+ rbd --cluster ${CLUSTER1} -p ${pool} export ${image}@${snap_name} ${loc_export}
+ cmp ${rmt_export} ${loc_export}
+ rm -f ${rmt_export} ${loc_export}
+}
+
+wait_for_pool_images()
+{
+ local cluster=$1
+ local pool=$2
+ local image_count=$3
+ local s
+ local count
+ local last_count=0
+
+ while true; do
+ for s in `seq 1 40`; do
+ test $s -ne 1 && sleep 30
+ count=$(rbd --cluster ${cluster} -p ${pool} mirror pool status | grep 'images: ' | cut -d' ' -f 2)
+ test "${count}" = "${image_count}" && return 0
+
+ # reset timeout if making forward progress
+ test $count -ne $last_count && break
+ done
+
+ test $count -eq $last_count && break
+ last_count=$count
+ done
+ rbd --cluster ${cluster} -p ${pool} mirror pool status --verbose >&2
+ return 1
+}
+
+wait_for_pool_healthy()
+{
+ local cluster=$1
+ local pool=$2
+ local s
+ local state
+
+ for s in `seq 1 40`; do
+ test $s -ne 1 && sleep 30
+ state=$(rbd --cluster ${cluster} -p ${pool} mirror pool status | grep 'health:' | cut -d' ' -f 2)
+ test "${state}" = "ERROR" && break
+ test "${state}" = "OK" && return 0
+ done
+ rbd --cluster ${cluster} -p ${pool} mirror pool status --verbose >&2
+ return 1
+}
+
+start_mirrors ${CLUSTER1}
+start_mirrors ${CLUSTER2}
+
+testlog "TEST: add image and test replay after client crashes"
+image=test
+create_image ${CLUSTER2} ${POOL} ${image} '512M'
+wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
+
+for i in `seq 1 10`
+do
+ stress_write_image ${CLUSTER2} ${POOL} ${image}
+
+ wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'master_position'
+
+ snap_name="snap${i}"
+ create_snap ${CLUSTER2} ${POOL} ${image} ${snap_name}
+ wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
+ wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image}
+ wait_for_snap_present ${CLUSTER1} ${POOL} ${image} ${snap_name}
+ compare_image_snaps ${POOL} ${image} ${snap_name}
+done
+
+for i in `seq 1 10`
+do
+ snap_name="snap${i}"
+ remove_snapshot ${CLUSTER2} ${POOL} ${image} ${snap_name}
+done
+
+remove_image_retry ${CLUSTER2} ${POOL} ${image}
+wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted'
+
+testlog "TEST: create many images"
+snap_name="snap"
+for i in `seq 1 ${IMAGE_COUNT}`
+do
+ image="image_${i}"
+ create_image ${CLUSTER2} ${POOL} ${image} '128M'
+ if [ -n "${RBD_MIRROR_REDUCE_WRITES}" ]; then
+ write_image ${CLUSTER2} ${POOL} ${image} 100
+ else
+ stress_write_image ${CLUSTER2} ${POOL} ${image}
+ fi
+done
+
+wait_for_pool_images ${CLUSTER2} ${POOL} ${IMAGE_COUNT}
+wait_for_pool_healthy ${CLUSTER2} ${POOL}
+
+wait_for_pool_images ${CLUSTER1} ${POOL} ${IMAGE_COUNT}
+wait_for_pool_healthy ${CLUSTER1} ${POOL}
+
+testlog "TEST: compare many images"
+for i in `seq 1 ${IMAGE_COUNT}`
+do
+ image="image_${i}"
+ create_snap ${CLUSTER2} ${POOL} ${image} ${snap_name}
+ wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
+ wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image}
+ wait_for_snap_present ${CLUSTER1} ${POOL} ${image} ${snap_name}
+ compare_image_snaps ${POOL} ${image} ${snap_name}
+done
+
+testlog "TEST: delete many images"
+for i in `seq 1 ${IMAGE_COUNT}`
+do
+ image="image_${i}"
+ remove_snapshot ${CLUSTER2} ${POOL} ${image} ${snap_name}
+ remove_image_retry ${CLUSTER2} ${POOL} ${image}
+done
+
+testlog "TEST: image deletions should propagate"
+wait_for_pool_images ${CLUSTER1} ${POOL} 0
+wait_for_pool_healthy ${CLUSTER1} ${POOL}
+for i in `seq 1 ${IMAGE_COUNT}`
+do
+ image="image_${i}"
+ wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted'
+done
+
+testlog "TEST: delete images during bootstrap"
+set_pool_mirror_mode ${CLUSTER1} ${POOL} 'image'
+set_pool_mirror_mode ${CLUSTER2} ${POOL} 'image'
+
+start_mirror ${CLUSTER1}
+image=test
+
+for i in `seq 1 10`
+do
+ image="image_${i}"
+ create_image ${CLUSTER2} ${POOL} ${image} '512M'
+ enable_mirror ${CLUSTER2} ${POOL} ${image}
+
+ stress_write_image ${CLUSTER2} ${POOL} ${image}
+ wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present'
+
+ disable_mirror ${CLUSTER2} ${POOL} ${image}
+ wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted'
+ purge_snapshots ${CLUSTER2} ${POOL} ${image}
+ remove_image_retry ${CLUSTER2} ${POOL} ${image}
+done
diff --git a/qa/workunits/rbd/read-flags.sh b/qa/workunits/rbd/read-flags.sh
new file mode 100755
index 00000000..7d787ce6
--- /dev/null
+++ b/qa/workunits/rbd/read-flags.sh
@@ -0,0 +1,61 @@
+#!/usr/bin/env bash
+set -ex
+
+# create a snapshot, then export it and check that setting read flags works
+# by looking at --debug-ms output
+
+function clean_up {
+ rm -f test.log || true
+ rbd snap remove test@snap || true
+ rbd rm test || true
+}
+
+function test_read_flags {
+ local IMAGE=$1
+ local SET_BALANCED=$2
+ local SET_LOCALIZED=$3
+ local EXPECT_BALANCED=$4
+ local EXPECT_LOCALIZED=$5
+
+ local EXTRA_ARGS="--log-file test.log --debug-ms 1 --no-log-to-stderr"
+ if [ "$SET_BALANCED" = 'y' ]; then
+ EXTRA_ARGS="$EXTRA_ARGS --rbd-balance-snap-reads"
+ elif [ "$SET_LOCALIZED" = 'y' ]; then
+ EXTRA_ARGS="$EXTRA_ARGS --rbd-localize-snap-reads"
+ fi
+
+ rbd export $IMAGE - $EXTRA_ARGS > /dev/null
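+
+  # grep -L prints the file name only when the pattern is absent, so the
+  # piped grep asserts that the flag never made it into the log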
+ if [ "$EXPECT_BALANCED" = 'y' ]; then
+ grep -q balance_reads test.log
+ else
+ grep -L balance_reads test.log | grep -q test.log
+ fi
+ if [ "$EXPECT_LOCALIZED" = 'y' ]; then
+ grep -q localize_reads test.log
+ else
+ grep -L localize_reads test.log | grep -q test.log
+ fi
+ rm -f test.log
+
+}
+
+clean_up
+
+trap clean_up INT TERM EXIT
+
+rbd create --image-feature layering -s 10 test
+rbd snap create test@snap
+
+# export from non snapshot with or without settings should not have flags
+test_read_flags test n n n n
+test_read_flags test y y n n
+
+# export from snapshot should have read flags in log if they are set
+test_read_flags test@snap n n n n
+test_read_flags test@snap y n y n
+test_read_flags test@snap n y n y
+
+# when both options are set, balance_reads takes priority over localize_reads
+test_read_flags test@snap y y y n
+
+echo OK
diff --git a/qa/workunits/rbd/simple_big.sh b/qa/workunits/rbd/simple_big.sh
new file mode 100755
index 00000000..70aafda4
--- /dev/null
+++ b/qa/workunits/rbd/simple_big.sh
@@ -0,0 +1,12 @@
+#!/bin/sh -ex
+
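+# image size in MB (100000 MB, roughly 100 GB), also used as the dd count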
+mb=100000
+
+rbd create foo --size $mb
+DEV=$(sudo rbd map foo)
+dd if=/dev/zero of=$DEV bs=1M count=$mb
+dd if=$DEV of=/dev/null bs=1M count=$mb
+sudo rbd unmap $DEV
+rbd rm foo
+
+echo OK
diff --git a/qa/workunits/rbd/test_admin_socket.sh b/qa/workunits/rbd/test_admin_socket.sh
new file mode 100755
index 00000000..6b960787
--- /dev/null
+++ b/qa/workunits/rbd/test_admin_socket.sh
@@ -0,0 +1,151 @@
+#!/usr/bin/env bash
+set -ex
+
+TMPDIR=/tmp/rbd_test_admin_socket$$
+mkdir $TMPDIR
+trap "rm -fr $TMPDIR" 0
+
+. $(dirname $0)/../../standalone/ceph-helpers.sh
+
+function expect_false()
+{
+ set -x
+ if "$@"; then return 1; else return 0; fi
+}
+
+function rbd_watch_out_file()
+{
+ echo ${TMPDIR}/rbd_watch_$1.out
+}
+
+function rbd_watch_pid_file()
+{
+ echo ${TMPDIR}/rbd_watch_$1.pid
+}
+
+function rbd_watch_fifo()
+{
+ echo ${TMPDIR}/rbd_watch_$1.fifo
+}
+
+function rbd_watch_asok()
+{
+ echo ${TMPDIR}/rbd_watch_$1.asok
+}
+
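+# read a perf counter value for the image from its admin socket; XMLSTARLET
+# is expected to be set by ceph-helpers.sh, sourced above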
+function rbd_get_perfcounter()
+{
+ local image=$1
+ local counter=$2
+ local name
+
+ name=$(ceph --format xml --admin-daemon $(rbd_watch_asok ${image}) \
+ perf schema | $XMLSTARLET el -d3 |
+ grep "/librbd-.*-${image}/${counter}\$")
+ test -n "${name}" || return 1
+
+ ceph --format xml --admin-daemon $(rbd_watch_asok ${image}) perf dump |
+ $XMLSTARLET sel -t -m "${name}" -v .
+}
+
+function rbd_check_perfcounter()
+{
+ local image=$1
+ local counter=$2
+ local expected_val=$3
+ local val=
+
+ val=$(rbd_get_perfcounter ${image} ${counter})
+
+ test "${val}" -eq "${expected_val}"
+}
+
+function rbd_watch_start()
+{
+ local image=$1
+ local asok=$(rbd_watch_asok ${image})
+
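+ # the fifo keeps the watcher's stdin open; rbd_watch_end() writes a newline
+ # to it later to make 'rbd watch' exit cleanly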
+ mkfifo $(rbd_watch_fifo ${image})
+ (cat $(rbd_watch_fifo ${image}) |
+ rbd --admin-socket ${asok} watch ${image} \
+ > $(rbd_watch_out_file ${image}) 2>&1)&
+
+ # find pid of the started rbd watch process
+ local pid
+ for i in `seq 10`; do
+ pid=$(ps auxww | awk "/[r]bd --admin.* watch ${image}/ {print \$2}")
+ test -n "${pid}" && break
+ sleep 0.1
+ done
+ test -n "${pid}"
+ echo ${pid} > $(rbd_watch_pid_file ${image})
+
+ # find watcher admin socket
+ test -n "${asok}"
+ for i in `seq 10`; do
+ test -S "${asok}" && break
+ sleep 0.1
+ done
+ test -S "${asok}"
+
+ # configure debug level
+ ceph --admin-daemon "${asok}" config set debug_rbd 20
+
+ # check that watcher is registered
+ rbd status ${image} | expect_false grep "Watchers: none"
+}
+
+function rbd_watch_end()
+{
+ local image=$1
+ local regexp=$2
+
+ # send 'enter' to watch to exit
+ echo > $(rbd_watch_fifo ${image})
+ # just in case it is not terminated
+ kill $(cat $(rbd_watch_pid_file ${image})) || :
+
+ # output rbd watch out file for easier troubleshooting
+ cat $(rbd_watch_out_file ${image})
+
+ # cleanup
+ rm -f $(rbd_watch_fifo ${image}) $(rbd_watch_pid_file ${image}) \
+ $(rbd_watch_out_file ${image}) $(rbd_watch_asok ${image})
+}
+
+pool="rbd"
+image=testimg$$
+ceph_admin="ceph --admin-daemon $(rbd_watch_asok ${image})"
+
+rbd create --size 128 ${pool}/${image}
+
+# check rbd cache commands are present in help output
+rbd_cache_flush="rbd cache flush ${pool}/${image}"
+rbd_cache_invalidate="rbd cache invalidate ${pool}/${image}"
+
+rbd_watch_start ${image}
+${ceph_admin} help | fgrep "${rbd_cache_flush}"
+${ceph_admin} help | fgrep "${rbd_cache_invalidate}"
+rbd_watch_end ${image}
+
+# test rbd cache commands with disabled and enabled cache
+for conf_rbd_cache in false true; do
+
+ rbd image-meta set ${image} conf_rbd_cache ${conf_rbd_cache}
+
+ rbd_watch_start ${image}
+
+ rbd_check_perfcounter ${image} flush 0
+ ${ceph_admin} ${rbd_cache_flush}
+ # 'flush' counter should increase regardless of whether the cache is enabled
+ rbd_check_perfcounter ${image} flush 1
+
+ rbd_check_perfcounter ${image} invalidate_cache 0
+ ${ceph_admin} ${rbd_cache_invalidate}
+ # 'invalidate_cache' counter should increase regardless of whether the cache is enabled
+ rbd_check_perfcounter ${image} invalidate_cache 1
+
+ rbd_watch_end ${image}
+done
+
+rbd rm ${image}
diff --git a/qa/workunits/rbd/test_librbd.sh b/qa/workunits/rbd/test_librbd.sh
new file mode 100755
index 00000000..447306bb
--- /dev/null
+++ b/qa/workunits/rbd/test_librbd.sh
@@ -0,0 +1,9 @@
+#!/bin/sh -e
+
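+# VALGRIND, if set, is expected to hold the valgrind tool selection (for
+# example --tool=memcheck); TESTDIR is provided by the test harness
+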
+if [ -n "${VALGRIND}" ]; then
+ valgrind ${VALGRIND} --suppressions=${TESTDIR}/valgrind.supp \
+ --error-exitcode=1 ceph_test_librbd
+else
+ ceph_test_librbd
+fi
+exit 0
diff --git a/qa/workunits/rbd/test_librbd_python.sh b/qa/workunits/rbd/test_librbd_python.sh
new file mode 100755
index 00000000..656a5bd1
--- /dev/null
+++ b/qa/workunits/rbd/test_librbd_python.sh
@@ -0,0 +1,12 @@
+#!/bin/sh -ex
+
+relpath=$(dirname $0)/../../../src/test/pybind
+
+if [ -n "${VALGRIND}" ]; then
+ valgrind ${VALGRIND} --suppressions=${TESTDIR}/valgrind.supp \
+ --errors-for-leak-kinds=definite --error-exitcode=1 \
+ nosetests -v $relpath/test_rbd.py
+else
+ nosetests -v $relpath/test_rbd.py
+fi
+exit 0
diff --git a/qa/workunits/rbd/test_lock_fence.sh b/qa/workunits/rbd/test_lock_fence.sh
new file mode 100755
index 00000000..e425e161
--- /dev/null
+++ b/qa/workunits/rbd/test_lock_fence.sh
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+# can't use -e because of background process
+set -x
+
+IMAGE=rbdrw-image
+LOCKID=rbdrw
+RELPATH=$(dirname $0)/../../../src/test/librbd
+RBDRW=$RELPATH/rbdrw.py
+
+rbd create $IMAGE --size 10 --image-format 2 --image-shared || exit 1
+
+# rbdrw loops doing I/O to $IMAGE after locking with lockid $LOCKID
+python $RBDRW $IMAGE $LOCKID &
+iochild=$!
+
+# give client time to lock and start reading/writing
+LOCKS='[]'
+while [ "$LOCKS" == '[]' ]
+do
+ LOCKS=$(rbd lock list $IMAGE --format json)
+ sleep 1
+done
+
+clientaddr=$(rbd lock list $IMAGE | tail -1 | awk '{print $NF;}')
+clientid=$(rbd lock list $IMAGE | tail -1 | awk '{print $1;}')
+echo "clientaddr: $clientaddr"
+echo "clientid: $clientid"
+
+ceph osd blacklist add $clientaddr || exit 1
+
+wait $iochild
+rbdrw_exitcode=$?
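+# 108 is ESHUTDOWN, which rbdrw is expected to exit with once blacklisted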
+if [ $rbdrw_exitcode != 108 ]
+then
+ echo "wrong exitcode from rbdrw: $rbdrw_exitcode"
+ exit 1
+else
+ echo "rbdrw stopped with ESHUTDOWN"
+fi
+
+set -e
+ceph osd blacklist rm $clientaddr
+rbd lock remove $IMAGE $LOCKID "$clientid"
+# rbdrw will have exited with an existing watch, so, until #3527 is fixed,
+# hang out until the watch expires
+sleep 30
+rbd rm $IMAGE
+echo OK
diff --git a/qa/workunits/rbd/test_rbd_mirror.sh b/qa/workunits/rbd/test_rbd_mirror.sh
new file mode 100755
index 00000000..e139dd7e
--- /dev/null
+++ b/qa/workunits/rbd/test_rbd_mirror.sh
@@ -0,0 +1,9 @@
+#!/bin/sh -e
+
+if [ -n "${VALGRIND}" ]; then
+ valgrind ${VALGRIND} --suppressions=${TESTDIR}/valgrind.supp \
+ --error-exitcode=1 ceph_test_rbd_mirror
+else
+ ceph_test_rbd_mirror
+fi
+exit 0
diff --git a/qa/workunits/rbd/test_rbd_tasks.sh b/qa/workunits/rbd/test_rbd_tasks.sh
new file mode 100755
index 00000000..b9663e60
--- /dev/null
+++ b/qa/workunits/rbd/test_rbd_tasks.sh
@@ -0,0 +1,276 @@
+#!/usr/bin/env bash
+set -ex
+
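+# 'ceph rbd task ...' commands are implemented by the MGR rbd_support module
+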
+POOL=rbd_tasks
+POOL_NS=ns1
+
+setup() {
+ trap 'cleanup' INT TERM EXIT
+
+ ceph osd pool create ${POOL} 128
+ rbd pool init ${POOL}
+ rbd namespace create ${POOL}/${POOL_NS}
+
+ TEMPDIR=`mktemp -d`
+}
+
+cleanup() {
+ ceph osd pool rm ${POOL} ${POOL} --yes-i-really-really-mean-it
+
+ rm -rf ${TEMPDIR}
+}
+
+wait_for() {
+ local TEST_FN=$1
+ shift 1
+ local TEST_FN_ARGS=("$@")
+
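+ # retry with a fixed backoff schedule, about 100 seconds in total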
+ for s in 1 2 4 8 8 8 8 8 8 8 8 16 16; do
+ sleep ${s}
+
+ ${TEST_FN} "${TEST_FN_ARGS[@]}" || continue
+ return 0
+ done
+ return 1
+}
+
+task_exists() {
+ local TASK_ID=$1
+ [[ -z "${TASK_ID}" ]] && exit 1
+
+ ceph rbd task list ${TASK_ID} || return 1
+ return 0
+}
+
+task_dne() {
+ local TASK_ID=$1
+ [[ -z "${TASK_ID}" ]] && exit 1
+
+ ceph rbd task list ${TASK_ID} || return 0
+ return 1
+}
+
+task_in_progress() {
+ local TASK_ID=$1
+ [[ -z "${TASK_ID}" ]] && exit 1
+
+ [[ $(ceph rbd task list ${TASK_ID} | jq '.in_progress') == 'true' ]]
+}
+
+test_remove() {
+ echo "test_remove"
+
+ local IMAGE=`uuidgen`
+ rbd create --size 1 --image-shared ${POOL}/${IMAGE}
+
+ # the MGR might need some time to see the OSD map with the new pool, so retry
+ wait_for ceph rbd task add remove ${POOL}/${IMAGE}
+}
+
+test_flatten() {
+ echo "test_flatten"
+
+ local PARENT_IMAGE=`uuidgen`
+ local CHILD_IMAGE=`uuidgen`
+
+ rbd create --size 1 --image-shared ${POOL}/${PARENT_IMAGE}
+ rbd snap create ${POOL}/${PARENT_IMAGE}@snap
+ rbd clone ${POOL}/${PARENT_IMAGE}@snap ${POOL}/${POOL_NS}/${CHILD_IMAGE} --rbd-default-clone-format=2
+ [[ "$(rbd info --format json ${POOL}/${POOL_NS}/${CHILD_IMAGE} | jq 'has("parent")')" == "true" ]]
+
+ local TASK_ID=`ceph rbd task add flatten ${POOL}/${POOL_NS}/${CHILD_IMAGE} | jq --raw-output ".id"`
+ wait_for task_dne ${TASK_ID}
+
+ [[ "$(rbd info --format json ${POOL}/${POOL_NS}/${CHILD_IMAGE} | jq 'has("parent")')" == "false" ]]
+}
+
+test_trash_remove() {
+ echo "test_trash_remove"
+
+ local IMAGE=`uuidgen`
+ rbd create --size 1 --image-shared ${POOL}/${IMAGE}
+ local IMAGE_ID=`rbd info --format json ${POOL}/${IMAGE} | jq --raw-output ".id"`
+ rbd trash mv ${POOL}/${IMAGE}
+ [[ -n "$(rbd trash list ${POOL})" ]] || exit 1
+
+ local TASK_ID=`ceph rbd task add trash remove ${POOL}/${IMAGE_ID} | jq --raw-output ".id"`
+ wait_for task_dne ${TASK_ID}
+
+ [[ -z "$(rbd trash list ${POOL})" ]] || exit 1
+}
+
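+# live migration flows prepared -> executed -> committed (or aborted)
+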
+test_migration_execute() {
+ echo "test_migration_execute"
+
+ local SOURCE_IMAGE=`uuidgen`
+ local TARGET_IMAGE=`uuidgen`
+ rbd create --size 1 --image-shared ${POOL}/${SOURCE_IMAGE}
+ rbd migration prepare ${POOL}/${SOURCE_IMAGE} ${POOL}/${TARGET_IMAGE}
+ [[ "$(rbd status --format json ${POOL}/${TARGET_IMAGE} | jq --raw-output '.migration.state')" == "prepared" ]]
+
+ local TASK_ID=`ceph rbd task add migration execute ${POOL}/${TARGET_IMAGE} | jq --raw-output ".id"`
+ wait_for task_dne ${TASK_ID}
+
+ [[ "$(rbd status --format json ${POOL}/${TARGET_IMAGE} | jq --raw-output '.migration.state')" == "executed" ]]
+}
+
+test_migration_commit() {
+ echo "test_migration_commit"
+
+ local SOURCE_IMAGE=`uuidgen`
+ local TARGET_IMAGE=`uuidgen`
+ rbd create --size 1 --image-shared ${POOL}/${SOURCE_IMAGE}
+ rbd migration prepare ${POOL}/${SOURCE_IMAGE} ${POOL}/${TARGET_IMAGE}
+ [[ "$(rbd status --format json ${POOL}/${TARGET_IMAGE} | jq --raw-output '.migration.state')" == "prepared" ]]
+
+ local TASK_ID=`ceph rbd task add migration execute ${POOL}/${TARGET_IMAGE} | jq --raw-output ".id"`
+ wait_for task_dne ${TASK_ID}
+
+ TASK_ID=`ceph rbd task add migration commit ${POOL}/${TARGET_IMAGE} | jq --raw-output ".id"`
+ wait_for task_dne ${TASK_ID}
+
+ [[ "$(rbd status --format json ${POOL}/${TARGET_IMAGE} | jq 'has("migration")')" == "false" ]]
+ rbd info ${POOL}/${SOURCE_IMAGE} && exit 1 || true # source must be gone after commit
+ rbd info ${POOL}/${TARGET_IMAGE}
+}
+
+test_migration_abort() {
+ echo "test_migration_abort"
+
+ local SOURCE_IMAGE=`uuidgen`
+ local TARGET_IMAGE=`uuidgen`
+ rbd create --size 1 --image-shared ${POOL}/${SOURCE_IMAGE}
+ rbd migration prepare ${POOL}/${SOURCE_IMAGE} ${POOL}/${TARGET_IMAGE}
+ [[ "$(rbd status --format json ${POOL}/${TARGET_IMAGE} | jq --raw-output '.migration.state')" == "prepared" ]]
+
+ local TASK_ID=`ceph rbd task add migration execute ${POOL}/${TARGET_IMAGE} | jq --raw-output ".id"`
+ wait_for task_dne ${TASK_ID}
+
+ TASK_ID=`ceph rbd task add migration abort ${POOL}/${TARGET_IMAGE} | jq --raw-output ".id"`
+ wait_for task_dne ${TASK_ID}
+
+ [[ "$(rbd status --format json ${POOL}/${SOURCE_IMAGE} | jq 'has("migration")')" == "false" ]]
+ rbd info ${POOL}/${SOURCE_IMAGE}
+ rbd info ${POOL}/${TARGET_IMAGE} && exit 1 || true # target must be gone after abort
+}
+
+test_list() {
+ echo "test_list"
+
+ local IMAGE_1=`uuidgen`
+ local IMAGE_2=`uuidgen`
+
+ rbd create --size 1T --image-shared ${POOL}/${IMAGE_1}
+ rbd create --size 1T --image-shared ${POOL}/${IMAGE_2}
+
+ local TASK_ID_1=`ceph rbd task add remove ${POOL}/${IMAGE_1} | jq --raw-output ".id"`
+ local TASK_ID_2=`ceph rbd task add remove ${POOL}/${IMAGE_2} | jq --raw-output ".id"`
+
+ local LIST_FILE="${TEMPDIR}/list_file"
+ ceph rbd task list > ${LIST_FILE}
+ cat ${LIST_FILE}
+
+ [[ $(jq "[.[] | .id] | contains([\"${TASK_ID_1}\", \"${TASK_ID_2}\"])" ${LIST_FILE}) == "true" ]]
+
+ ceph rbd task cancel ${TASK_ID_1}
+ ceph rbd task cancel ${TASK_ID_2}
+}
+
+test_cancel() {
+ echo "test_cancel"
+
+ local IMAGE=`uuidgen`
+ rbd create --size 1T --image-shared ${POOL}/${IMAGE}
+ local TASK_ID=`ceph rbd task add remove ${POOL}/${IMAGE} | jq --raw-output ".id"`
+
+ wait_for task_exists ${TASK_ID}
+
+ ceph rbd task cancel ${TASK_ID}
+ wait_for task_dne ${TASK_ID}
+}
+
+test_duplicate_task() {
+ echo "test_duplicate_task"
+
+ local IMAGE=`uuidgen`
+ rbd create --size 1T --image-shared ${POOL}/${IMAGE}
+ local IMAGE_ID=`rbd info --format json ${POOL}/${IMAGE} | jq --raw-output ".id"`
+ rbd trash mv ${POOL}/${IMAGE}
+
+ local TASK_ID_1=`ceph rbd task add trash remove ${POOL}/${IMAGE_ID} | jq --raw-output ".id"`
+ local TASK_ID_2=`ceph rbd task add trash remove ${POOL}/${IMAGE_ID} | jq --raw-output ".id"`
+
+ [[ "${TASK_ID_1}" == "${TASK_ID_2}" ]]
+
+ ceph rbd task cancel ${TASK_ID_1}
+}
+
+test_duplicate_name() {
+ echo "test_duplicate_name"
+
+ local IMAGE=`uuidgen`
+ rbd create --size 1G --image-shared ${POOL}/${IMAGE}
+ local TASK_ID_1=`ceph rbd task add remove ${POOL}/${IMAGE} | jq --raw-output ".id"`
+
+ wait_for task_dne ${TASK_ID_1}
+
+ rbd create --size 1G --image-shared ${POOL}/${IMAGE}
+ local TASK_ID_2=`ceph rbd task add remove ${POOL}/${IMAGE} | jq --raw-output ".id"`
+
+ [[ "${TASK_ID_1}" != "${TASK_ID_2}" ]]
+ wait_for task_dne ${TASK_ID_2}
+
+ local TASK_ID_3=`ceph rbd task add remove ${POOL}/${IMAGE} | jq --raw-output ".id"`
+
+ [[ "${TASK_ID_2}" == "${TASK_ID_3}" ]]
+}
+
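+# completed and in-flight tasks are mirrored into the MGR progress module,
+# which 'ceph progress json' dumps below
+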
+test_progress() {
+ echo "test_progress"
+
+ local IMAGE_1=`uuidgen`
+ local IMAGE_2=`uuidgen`
+
+ rbd create --size 1 --image-shared ${POOL}/${IMAGE_1}
+ local TASK_ID_1=`ceph rbd task add remove ${POOL}/${IMAGE_1} | jq --raw-output ".id"`
+
+ wait_for task_dne ${TASK_ID_1}
+
+ local PROGRESS_FILE="${TEMPDIR}/progress_file"
+ ceph progress json > ${PROGRESS_FILE}
+ cat ${PROGRESS_FILE}
+
+ [[ $(jq "[.completed | .[].id] | contains([\"${TASK_ID_1}\"])" ${PROGRESS_FILE}) == "true" ]]
+
+ rbd create --size 1T --image-shared ${POOL}/${IMAGE_2}
+ local TASK_ID_2=`ceph rbd task add remove ${POOL}/${IMAGE_2} | jq --raw-output ".id"`
+
+ wait_for task_in_progress ${TASK_ID_2}
+ ceph progress json > ${PROGRESS_FILE}
+ cat ${PROGRESS_FILE}
+
+ [[ $(jq "[.events | .[].id] | contains([\"${TASK_ID_2}\"])" ${PROGRESS_FILE}) == "true" ]]
+
+ ceph rbd task cancel ${TASK_ID_2}
+ wait_for task_dne ${TASK_ID_2}
+
+ ceph progress json > ${PROGRESS_FILE}
+ cat ${PROGRESS_FILE}
+
+ [[ $(jq "[.completed | map(select(.failed)) | .[].id] | contains([\"${TASK_ID_2}\"])" ${PROGRESS_FILE}) == "true" ]]
+}
+
+setup
+test_remove
+test_flatten
+test_trash_remove
+test_migration_execute
+test_migration_commit
+test_migration_abort
+test_list
+test_cancel
+test_duplicate_task
+test_duplicate_name
+test_progress
+
+echo OK
diff --git a/qa/workunits/rbd/test_rbdmap_RBDMAPFILE.sh b/qa/workunits/rbd/test_rbdmap_RBDMAPFILE.sh
new file mode 100755
index 00000000..501c69cd
--- /dev/null
+++ b/qa/workunits/rbd/test_rbdmap_RBDMAPFILE.sh
@@ -0,0 +1,34 @@
+#!/bin/sh
+#
+# Regression test for http://tracker.ceph.com/issues/14984
+#
+# When the bug is present, starting the rbdmap service emits a bogus
+# log message because the RBDMAPFILE environment variable is not set.
+#
+# When the bug is not present, starting the rbdmap service will emit
+# no log messages, because /etc/ceph/rbdmap does not contain any lines
+# that require processing.
+#
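+# For reference, a populated /etc/ceph/rbdmap holds one map entry per line,
+# along the lines of (illustrative only):
+#
+#   poolname/imagename id=client,keyring=/etc/ceph/ceph.client.keyring
+#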
+set -ex
+
+echo "TEST: save timestamp for use later with journalctl --since"
+TIMESTAMP=$(date +%Y-%m-%d\ %H:%M:%S)
+
+echo "TEST: assert that rbdmap has not logged anything since boot"
+journalctl -b 0 -t rbdmap | grep 'rbdmap\[[[:digit:]]' && exit 1
+journalctl -b 0 -t init-rbdmap | grep 'rbdmap\[[[:digit:]]' && exit 1
+
+echo "TEST: restart the rbdmap.service"
+sudo systemctl restart rbdmap.service
+
+echo "TEST: ensure that /usr/bin/rbdmap runs to completion"
+until sudo systemctl status rbdmap.service | grep 'active (exited)' ; do
+ sleep 0.5
+done
+
+echo "TEST: assert that rbdmap has not logged anything since TIMESTAMP"
+journalctl --since "$TIMESTAMP" -t rbdmap | grep 'rbdmap\[[[:digit:]]' && exit 1
+journalctl --since "$TIMESTAMP" -t init-rbdmap | grep 'rbdmap\[[[:digit:]]' && exit 1
+
+exit 0
diff --git a/qa/workunits/rbd/verify_pool.sh b/qa/workunits/rbd/verify_pool.sh
new file mode 100755
index 00000000..08bcca50
--- /dev/null
+++ b/qa/workunits/rbd/verify_pool.sh
@@ -0,0 +1,27 @@
+#!/bin/sh -ex
+
+POOL_NAME=rbd_test_validate_pool
+PG_NUM=32
+
+tear_down () {
+ ceph osd pool delete $POOL_NAME $POOL_NAME --yes-i-really-really-mean-it || true
+}
+
+set_up () {
+ tear_down
+ ceph osd pool create $POOL_NAME $PG_NUM
+ ceph osd pool mksnap $POOL_NAME snap
+ rbd pool init $POOL_NAME
+}
+
+trap tear_down EXIT HUP INT
+set_up
+
+# creating an image should fail in a pool that has pool-level (mksnap)
+# snapshots, as RBD requires self-managed snapshots
+rbd create --pool $POOL_NAME --size 1 foo && exit 1 || true
+
+# should succeed if the pool is already marked as validated
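+# (librbd stores the validation marker in the pool's 'rbd_info' object, which
+# the rados put below seeds directly)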
+printf "overwrite validated" | rados --pool $POOL_NAME put rbd_info -
+rbd create --pool $POOL_NAME --size 1 foo
+
+echo OK