summaryrefslogtreecommitdiffstats
path: root/qa/workunits
diff options
context:
space:
mode:
Diffstat (limited to 'qa/workunits')
-rwxr-xr-xqa/workunits/cephadm/test_cephadm_timeout.py179
-rwxr-xr-xqa/workunits/fs/full/subvolume_clone.sh9
-rwxr-xr-xqa/workunits/fs/full/subvolume_rm.sh6
-rwxr-xr-xqa/workunits/fs/full/subvolume_snapshot_rm.sh8
-rwxr-xr-xqa/workunits/fs/quota/quota.sh18
-rwxr-xr-xqa/workunits/kernel_untar_build.sh4
-rwxr-xr-xqa/workunits/mon/config.sh26
-rwxr-xr-xqa/workunits/mon/rbd_snaps_ops.sh3
-rwxr-xr-xqa/workunits/rbd/cli_generic.sh9
-rwxr-xr-xqa/workunits/rbd/compare_mirror_image_alternate_primary.sh106
-rwxr-xr-xqa/workunits/rbd/compare_mirror_images.sh170
-rwxr-xr-xqa/workunits/rbd/rbd-nbd.sh5
-rwxr-xr-xqa/workunits/rbd/rbd_mirror_bootstrap.sh4
-rwxr-xr-xqa/workunits/rbd/rbd_mirror_fsx_compare.sh4
-rwxr-xr-xqa/workunits/rbd/rbd_mirror_fsx_prepare.sh4
-rwxr-xr-xqa/workunits/rbd/rbd_mirror_ha.sh4
-rwxr-xr-xqa/workunits/rbd/rbd_mirror_helpers.sh33
-rwxr-xr-xqa/workunits/rbd/rbd_mirror_journal.sh4
-rwxr-xr-xqa/workunits/rbd/rbd_mirror_snapshot.sh4
-rwxr-xr-xqa/workunits/rbd/rbd_mirror_stress.sh4
20 files changed, 556 insertions, 48 deletions
diff --git a/qa/workunits/cephadm/test_cephadm_timeout.py b/qa/workunits/cephadm/test_cephadm_timeout.py
new file mode 100755
index 000000000..67b43a2df
--- /dev/null
+++ b/qa/workunits/cephadm/test_cephadm_timeout.py
@@ -0,0 +1,179 @@
+#!/usr/bin/python3 -s
+
+import time
+import os
+import fcntl
+import subprocess
+import uuid
+import sys
+
+from typing import Optional, Any
+
+LOCK_DIR = '/run/cephadm'
+DATA_DIR = '/var/lib/ceph'
+
+class _Acquire_ReturnProxy(object):
+ def __init__(self, lock: 'FileLock') -> None:
+ self.lock = lock
+ return None
+
+ def __enter__(self) -> 'FileLock':
+ return self.lock
+
+ def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
+ self.lock.release()
+ return None
+
+class FileLock(object):
+ def __init__(self, name: str, timeout: int = -1) -> None:
+ if not os.path.exists(LOCK_DIR):
+ os.mkdir(LOCK_DIR, 0o700)
+ self._lock_file = os.path.join(LOCK_DIR, name + '.lock')
+
+ self._lock_file_fd: Optional[int] = None
+ self.timeout = timeout
+ self._lock_counter = 0
+ return None
+
+ @property
+ def is_locked(self) -> bool:
+ return self._lock_file_fd is not None
+
+ def acquire(self, timeout: Optional[int] = None, poll_intervall: float = 0.05) -> _Acquire_ReturnProxy:
+ # Use the default timeout, if no timeout is provided.
+ if timeout is None:
+ timeout = self.timeout
+
+ # Increment the number right at the beginning.
+ # We can still undo it, if something fails.
+ self._lock_counter += 1
+
+ start_time = time.time()
+ try:
+ while True:
+ if not self.is_locked:
+ self._acquire()
+
+ if self.is_locked:
+ break
+ elif timeout >= 0 and time.time() - start_time > timeout:
+ raise Exception(self._lock_file)
+ else:
+ time.sleep(poll_intervall)
+ except Exception:
+ # Something did go wrong, so decrement the counter.
+ self._lock_counter = max(0, self._lock_counter - 1)
+
+ raise
+ return _Acquire_ReturnProxy(lock=self)
+
+ def release(self, force: bool = False) -> None:
+ if self.is_locked:
+ self._lock_counter -= 1
+
+ if self._lock_counter == 0 or force:
+ self._release()
+ self._lock_counter = 0
+
+ return None
+
+ def __enter__(self) -> 'FileLock':
+ self.acquire()
+ return self
+
+ def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
+ self.release()
+ return None
+
+ def __del__(self) -> None:
+ self.release(force=True)
+ return None
+
+ def _acquire(self) -> None:
+ open_mode = os.O_RDWR | os.O_CREAT | os.O_TRUNC
+ fd = os.open(self._lock_file, open_mode)
+
+ try:
+ fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+ except (IOError, OSError):
+ os.close(fd)
+ else:
+ self._lock_file_fd = fd
+ return None
+
+ def _release(self) -> None:
+ fd = self._lock_file_fd
+ self._lock_file_fd = None
+ fcntl.flock(fd, fcntl.LOCK_UN) # type: ignore
+ os.close(fd) # type: ignore
+ return None
+
+def _is_fsid(s):
+ try:
+ uuid.UUID(s)
+ except ValueError:
+ return False
+ return True
+
+def find_fsid():
+ if not os.path.exists(DATA_DIR):
+ raise Exception(f'{DATA_DIR} does not exist. Aborting...')
+
+ for d in os.listdir(DATA_DIR):
+ # assume the first thing we find that is an fsid
+ # is what we want. Not expecting multiple clusters
+ # to have been installed here.
+ if _is_fsid(d):
+ return d
+    raise Exception(f'No fsid dir found in {DATA_DIR}. Aborting...')
+
+def main():
+ print('Looking for cluster fsid...')
+ fsid = find_fsid()
+ print(f'Found fsid {fsid}')
+
+ print('Setting cephadm command timeout to 120...')
+ subprocess.run(['cephadm', 'shell', '--', 'ceph', 'config', 'set',
+ 'mgr', 'mgr/cephadm/default_cephadm_command_timeout', '120'],
+ check=True)
+
+ print('Taking hold of cephadm lock for 300 seconds...')
+ lock = FileLock(fsid, 300)
+ lock.acquire()
+
+ print('Triggering cephadm device refresh...')
+ subprocess.run(['cephadm', 'shell', '--', 'ceph', 'orch', 'device', 'ls', '--refresh'],
+ check=True)
+
+ print('Sleeping 150 seconds to allow for timeout to occur...')
+ time.sleep(150)
+
+ print('Checking ceph health detail...')
+ # directing stdout to res.stdout via "capture_stdout" option
+ # (and same for stderr) seems to have been added in python 3.7.
+ # Using files so this works with 3.6 as well
+ with open('/tmp/ceph-health-detail-stdout', 'w') as f_stdout:
+ with open('/tmp/ceph-health-detail-stderr', 'w') as f_stderr:
+ subprocess.run(['cephadm', 'shell', '--', 'ceph', 'health', 'detail'],
+ check=True, stdout=f_stdout, stderr=f_stderr)
+
+ res_stdout = open('/tmp/ceph-health-detail-stdout', 'r').read()
+ res_stderr = open('/tmp/ceph-health-detail-stderr', 'r').read()
+ print(f'"cephadm shell -- ceph health detail" stdout:\n{res_stdout}')
+ print(f'"cephadm shell -- ceph health detail" stderr:\n{res_stderr}')
+
+ print('Checking for correct health warning in health detail...')
+ if 'CEPHADM_REFRESH_FAILED' not in res_stdout:
+ raise Exception('No health warning caused by timeout was raised')
+ if 'Command "cephadm ceph-volume -- inventory" timed out' not in res_stdout:
+ raise Exception('Health warnings did not contain message about time out')
+
+    print('Health warnings found successfully. Exiting.')
+ return 0
+
+
+if __name__ == '__main__':
+ if os.getuid() != 0:
+ print('Trying to run myself with sudo...')
+        os.execvp('sudo', ['sudo', sys.executable] + sys.argv)
+ main()
diff --git a/qa/workunits/fs/full/subvolume_clone.sh b/qa/workunits/fs/full/subvolume_clone.sh
index a11131215..d61e07111 100755
--- a/qa/workunits/fs/full/subvolume_clone.sh
+++ b/qa/workunits/fs/full/subvolume_clone.sh
@@ -7,8 +7,8 @@ set -ex
# Hence the subsequent subvolume commands on the clone fails with
# 'MetadataMgrException: -2 (section 'GLOBAL' does not exist)' traceback.
-# The osd is of the size 1GB. The full-ratios are set so that osd is treated full
-# at around 600MB. The subvolume is created and 100MB is written.
+# The osd is of the size 2GiB. The full-ratios are set so that osd is treated full
+# at around 1.2GB. The subvolume is created and 200MB is written.
# The subvolume is snapshotted and cloned ten times. Since the clone delay is set to 15 seconds,
# all the clones reach pending state for sure. Among ten clones, only few succeed and rest fails
# with ENOSPACE.
@@ -47,7 +47,7 @@ echo "After ratios are set"
df -h
ceph osd df
-for i in {1..100};do sudo dd if=/dev/urandom of=$CEPH_MNT$subvol_path_0/1MB_file-$i status=progress bs=1M count=1 conv=fdatasync;done
+for i in {1..100};do sudo dd if=/dev/urandom of=$CEPH_MNT$subvol_path_0/2MB_file-$i status=progress bs=1M count=2 conv=fdatasync;done
# For debugging
echo "After subvolumes are written"
@@ -60,6 +60,9 @@ ceph fs subvolume snapshot create cephfs sub_0 snap_0
# Set clone snapshot delay
ceph config set mgr mgr/volumes/snapshot_clone_delay 15
+# Disable the snapshot_clone_no_wait config option
+ceph config set mgr mgr/volumes/snapshot_clone_no_wait false
+
# Schedule few clones, some would fail with no space
for i in $(eval echo {1..$NUM_CLONES});do ceph fs subvolume snapshot clone cephfs sub_0 snap_0 clone_$i;done
diff --git a/qa/workunits/fs/full/subvolume_rm.sh b/qa/workunits/fs/full/subvolume_rm.sh
index a464e30f5..2a3bf956d 100755
--- a/qa/workunits/fs/full/subvolume_rm.sh
+++ b/qa/workunits/fs/full/subvolume_rm.sh
@@ -2,8 +2,8 @@
set -ex
# This testcase tests the scenario of the 'ceph fs subvolume rm' mgr command
-# when the osd is full. The command used to hang. The osd is of the size 1GB.
-# The subvolume is created and 500MB file is written. The full-ratios are
+# when the osd is full. The command used to hang. The osd is of the size 2GiB.
+# The subvolume is created and 1GB file is written. The full-ratios are
# set below 500MB such that the osd is treated as full. Now the subvolume is
# is removed. This should be successful with the introduction of FULL
# capabilities which the mgr holds.
@@ -21,7 +21,7 @@ echo "Before write"
df -h
ceph osd df
-sudo dd if=/dev/urandom of=$CEPH_MNT$subvol_path/500MB_file-1 status=progress bs=1M count=500
+sudo dd if=/dev/urandom of=$CEPH_MNT$subvol_path/1GB_file-1 status=progress bs=1M count=1000
ceph osd set-full-ratio 0.2
ceph osd set-nearfull-ratio 0.16
diff --git a/qa/workunits/fs/full/subvolume_snapshot_rm.sh b/qa/workunits/fs/full/subvolume_snapshot_rm.sh
index f6d0add9f..8df89d3c7 100755
--- a/qa/workunits/fs/full/subvolume_snapshot_rm.sh
+++ b/qa/workunits/fs/full/subvolume_snapshot_rm.sh
@@ -7,8 +7,8 @@ set -ex
# snapshot rm of the same snapshot fails with 'MetadataMgrException: -2 (section 'GLOBAL' does not exist)'
# traceback.
-# The osd is of the size 1GB. The subvolume is created and 800MB file is written.
-# Then full-ratios are set below 500MB such that the osd is treated as full.
+# The osd is of the size 2GiB. The subvolume is created and 1.6GB file is written.
+# Then full-ratios are set below 1GiB such that the osd is treated as full.
# The subvolume snapshot is taken which succeeds as no extra space is required
# for snapshot. Now, the removal of the snapshot fails with ENOSPACE as it
# fails to remove the snapshot metadata set. The snapshot removal fails
@@ -31,8 +31,8 @@ echo "Before write"
df $CEPH_MNT
ceph osd df
-# Write 800MB file and set full ratio to around 200MB
-ignore_failure sudo dd if=/dev/urandom of=$CEPH_MNT$subvol_path/800MB_file-1 status=progress bs=1M count=800 conv=fdatasync
+# Write 1.6GB file and set full ratio to around 400MB
+ignore_failure sudo dd if=/dev/urandom of=$CEPH_MNT$subvol_path/1.6GB_file-1 status=progress bs=1M count=1600 conv=fdatasync
ceph osd set-full-ratio 0.2
ceph osd set-nearfull-ratio 0.16
diff --git a/qa/workunits/fs/quota/quota.sh b/qa/workunits/fs/quota/quota.sh
index 1315be6d8..a2f5c459d 100755
--- a/qa/workunits/fs/quota/quota.sh
+++ b/qa/workunits/fs/quota/quota.sh
@@ -29,7 +29,7 @@ mkdir quota-test
cd quota-test
# bytes
-setfattr . -n ceph.quota.max_bytes -v 100000000 # 100m
+setfattr . -n ceph.quota.max_bytes -v 100M
expect_false write_file big 1000 # 1g
expect_false write_file second 10
setfattr . -n ceph.quota.max_bytes -v 0
@@ -57,7 +57,7 @@ rm -rf *
# mix
mkdir bytes bytes/files
-setfattr bytes -n ceph.quota.max_bytes -v 10000000 #10m
+setfattr bytes -n ceph.quota.max_bytes -v 10M
setfattr bytes/files -n ceph.quota.max_files -v 5
dd if=/dev/zero of=bytes/files/1 bs=1M count=4
dd if=/dev/zero of=bytes/files/2 bs=1M count=4
@@ -78,7 +78,7 @@ rm -rf *
#mv
mkdir files limit
truncate files/file -s 10G
-setfattr limit -n ceph.quota.max_bytes -v 1000000 #1m
+setfattr limit -n ceph.quota.max_bytes -v 1M
expect_false mv files limit/
@@ -88,8 +88,8 @@ rm -rf *
#limit by ancestor
mkdir -p ancestor/p1/p2/parent/p3
-setfattr ancestor -n ceph.quota.max_bytes -v 1000000
-setfattr ancestor/p1/p2/parent -n ceph.quota.max_bytes -v 1000000000 #1g
+setfattr ancestor -n ceph.quota.max_bytes -v 1M
+setfattr ancestor/p1/p2/parent -n ceph.quota.max_bytes -v 1G
expect_false write_file ancestor/p1/p2/parent/p3/file1 900 #900m
stat --printf="%n %s\n" ancestor/p1/p2/parent/p3/file1
@@ -104,6 +104,14 @@ expect_false setfattr -n ceph.quota.max_bytes -v -1 .
expect_false setfattr -n ceph.quota.max_bytes -v -9223372036854775808 .
expect_false setfattr -n ceph.quota.max_bytes -v -9223372036854775809 .
+setfattr -n ceph.quota.max_bytes -v 0 .
+setfattr -n ceph.quota.max_bytes -v 1Ti .
+setfattr -n ceph.quota.max_bytes -v 8388607Ti .
+expect_false setfattr -n ceph.quota.max_bytes -v 8388608Ti .
+expect_false setfattr -n ceph.quota.max_bytes -v -1Ti .
+expect_false setfattr -n ceph.quota.max_bytes -v -8388609Ti .
+expect_false setfattr -n ceph.quota.max_bytes -v -8388610Ti .
+
setfattr -n ceph.quota.max_files -v 0 .
setfattr -n ceph.quota.max_files -v 1 .
setfattr -n ceph.quota.max_files -v 9223372036854775807 .
diff --git a/qa/workunits/kernel_untar_build.sh b/qa/workunits/kernel_untar_build.sh
index 9b60f065c..602ce04a7 100755
--- a/qa/workunits/kernel_untar_build.sh
+++ b/qa/workunits/kernel_untar_build.sh
@@ -2,11 +2,11 @@
set -e
-wget -O linux.tar.gz http://download.ceph.com/qa/linux-5.4.tar.gz
+wget -O linux.tar.xz http://download.ceph.com/qa/linux-6.5.11.tar.xz
mkdir t
cd t
-tar xzf ../linux.tar.gz
+tar xJf ../linux.tar.xz
cd linux*
make defconfig
make -j`grep -c processor /proc/cpuinfo`
diff --git a/qa/workunits/mon/config.sh b/qa/workunits/mon/config.sh
index 1b00201ae..10cbe5630 100755
--- a/qa/workunits/mon/config.sh
+++ b/qa/workunits/mon/config.sh
@@ -98,11 +98,11 @@ ceph tell osd.0 config unset debug_asok
ceph tell osd.0 config unset debug_asok
ceph config rm osd.0 debug_asok
-while ceph config show osd.0 | grep debug_asok | grep mon
+while ceph config show osd.0 | grep '^debug_asok[[:space:]]' | grep mon
do
sleep 1
done
-ceph config show osd.0 | grep -c debug_asok | grep 0
+ceph config show osd.0 | grep -c '^debug_asok[[:space:]]' | grep 0
ceph config set osd.0 osd_scrub_cost 123
while ! ceph config show osd.0 | grep osd_scrub_cost | grep mon
@@ -111,6 +111,13 @@ do
done
ceph config rm osd.0 osd_scrub_cost
+#RGW daemons test config set
+ceph config set client.rgw debug_rgw 22
+while ! ceph config show client.rgw | grep debug_rgw | grep 22 | grep mon
+do
+ sleep 1
+done
+
# show-with-defaults
ceph config show-with-defaults osd.0 | grep debug_asok
@@ -130,6 +137,21 @@ rm -f $t1 $t2
expect_false ceph config reset
expect_false ceph config reset -1
+
+
+# test parallel config set
+# reproducer for https://tracker.ceph.com/issues/62832
+ceph config reset 0
+for ((try = 0; try < 10; try++)); do
+ set +x
+ for ((i = 0; i < 100; i++)); do
+ # Use a config that will get "handled" by the Objecter instantiated by the ceph binary
+ ceph config set client rados_mon_op_timeout $((i+300)) &
+ done 2> /dev/null
+ set -x
+ wait
+done
+
# we are at end of testing, so it's okay to revert everything
ceph config reset 0
diff --git a/qa/workunits/mon/rbd_snaps_ops.sh b/qa/workunits/mon/rbd_snaps_ops.sh
index eb88565ea..0e5b16b7b 100755
--- a/qa/workunits/mon/rbd_snaps_ops.sh
+++ b/qa/workunits/mon/rbd_snaps_ops.sh
@@ -36,6 +36,7 @@ expect 'rbd --pool=test snap ls image' 0
expect 'rbd --pool=test snap rm image@snapshot' 0
expect 'ceph osd pool mksnap test snapshot' 22
+expect 'rados -p test mksnap snapshot' 1
expect 'ceph osd pool delete test test --yes-i-really-really-mean-it' 0
@@ -52,6 +53,8 @@ expect 'rbd --pool test-foo snap create image@snapshot' 0
ceph osd pool delete test-bar test-bar --yes-i-really-really-mean-it || true
expect 'ceph osd pool create test-bar 8' 0
expect 'ceph osd pool application enable test-bar rbd'
+# "rados cppool" without --yes-i-really-mean-it should fail
+expect 'rados cppool test-foo test-bar' 1
expect 'rados cppool test-foo test-bar --yes-i-really-mean-it' 0
expect 'rbd --pool test-bar snap rm image@snapshot' 95
expect 'ceph osd pool delete test-foo test-foo --yes-i-really-really-mean-it' 0
diff --git a/qa/workunits/rbd/cli_generic.sh b/qa/workunits/rbd/cli_generic.sh
index 57279d26d..15c47074d 100755
--- a/qa/workunits/rbd/cli_generic.sh
+++ b/qa/workunits/rbd/cli_generic.sh
@@ -432,6 +432,7 @@ test_trash() {
rbd trash mv test2
ID=`rbd trash ls | cut -d ' ' -f 1`
rbd info --image-id $ID | grep "rbd image 'test2'"
+ rbd children --image-id $ID | wc -l | grep 0
rbd trash restore $ID
rbd ls | grep test2
@@ -449,6 +450,7 @@ test_trash() {
rbd create $RBD_CREATE_ARGS -s 1 test1
rbd snap create test1@snap1
rbd snap protect test1@snap1
+ rbd clone test1@snap1 clone
rbd trash mv test1
rbd trash ls | grep test1
@@ -459,7 +461,10 @@ test_trash() {
ID=`rbd trash ls | cut -d ' ' -f 1`
rbd snap ls --image-id $ID | grep -v 'SNAPID' | wc -l | grep 1
rbd snap ls --image-id $ID | grep '.*snap1.*'
+ rbd children --image-id $ID | wc -l | grep 1
+ rbd children --image-id $ID | grep 'clone'
+ rbd rm clone
rbd snap unprotect --image-id $ID --snap snap1
rbd snap rm --image-id $ID --snap snap1
rbd snap ls --image-id $ID | grep -v 'SNAPID' | wc -l | grep 0
@@ -1261,7 +1266,6 @@ test_trash_purge_schedule_recovery() {
jq 'select(.name == "rbd_support")' |
jq -r '[.addrvec[0].addr, "/", .addrvec[0].nonce|tostring] | add')
ceph osd blocklist add $CLIENT_ADDR
- ceph osd blocklist ls | grep $CLIENT_ADDR
# Check that you can add a trash purge schedule after a few retries
expect_fail rbd trash purge schedule add -p rbd3 10m
@@ -1420,7 +1424,6 @@ test_mirror_snapshot_schedule_recovery() {
jq 'select(.name == "rbd_support")' |
jq -r '[.addrvec[0].addr, "/", .addrvec[0].nonce|tostring] | add')
ceph osd blocklist add $CLIENT_ADDR
- ceph osd blocklist ls | grep $CLIENT_ADDR
# Check that you can add a mirror snapshot schedule after a few retries
expect_fail rbd mirror snapshot schedule add -p rbd3/ns1 --image test1 2m
@@ -1529,7 +1532,6 @@ test_perf_image_iostat_recovery() {
jq 'select(.name == "rbd_support")' |
jq -r '[.addrvec[0].addr, "/", .addrvec[0].nonce|tostring] | add')
ceph osd blocklist add $CLIENT_ADDR
- ceph osd blocklist ls | grep $CLIENT_ADDR
expect_fail rbd perf image iostat --format json rbd3/ns
sleep 10
@@ -1661,7 +1663,6 @@ test_tasks_recovery() {
jq 'select(.name == "rbd_support")' |
jq -r '[.addrvec[0].addr, "/", .addrvec[0].nonce|tostring] | add')
ceph osd blocklist add $CLIENT_ADDR
- ceph osd blocklist ls | grep $CLIENT_ADDR
expect_fail ceph rbd task add flatten rbd2/clone1
sleep 10
diff --git a/qa/workunits/rbd/compare_mirror_image_alternate_primary.sh b/qa/workunits/rbd/compare_mirror_image_alternate_primary.sh
new file mode 100755
index 000000000..78a390230
--- /dev/null
+++ b/qa/workunits/rbd/compare_mirror_image_alternate_primary.sh
@@ -0,0 +1,106 @@
+#!/usr/bin/env bash
+
+set -ex
+
+IMAGE=image-alternate-primary
+MIRROR_IMAGE_MODE=snapshot
+MIRROR_POOL_MODE=image
+MOUNT=test-alternate-primary
+RBD_IMAGE_FEATURES='layering,exclusive-lock,object-map,fast-diff'
+RBD_MIRROR_INSTANCES=1
+RBD_MIRROR_MODE=snapshot
+RBD_MIRROR_USE_EXISTING_CLUSTER=1
+
+. $(dirname $0)/rbd_mirror_helpers.sh
+
+take_mirror_snapshots() {
+ local cluster=$1
+ local pool=$2
+ local image=$3
+
+ for i in {1..30}; do
+ mirror_image_snapshot $cluster $pool $image
+ sleep 3
+ done
+}
+
+slow_untar_workload() {
+ local mountpt=$1
+
+ cp linux-5.4.tar.gz $mountpt
+ # run workload that updates the data and metadata of multiple files on disk.
+ # rate limit the workload such that the mirror snapshots can be taken as the
+ # contents of the image are progressively changed by the workload.
+ local ret=0
+ timeout 5m bash -c "zcat $mountpt/linux-5.4.tar.gz \
+ | pv -L 256K | tar xf - -C $mountpt" || ret=$?
+ if ((ret != 124)); then
+ echo "Workload completed prematurely"
+ return 1
+ fi
+}
+
+setup
+
+start_mirrors ${CLUSTER1}
+start_mirrors ${CLUSTER2}
+
+# initial setup
+create_image_and_enable_mirror ${CLUSTER1} ${POOL} ${IMAGE} \
+ ${RBD_MIRROR_MODE} 10G
+
+if [[ $RBD_DEVICE_TYPE == "nbd" ]]; then
+ DEV=$(sudo rbd --cluster ${CLUSTER1} device map -t nbd \
+ -o try-netlink ${POOL}/${IMAGE})
+elif [[ $RBD_DEVICE_TYPE == "krbd" ]]; then
+ DEV=$(sudo rbd --cluster ${CLUSTER1} device map -t krbd \
+ ${POOL}/${IMAGE})
+else
+ echo "Unknown RBD_DEVICE_TYPE: ${RBD_DEVICE_TYPE}"
+ exit 1
+fi
+sudo mkfs.ext4 ${DEV}
+mkdir ${MOUNT}
+
+wget https://download.ceph.com/qa/linux-5.4.tar.gz
+
+for i in {1..25}; do
+ # create mirror snapshots every few seconds under I/O
+ sudo mount ${DEV} ${MOUNT}
+ sudo chown $(whoami) ${MOUNT}
+ rm -rf ${MOUNT}/*
+ take_mirror_snapshots ${CLUSTER1} ${POOL} ${IMAGE} &
+ SNAP_PID=$!
+ slow_untar_workload ${MOUNT}
+ wait $SNAP_PID
+ sudo umount ${MOUNT}
+
+ # calculate hash before demotion of primary image
+ DEMOTE_MD5=$(sudo md5sum ${DEV} | awk '{print $1}')
+ sudo rbd --cluster ${CLUSTER1} device unmap -t ${RBD_DEVICE_TYPE} ${DEV}
+
+ demote_image ${CLUSTER1} ${POOL} ${IMAGE}
+ wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${IMAGE} 'up+unknown'
+ wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${IMAGE} 'up+unknown'
+ promote_image ${CLUSTER2} ${POOL} ${IMAGE}
+
+ # calculate hash after promotion of secondary image
+ if [[ $RBD_DEVICE_TYPE == "nbd" ]]; then
+ DEV=$(sudo rbd --cluster ${CLUSTER2} device map -t nbd \
+ -o try-netlink ${POOL}/${IMAGE})
+ elif [[ $RBD_DEVICE_TYPE == "krbd" ]]; then
+ DEV=$(sudo rbd --cluster ${CLUSTER2} device map -t krbd ${POOL}/${IMAGE})
+ fi
+ PROMOTE_MD5=$(sudo md5sum ${DEV} | awk '{print $1}')
+
+ if [[ "${DEMOTE_MD5}" != "${PROMOTE_MD5}" ]]; then
+ echo "Mismatch at iteration ${i}: ${DEMOTE_MD5} != ${PROMOTE_MD5}"
+ exit 1
+ fi
+
+ TEMP=${CLUSTER1}
+ CLUSTER1=${CLUSTER2}
+ CLUSTER2=${TEMP}
+done
+
+echo OK
diff --git a/qa/workunits/rbd/compare_mirror_images.sh b/qa/workunits/rbd/compare_mirror_images.sh
new file mode 100755
index 000000000..cbaa77a71
--- /dev/null
+++ b/qa/workunits/rbd/compare_mirror_images.sh
@@ -0,0 +1,170 @@
+#!/usr/bin/env bash
+
+set -ex
+
+IMG_PREFIX=image-primary
+MIRROR_IMAGE_MODE=snapshot
+MIRROR_POOL_MODE=image
+MNTPT_PREFIX=test-primary
+RBD_IMAGE_FEATURES='layering,exclusive-lock,object-map,fast-diff'
+RBD_MIRROR_INSTANCES=1
+RBD_MIRROR_MODE=snapshot
+RBD_MIRROR_USE_EXISTING_CLUSTER=1
+
+. $(dirname $0)/rbd_mirror_helpers.sh
+
+take_mirror_snapshots() {
+ local cluster=$1
+ local pool=$2
+ local image=$3
+
+ for i in {1..30}; do
+ mirror_image_snapshot $cluster $pool $image
+ sleep 3
+ done
+}
+
+slow_untar_workload() {
+ local mountpt=$1
+
+ cp linux-5.4.tar.gz $mountpt
+ # run workload that updates the data and metadata of multiple files on disk.
+ # rate limit the workload such that the mirror snapshots can be taken as the
+ # contents of the image are progressively changed by the workload.
+ local ret=0
+ timeout 5m bash -c "zcat $mountpt/linux-5.4.tar.gz \
+ | pv -L 256K | tar xf - -C $mountpt" || ret=$?
+ if ((ret != 124)); then
+ echo "Workload completed prematurely"
+ return 1
+ fi
+}
+
+wait_for_image_removal() {
+ local cluster=$1
+ local pool=$2
+ local image=$3
+
+ for s in 1 2 4 8 8 8 8 8 8 8 8 16 16; do
+ if ! rbd --cluster $cluster ls $pool | grep -wq $image; then
+ return 0
+ fi
+ sleep $s
+ done
+
+ echo "image ${pool}/${image} not removed from cluster ${cluster}"
+ return 1
+}
+
+compare_demoted_promoted_image() {
+ local dev=${DEVS[$1-1]}
+ local img=${IMG_PREFIX}$1
+ local mntpt=${MNTPT_PREFIX}$1
+ local demote_md5 promote_md5
+
+ sudo umount ${mntpt}
+
+ # calculate hash before demotion of primary image
+ demote_md5=$(sudo md5sum ${dev} | awk '{print $1}')
+ sudo rbd --cluster ${CLUSTER1} device unmap -t ${RBD_DEVICE_TYPE} \
+ ${POOL}/${img}
+
+ demote_image ${CLUSTER1} ${POOL} ${img}
+ wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${img} 'up+unknown'
+ wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${img} 'up+unknown'
+ promote_image ${CLUSTER2} ${POOL} ${img}
+
+ # calculate hash after promotion of secondary image
+ if [[ $RBD_DEVICE_TYPE == "nbd" ]]; then
+ dev=$(sudo rbd --cluster ${CLUSTER2} device map -t nbd \
+ -o try-netlink ${POOL}/${img})
+ elif [[ $RBD_DEVICE_TYPE == "krbd" ]]; then
+ dev=$(sudo rbd --cluster ${CLUSTER2} device map -t krbd ${POOL}/${img})
+ fi
+ promote_md5=$(sudo md5sum ${dev} | awk '{print $1}')
+ sudo rbd --cluster ${CLUSTER2} device unmap -t ${RBD_DEVICE_TYPE} ${dev}
+
+ if [[ "${demote_md5}" != "${promote_md5}" ]]; then
+ echo "Mismatch for image ${POOL}/${img}: ${demote_md5} != ${promote_md5}"
+ return 1
+ fi
+}
+
+setup
+
+start_mirrors ${CLUSTER1}
+start_mirrors ${CLUSTER2}
+
+wget https://download.ceph.com/qa/linux-5.4.tar.gz
+
+for i in {1..10}; do
+ DEVS=()
+ SNAP_PIDS=()
+ COMPARE_PIDS=()
+ WORKLOAD_PIDS=()
+ RET=0
+ for j in {1..10}; do
+ IMG=${IMG_PREFIX}${j}
+ MNTPT=${MNTPT_PREFIX}${j}
+ create_image_and_enable_mirror ${CLUSTER1} ${POOL} ${IMG} \
+ ${RBD_MIRROR_MODE} 10G
+ if [[ $RBD_DEVICE_TYPE == "nbd" ]]; then
+ DEV=$(sudo rbd --cluster ${CLUSTER1} device map -t nbd \
+ -o try-netlink ${POOL}/${IMG})
+ elif [[ $RBD_DEVICE_TYPE == "krbd" ]]; then
+ DEV=$(sudo rbd --cluster ${CLUSTER1} device map -t krbd \
+ ${POOL}/${IMG})
+ else
+ echo "Unknown RBD_DEVICE_TYPE: ${RBD_DEVICE_TYPE}"
+ exit 1
+ fi
+ DEVS+=($DEV)
+ sudo mkfs.ext4 ${DEV}
+ mkdir ${MNTPT}
+ sudo mount ${DEV} ${MNTPT}
+ sudo chown $(whoami) ${MNTPT}
+ # create mirror snapshots under I/O every few seconds
+ take_mirror_snapshots ${CLUSTER1} ${POOL} ${IMG} &
+ SNAP_PIDS+=($!)
+ slow_untar_workload ${MNTPT} &
+ WORKLOAD_PIDS+=($!)
+ done
+ for pid in ${SNAP_PIDS[@]}; do
+ wait $pid || RET=$?
+ done
+ if ((RET != 0)); then
+ echo "take_mirror_snapshots failed"
+ exit 1
+ fi
+ for pid in ${WORKLOAD_PIDS[@]}; do
+ wait $pid || RET=$?
+ done
+ if ((RET != 0)); then
+ echo "slow_untar_workload failed"
+ exit 1
+ fi
+
+ for j in {1..10}; do
+ compare_demoted_promoted_image $j &
+ COMPARE_PIDS+=($!)
+ done
+ for pid in ${COMPARE_PIDS[@]}; do
+ wait $pid || RET=$?
+ done
+ if ((RET != 0)); then
+ echo "compare_demoted_promoted_image failed"
+ exit 1
+ fi
+
+ for j in {1..10}; do
+ IMG=${IMG_PREFIX}${j}
+ # Allow for removal of non-primary image by checking that mirroring
+ # image status is "up+replaying"
+ wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${IMG} 'up+replaying'
+ remove_image ${CLUSTER2} ${POOL} ${IMG}
+ wait_for_image_removal ${CLUSTER1} ${POOL} ${IMG}
+ rm -rf ${MNTPT_PREFIX}${j}
+ done
+done
+
+echo OK
diff --git a/qa/workunits/rbd/rbd-nbd.sh b/qa/workunits/rbd/rbd-nbd.sh
index bc89e9be5..8e1b05b3f 100755
--- a/qa/workunits/rbd/rbd-nbd.sh
+++ b/qa/workunits/rbd/rbd-nbd.sh
@@ -202,8 +202,11 @@ provisioned=`rbd -p ${POOL} --format xml du ${IMAGE} |
used=`rbd -p ${POOL} --format xml du ${IMAGE} |
$XMLSTARLET sel -t -m "//stats/images/image/used_size" -v .`
[ "${used}" -lt "${provisioned}" ]
+unmap_device ${DEV} ${PID}
# resize test
+DEV=`_sudo rbd device -t nbd -o try-netlink map ${POOL}/${IMAGE}`
+get_pid ${POOL}
devname=$(basename ${DEV})
blocks=$(awk -v dev=${devname} '$4 == dev {print $3}' /proc/partitions)
test -n "${blocks}"
@@ -216,9 +219,9 @@ rbd resize ${POOL}/${IMAGE} --allow-shrink --size ${SIZE}M
blocks2=$(awk -v dev=${devname} '$4 == dev {print $3}' /proc/partitions)
test -n "${blocks2}"
test ${blocks2} -eq ${blocks}
+unmap_device ${DEV} ${PID}
# read-only option test
-unmap_device ${DEV} ${PID}
DEV=`_sudo rbd --device-type nbd map --read-only ${POOL}/${IMAGE}`
PID=$(rbd device --device-type nbd list | awk -v pool=${POOL} -v img=${IMAGE} -v dev=${DEV} \
'$2 == pool && $3 == img && $5 == dev {print $1}')
diff --git a/qa/workunits/rbd/rbd_mirror_bootstrap.sh b/qa/workunits/rbd/rbd_mirror_bootstrap.sh
index 6ef06f2b8..f4c1070bc 100755
--- a/qa/workunits/rbd/rbd_mirror_bootstrap.sh
+++ b/qa/workunits/rbd/rbd_mirror_bootstrap.sh
@@ -1,8 +1,10 @@
-#!/bin/sh -ex
+#!/usr/bin/env bash
#
# rbd_mirror_bootstrap.sh - test peer bootstrap create/import
#
+set -ex
+
RBD_MIRROR_MANUAL_PEERS=1
RBD_MIRROR_INSTANCES=${RBD_MIRROR_INSTANCES:-1}
. $(dirname $0)/rbd_mirror_helpers.sh
diff --git a/qa/workunits/rbd/rbd_mirror_fsx_compare.sh b/qa/workunits/rbd/rbd_mirror_fsx_compare.sh
index 0ba3c97d7..79c36546d 100755
--- a/qa/workunits/rbd/rbd_mirror_fsx_compare.sh
+++ b/qa/workunits/rbd/rbd_mirror_fsx_compare.sh
@@ -1,10 +1,12 @@
-#!/bin/sh -ex
+#!/usr/bin/env bash
#
# rbd_mirror_fsx_compare.sh - test rbd-mirror daemon under FSX workload
#
# The script is used to compare FSX-generated images between two clusters.
#
+set -ex
+
. $(dirname $0)/rbd_mirror_helpers.sh
trap 'cleanup $?' INT TERM EXIT
diff --git a/qa/workunits/rbd/rbd_mirror_fsx_prepare.sh b/qa/workunits/rbd/rbd_mirror_fsx_prepare.sh
index d988987ba..6daadbbb4 100755
--- a/qa/workunits/rbd/rbd_mirror_fsx_prepare.sh
+++ b/qa/workunits/rbd/rbd_mirror_fsx_prepare.sh
@@ -1,10 +1,12 @@
-#!/bin/sh -ex
+#!/usr/bin/env bash
#
# rbd_mirror_fsx_prepare.sh - test rbd-mirror daemon under FSX workload
#
# The script is used to compare FSX-generated images between two clusters.
#
+set -ex
+
. $(dirname $0)/rbd_mirror_helpers.sh
setup
diff --git a/qa/workunits/rbd/rbd_mirror_ha.sh b/qa/workunits/rbd/rbd_mirror_ha.sh
index 37739a83d..1e43712a6 100755
--- a/qa/workunits/rbd/rbd_mirror_ha.sh
+++ b/qa/workunits/rbd/rbd_mirror_ha.sh
@@ -1,8 +1,10 @@
-#!/bin/sh -ex
+#!/usr/bin/env bash
#
# rbd_mirror_ha.sh - test rbd-mirror daemons in HA mode
#
+set -ex
+
RBD_MIRROR_INSTANCES=${RBD_MIRROR_INSTANCES:-7}
. $(dirname $0)/rbd_mirror_helpers.sh
diff --git a/qa/workunits/rbd/rbd_mirror_helpers.sh b/qa/workunits/rbd/rbd_mirror_helpers.sh
index f4961b925..b6abff96d 100755
--- a/qa/workunits/rbd/rbd_mirror_helpers.sh
+++ b/qa/workunits/rbd/rbd_mirror_helpers.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/usr/bin/env bash
#
# rbd_mirror_helpers.sh - shared rbd-mirror daemon helper functions
#
@@ -814,23 +814,23 @@ test_status_in_pool_dir()
local description_pattern="$5"
local service_pattern="$6"
- local status_log=${TEMPDIR}/$(mkfname ${cluster}-${pool}-${image}.mirror_status)
- CEPH_ARGS='' rbd --cluster ${cluster} mirror image status ${pool}/${image} |
- tee ${status_log} >&2
- grep "^ state: .*${state_pattern}" ${status_log} || return 1
- grep "^ description: .*${description_pattern}" ${status_log} || return 1
+ local status
+ status=$(CEPH_ARGS='' rbd --cluster ${cluster} mirror image status \
+ ${pool}/${image})
+ grep "^ state: .*${state_pattern}" <<< "$status" || return 1
+ grep "^ description: .*${description_pattern}" <<< "$status" || return 1
if [ -n "${service_pattern}" ]; then
- grep "service: *${service_pattern}" ${status_log} || return 1
+ grep "service: *${service_pattern}" <<< "$status" || return 1
elif echo ${state_pattern} | grep '^up+'; then
- grep "service: *${MIRROR_USER_ID_PREFIX}.* on " ${status_log} || return 1
+ grep "service: *${MIRROR_USER_ID_PREFIX}.* on " <<< "$status" || return 1
else
- grep "service: " ${status_log} && return 1
+ grep "service: " <<< "$status" && return 1
fi
# recheck using `mirror pool status` command to stress test it.
-
- local last_update="$(sed -nEe 's/^ last_update: *(.*) *$/\1/p' ${status_log})"
+ local last_update
+ last_update="$(sed -nEe 's/^ last_update: *(.*) *$/\1/p' <<< "$status")"
test_mirror_pool_status_verbose \
${cluster} ${pool} ${image} "${state_pattern}" "${last_update}" &&
return 0
@@ -847,16 +847,15 @@ test_mirror_pool_status_verbose()
local state_pattern="$4"
local prev_last_update="$5"
- local status_log=${TEMPDIR}/$(mkfname ${cluster}-${pool}.mirror_status)
-
- rbd --cluster ${cluster} mirror pool status ${pool} --verbose --format xml \
- > ${status_log}
+ local status
+ status=$(CEPH_ARGS='' rbd --cluster ${cluster} mirror pool status ${pool} \
+ --verbose --format xml)
local last_update state
last_update=$($XMLSTARLET sel -t -v \
- "//images/image[name='${image}']/last_update" < ${status_log})
+ "//images/image[name='${image}']/last_update" <<< "$status")
state=$($XMLSTARLET sel -t -v \
- "//images/image[name='${image}']/state" < ${status_log})
+ "//images/image[name='${image}']/state" <<< "$status")
echo "${state}" | grep "${state_pattern}" ||
test "${last_update}" '>' "${prev_last_update}"
diff --git a/qa/workunits/rbd/rbd_mirror_journal.sh b/qa/workunits/rbd/rbd_mirror_journal.sh
index 54f6aeec8..20a3b87db 100755
--- a/qa/workunits/rbd/rbd_mirror_journal.sh
+++ b/qa/workunits/rbd/rbd_mirror_journal.sh
@@ -1,4 +1,4 @@
-#!/bin/sh -ex
+#!/usr/bin/env bash
#
# rbd_mirror_journal.sh - test rbd-mirror daemon in journal-based mirroring mode
#
@@ -7,6 +7,8 @@
# socket, temporary files, and launches rbd-mirror daemon.
#
+set -ex
+
. $(dirname $0)/rbd_mirror_helpers.sh
setup
diff --git a/qa/workunits/rbd/rbd_mirror_snapshot.sh b/qa/workunits/rbd/rbd_mirror_snapshot.sh
index c70d48b09..17164c4d5 100755
--- a/qa/workunits/rbd/rbd_mirror_snapshot.sh
+++ b/qa/workunits/rbd/rbd_mirror_snapshot.sh
@@ -1,4 +1,4 @@
-#!/bin/sh -ex
+#!/usr/bin/env bash
#
# rbd_mirror_snapshot.sh - test rbd-mirror daemon in snapshot-based mirroring mode
#
@@ -7,6 +7,8 @@
# socket, temporary files, and launches rbd-mirror daemon.
#
+set -ex
+
MIRROR_POOL_MODE=image
MIRROR_IMAGE_MODE=snapshot
diff --git a/qa/workunits/rbd/rbd_mirror_stress.sh b/qa/workunits/rbd/rbd_mirror_stress.sh
index cb79aba7e..ea39d3aae 100755
--- a/qa/workunits/rbd/rbd_mirror_stress.sh
+++ b/qa/workunits/rbd/rbd_mirror_stress.sh
@@ -1,4 +1,4 @@
-#!/bin/sh -ex
+#!/usr/bin/env bash
#
# rbd_mirror_stress.sh - stress test rbd-mirror daemon
#
@@ -8,6 +8,8 @@
# tool during the many image test
#
+set -ex
+
IMAGE_COUNT=50
export LOCKDEP=0