# NOTE: the patch metadata below is preserved as comments. Its line counts
# describe the original patch, not this reformatted text.
#
# From 19fcec84d8d7d21e796c7624e521b60d28ee21ed Mon Sep 17 00:00:00 2001
# From: Daniel Baumann
# Date: Sun, 7 Apr 2024 20:45:59 +0200
# Subject: Adding upstream version 16.2.11+ds.
#
# Signed-off-by: Daniel Baumann
# ---
#  qa/workunits/fs/full/subvolume_clone.sh       | 114 ++++++++++++++++++++++++++
#  qa/workunits/fs/full/subvolume_rm.sh          |  72 ++++++++++++++++
#  qa/workunits/fs/full/subvolume_snapshot_rm.sh |  86 +++++++++++++++++++
#  3 files changed, 272 insertions(+)
#  create mode 100755 qa/workunits/fs/full/subvolume_clone.sh
#  create mode 100755 qa/workunits/fs/full/subvolume_rm.sh
#  create mode 100755 qa/workunits/fs/full/subvolume_snapshot_rm.sh
#
# (limited to 'qa/workunits/fs/full')
#
# diff --git a/qa/workunits/fs/full/subvolume_clone.sh b/qa/workunits/fs/full/subvolume_clone.sh
# new file mode 100755
# index 000000000..a11131215
# --- /dev/null
# +++ b/qa/workunits/fs/full/subvolume_clone.sh
# @@ -0,0 +1,114 @@
#!/usr/bin/env bash
set -ex

# This testcase tests 'ceph fs subvolume snapshot clone' when the osd is full.
# The clone fails with 'MetadataMgrException: -28 (error in write)' and
# truncates the config file of the corresponding subvolume while updating it.
# Hence the subsequent subvolume commands on the clone fail with a
# 'MetadataMgrException: -2 (section 'GLOBAL' does not exist)' traceback.

# The osd is of the size 1GB. The full-ratios are set so that the osd is treated
# as full at around 600MB. The subvolume is created and 100MB is written.
# The subvolume is snapshotted and cloned ten times. Since the clone delay is
# set to 15 seconds, all the clones reach pending state for sure. Among the ten
# clones, only a few succeed and the rest fail with ENOSPC.

# At this stage, the ".meta" config file of each failed clone is checked for
# truncation, and the clone status command is checked for a traceback.

# Note that the failed clones would be in a retry loop and their state would be
# 'pending' or 'in-progress'. Their state is not updated to 'failed' as the
# config update gets ENOSPC too.

set -e

# CEPH_MNT is expanded into dd targets and 'sudo rm -rf' paths below. Refuse to
# run with it unset or empty rather than operate on paths rooted at '/'.
: "${CEPH_MNT:?CEPH_MNT must be set to the CephFS mount point}"

# Run the given command and report success whatever its exit status was.
ignore_failure() {
  "$@" || true
}

# Succeed only if the given command fails.
expect_failure() {
  if "$@"; then return 1; else return 0; fi
}

# Put the three full ratios back to 0.95 so other full tests can run.
restore_full_ratios() {
  ceph osd set-full-ratio 0.95
  ceph osd set-nearfull-ratio 0.95
  ceph osd set-backfillfull-ratio 0.95
}

# EXIT handler: drop the scratch files and, if the script stops while the
# ratios are still lowered, restore them on a best-effort basis.
cleanup_on_exit() {
  rm -f -- "$status_out" "$status_err" || true
  if (( ratios_lowered )); then
    restore_full_ratios || true
  fi
}

# Poll 'ceph health detail' once per second until it mentions OSD_FULL.
# Arguments: $1 - maximum number of one-second polls
# Returns:   0 once OSD_FULL is seen, 1 if the polls are exhausted
# The original loop decremented with 'let "timeout-=1"', which returns 1 when
# the result is 0 and therefore killed the script silently under 'set -e' on
# the last poll. The abort on timeout is kept, but is now explicit.
wait_for_osd_full() {
  local remaining=$1 health
  while (( remaining > 0 )); do
    health=$(ceph health detail)
    if [[ $health == *"OSD_FULL"* ]]; then
      echo "OSD is full"
      return 0
    fi
    echo "Wating for osd to be full: $remaining"
    sleep 1
    remaining=$((remaining - 1))
  done
  echo "Timed out waiting for OSD_FULL" >&2
  return 1
}

NUM_CLONES=10

# Scratch files for the stdout/stderr of 'ceph fs clone status'. The original
# used /tmp/out_${PID}_file and /tmp/error_${PID}_file, but PID is never set in
# this script, so the names were fixed and predictable.
status_out=$(mktemp /tmp/clone_status_out.XXXXXX)
status_err=$(mktemp /tmp/clone_status_err.XXXXXX)
ratios_lowered=0
trap cleanup_on_exit EXIT

ceph fs subvolume create cephfs sub_0
subvol_path_0=$(ceph fs subvolume getpath cephfs sub_0 2>/dev/null)

# For debugging
echo "Before ratios are set"
df "$CEPH_MNT"
ceph osd df

ratios_lowered=1
ceph osd set-full-ratio 0.6
ceph osd set-nearfull-ratio 0.50
ceph osd set-backfillfull-ratio 0.55

# For debugging
echo "After ratios are set"
df -h
ceph osd df

# Write one hundred 1MB files into the subvolume.
for i in {1..100}; do
  sudo dd if=/dev/urandom of="$CEPH_MNT$subvol_path_0/1MB_file-$i" \
    status=progress bs=1M count=1 conv=fdatasync
done

# For debugging
echo "After subvolumes are written"
df -h "$CEPH_MNT"
ceph osd df

# snapshot
ceph fs subvolume snapshot create cephfs sub_0 snap_0

# Set clone snapshot delay
ceph config set mgr mgr/volumes/snapshot_clone_delay 15

# Schedule a few clones, some would fail with no space
for ((i = 1; i <= NUM_CLONES; i++)); do
  ceph fs subvolume snapshot clone cephfs sub_0 snap_0 "clone_$i"
done

# Wait for the osd to become full
wait_for_osd_full 90

# For debugging
echo "After osd is full"
df -h "$CEPH_MNT"
ceph osd df

# Check clone status, this should not crash
for ((i = 1; i <= NUM_CLONES; i++)); do
  ignore_failure ceph fs clone status cephfs "clone_$i" >"$status_out" 2>"$status_err"
  cat "$status_err"
  if grep "complete" "$status_out"; then
    echo "The clone_$i is completed"
  else
    # in-progress/pending clones: no traceback should be found in stderr
    echo "clone_$i in PENDING/IN-PROGRESS"
    expect_failure sudo grep "Traceback" "$status_err"
    # config file should not be truncated and GLOBAL section should be found
    sudo grep "GLOBAL" "$CEPH_MNT/volumes/_nogroup/clone_$i/.meta"
  fi
done

# Hard cleanup (globs are intentionally left outside the quotes)
ignore_failure sudo rm -rf "$CEPH_MNT"/_index/clone/*
ignore_failure sudo rm -rf "$CEPH_MNT"/volumes/_nogroup/clone_*
ignore_failure sudo rmdir "$CEPH_MNT/volumes/_nogroup/sub_0/.snap/snap_0"
ignore_failure sudo rm -rf "$CEPH_MNT/volumes/_nogroup/sub_0"

# Set the ratios back for other full tests to run. On this path a failure
# still fails the test; the EXIT handler only covers earlier aborts.
restore_full_ratios
ratios_lowered=0

# After test
echo "After test"
df -h "$CEPH_MNT"
ceph osd df

echo OK

# diff --git a/qa/workunits/fs/full/subvolume_rm.sh b/qa/workunits/fs/full/subvolume_rm.sh
# new file mode 100755
# index 000000000..a464e30f5
# --- /dev/null
# +++ b/qa/workunits/fs/full/subvolume_rm.sh
# @@ -0,0 +1,72 @@
#!/usr/bin/env bash
set -ex

# This testcase tests the scenario of the 'ceph fs subvolume rm' mgr command
# when the osd is full. The command used to hang. The osd is of the size 1GB.
# The subvolume is created and a 500MB file is written. The full-ratios are
# set below 500MB such that the osd is treated as full. Now the subvolume
# is removed. This should be successful with the introduction of FULL
# capabilities which the mgr holds.

# Body of the 'ceph fs subvolume rm' test: fill the subvolume, lower the full
# ratios until the OSD reports full, remove the subvolume and wait for the
# trash directory to drain.
set -e

# CEPH_MNT is expanded into the dd target and the trash-directory path below.
# Refuse to run with it unset or empty rather than use paths rooted at '/'.
: "${CEPH_MNT:?CEPH_MNT must be set to the CephFS mount point}"

# Succeed only if the given command fails.
expect_failure() {
  if "$@"; then return 1; else return 0; fi
}

# Put the three full ratios back to 0.95 so other full tests can run.
restore_full_ratios() {
  ceph osd set-full-ratio 0.95
  ceph osd set-nearfull-ratio 0.95
  ceph osd set-backfillfull-ratio 0.95
}

# EXIT handler: if the script stops while the ratios are still lowered,
# restore them on a best-effort basis.
restore_ratios_on_exit() {
  if (( ratios_lowered )); then
    restore_full_ratios || true
  fi
}

# Poll 'ceph health detail' once per second until it mentions OSD_FULL.
# Arguments: $1 - maximum number of one-second polls
# Returns:   0 once OSD_FULL is seen, 1 if the polls are exhausted
# The original loop decremented with 'let "timeout-=1"', which returns 1 when
# the result is 0 and therefore killed the script silently under 'set -e' on
# the last poll. The abort on timeout is kept, but is now explicit.
wait_for_osd_full() {
  local remaining=$1 health
  while (( remaining > 0 )); do
    health=$(ceph health detail)
    if [[ $health == *"OSD_FULL"* ]]; then
      echo "OSD is full"
      return 0
    fi
    echo "Wating for osd to be full: $remaining"
    sleep 1
    remaining=$((remaining - 1))
  done
  echo "Timed out waiting for OSD_FULL" >&2
  return 1
}

# Poll a directory once per second until 'sudo ls -A' prints nothing for it.
# Arguments: $1 - directory to watch, $2 - maximum number of one-second polls
# Returns:   0 once the listing is empty, 1 if the polls are exhausted
# As in the original, a failing 'ls' also produces empty output and is
# therefore treated as "empty".
wait_for_trash_empty() {
  local dir=$1 remaining=$2
  while (( remaining > 0 )); do
    if [[ -z "$(sudo ls -A "$dir")" ]]; then
      echo "Trash directory $dir is empty"
      return 0
    fi
    echo "Wating for trash dir to be empty: $remaining"
    sleep 1
    remaining=$((remaining - 1))
  done
  echo "Timed out waiting for trash directory $dir to become empty" >&2
  return 1
}

ratios_lowered=0
trap restore_ratios_on_exit EXIT

ceph fs subvolume create cephfs sub_0
subvol_path=$(ceph fs subvolume getpath cephfs sub_0 2>/dev/null)

# For debugging
echo "Before write"
df -h
ceph osd df

sudo dd if=/dev/urandom of="$CEPH_MNT$subvol_path/500MB_file-1" status=progress bs=1M count=500

ratios_lowered=1
ceph osd set-full-ratio 0.2
ceph osd set-nearfull-ratio 0.16
ceph osd set-backfillfull-ratio 0.18

wait_for_osd_full 30

# For debugging
echo "After ratio set"
df -h
ceph osd df

# Delete subvolume
ceph fs subvolume rm cephfs sub_0

# Validate subvolume is deleted
expect_failure ceph fs subvolume info cephfs sub_0

# Wait for subvolume to delete data
trashdir=$CEPH_MNT/volumes/_deleting
wait_for_trash_empty "$trashdir" 30

# Set the ratios back for other full tests to run. On this path a failure
# still fails the test; the EXIT handler only covers earlier aborts.
restore_full_ratios
ratios_lowered=0

# After test
echo "After test"
df -h
ceph osd df

echo OK

# diff --git a/qa/workunits/fs/full/subvolume_snapshot_rm.sh b/qa/workunits/fs/full/subvolume_snapshot_rm.sh
# new file mode 100755
# index 000000000..f6d0add9f
# --- /dev/null
# +++ b/qa/workunits/fs/full/subvolume_snapshot_rm.sh
# @@ -0,0 +1,86 @@
#!/usr/bin/env bash
set -ex

# This testcase tests the 'ceph fs subvolume snapshot rm' when the osd is full.
# The snapshot rm fails with 'MetadataMgrException: -28 (error in write)' and
# truncates the config file of corresponding subvolume.
# Hence the subsequent snapshot rm of the same snapshot fails with a
# 'MetadataMgrException: -2 (section 'GLOBAL' does not exist)' traceback.

# The osd is of the size 1GB. The subvolume is created and an 800MB file is
# written. Then the full-ratio is set to 0.2 (around 200MB) such that the osd
# is treated as full. The subvolume snapshot is taken, which succeeds as no
# extra space is required for a snapshot. Now, the removal of the snapshot
# fails with ENOSPC as it fails to remove the snapshot metadata set. The
# snapshot removal fails but should not traceback and truncate the config file.

set -e

# CEPH_MNT is expanded into the dd target and a 'sudo rm -rf' path below.
# Refuse to run with it unset or empty rather than use paths rooted at '/'.
: "${CEPH_MNT:?CEPH_MNT must be set to the CephFS mount point}"

# Succeed only if the given command fails.
expect_failure() {
  if "$@"; then return 1; else return 0; fi
}

# Run the given command and report success whatever its exit status was.
ignore_failure() {
  "$@" || true
}

# Put the three full ratios back to 0.95 so other full tests can run.
restore_full_ratios() {
  ceph osd set-full-ratio 0.95
  ceph osd set-nearfull-ratio 0.95
  ceph osd set-backfillfull-ratio 0.95
}

# EXIT handler: drop the scratch file and, if the script stops while the
# ratios are still lowered, restore them on a best-effort basis.
cleanup_on_exit() {
  rm -f -- "$rm_err" || true
  if (( ratios_lowered )); then
    restore_full_ratios || true
  fi
}

# Poll 'ceph health detail' once per second until it mentions OSD_FULL.
# Arguments: $1 - maximum number of one-second polls
# Returns:   0 once OSD_FULL is seen, 1 if the polls are exhausted
# The original loop decremented with 'let "timeout-=1"', which returns 1 when
# the result is 0 and therefore killed the script silently under 'set -e' on
# the last poll. The abort on timeout is kept, but is now explicit.
wait_for_osd_full() {
  local remaining=$1 health
  while (( remaining > 0 )); do
    health=$(ceph health detail)
    if [[ $health == *"OSD_FULL"* ]]; then
      echo "OSD is full"
      return 0
    fi
    echo "Wating for osd to be full: $remaining"
    sleep 1
    remaining=$((remaining - 1))
  done
  echo "Timed out waiting for OSD_FULL" >&2
  return 1
}

# Scratch file for the stderr of the failing 'snapshot rm'. The original used
# /tmp/error_${PID}_file, but PID is never set in this script, so the name was
# fixed and predictable.
rm_err=$(mktemp /tmp/snapshot_rm_err.XXXXXX)
ratios_lowered=0
trap cleanup_on_exit EXIT

ceph fs subvolume create cephfs sub_0
subvol_path=$(ceph fs subvolume getpath cephfs sub_0 2>/dev/null)

# For debugging
echo "Before write"
df "$CEPH_MNT"
ceph osd df

# Write 800MB file and set full ratio to around 200MB
ignore_failure sudo dd if=/dev/urandom of="$CEPH_MNT$subvol_path/800MB_file-1" \
  status=progress bs=1M count=800 conv=fdatasync

ratios_lowered=1
ceph osd set-full-ratio 0.2
ceph osd set-nearfull-ratio 0.16
ceph osd set-backfillfull-ratio 0.18

wait_for_osd_full 30

# Take snapshot
ceph fs subvolume snapshot create cephfs sub_0 snap_0

# Remove snapshot fails but should not throw traceback
expect_failure ceph fs subvolume snapshot rm cephfs sub_0 snap_0 2>"$rm_err"
cat "$rm_err"

# No traceback should be found
expect_failure grep "Traceback" "$rm_err"

# Validate config file is not truncated and GLOBAL section exists
sudo grep "GLOBAL" "$CEPH_MNT/volumes/_nogroup/sub_0/.meta"

# For debugging
echo "After write"
df "$CEPH_MNT"
ceph osd df

# Snapshot removal with force option should succeed
ceph fs subvolume snapshot rm cephfs sub_0 snap_0 --force

# Cleanup from backend (the scratch file is removed by the EXIT handler)
ignore_failure sudo rm -rf "$CEPH_MNT/volumes/_nogroup/sub_0"

# Set the ratios back for other full tests to run. On this path a failure
# still fails the test; the EXIT handler only covers earlier aborts.
restore_full_ratios
ratios_lowered=0

# After test
echo "After test"
df -h "$CEPH_MNT"
ceph osd df

echo OK

# --
# cgit v1.2.3