3 files changed, 272 insertions, 0 deletions
diff --git a/qa/workunits/fs/full/subvolume_clone.sh b/qa/workunits/fs/full/subvolume_clone.sh
new file mode 100755
index 000000000..a11131215
--- /dev/null
+++ b/qa/workunits/fs/full/subvolume_clone.sh
@@ -0,0 +1,114 @@
+#!/usr/bin/env bash
+set -ex
+
+# This testcase tests the  'ceph fs subvolume snapshot clone' when the osd is full.
+# The clone fails with 'MetadataMgrException: -28 (error in write)' and
+# truncates the config file of corresponding subvolume while updating the config file.
+# Hence the subsequent subvolume commands on the clone fails with
+# 'MetadataMgrException: -2 (section 'GLOBAL' does not exist)' traceback.
+
+# The osd is of the size 1GB. The full-ratios are set so that osd is treated full
+# at around 600MB. The subvolume is created and 100MB is written.
+# The subvolume is snapshotted and cloned ten times. Since the clone delay is set to 15 seconds,
+# all the clones reach pending state for sure. Among ten clones, only few succeed and rest fails
+# with ENOSPACE.
+
+# At this stage, ".meta" config file of the failed clones are checked if it's truncated.
+# and clone status command is checked for traceback.
+
+# Note that the failed clones would be in retry loop and it's state would be 'pending' or 'in-progress'.
+# It's state is not updated to 'failed' as the config update to gets ENOSPACE too.
+
+set -e
+ignore_failure() {
+        if "$@"; then return 0; else return 0; fi
+}
+
+expect_failure() {
+        if "$@"; then return 1; else return 0; fi
+}
+
+NUM_CLONES=10
+
+ceph fs subvolume create cephfs sub_0
+subvol_path_0=$(ceph fs subvolume getpath cephfs sub_0 2>/dev/null)
+
+# For debugging
+echo "Before ratios are set"
+df $CEPH_MNT
+ceph osd df
+
+ceph osd set-full-ratio 0.6
+ceph osd set-nearfull-ratio 0.50
+ceph osd set-backfillfull-ratio 0.55
+
+# For debugging
+echo "After ratios are set"
+df -h
+ceph osd df
+
+for i in {1..100};do sudo dd if=/dev/urandom of=$CEPH_MNT$subvol_path_0/1MB_file-$i status=progress bs=1M count=1 conv=fdatasync;done
+
+# For debugging
+echo "After subvolumes are written"
+df -h $CEPH_MNT
+ceph osd df
+
+# snapshot
+ceph fs subvolume snapshot create cephfs sub_0 snap_0
+
+# Set clone snapshot delay
+ceph config set mgr mgr/volumes/snapshot_clone_delay 15
+
+# Schedule few clones, some would fail with no space
+for i in $(eval echo {1..$NUM_CLONES});do ceph fs subvolume snapshot clone cephfs sub_0 snap_0 clone_$i;done
+
+# Wait for osd is full
+timeout=90
+while [ $timeout -gt 0 ]
+do
+  health=$(ceph health detail)
+  [[ $health = *"OSD_FULL"* ]] && echo "OSD is full" && break
+  echo "Wating for osd to be full: $timeout"
+  sleep 1
+  let "timeout-=1"
+done
+
+# For debugging
+echo "After osd is full"
+df -h $CEPH_MNT
+ceph osd df
+
+# Check clone status, this should not crash
+for i in $(eval echo {1..$NUM_CLONES})
+do
+  ignore_failure ceph fs clone status cephfs clone_$i >/tmp/out_${PID}_file 2>/tmp/error_${PID}_file
+  cat /tmp/error_${PID}_file
+  if grep "complete" /tmp/out_${PID}_file; then
+    echo "The clone_$i is completed"
+  else
+    #in-progress/pending clones, No traceback should be found in stderr
+    echo clone_$i in PENDING/IN-PROGRESS
+    expect_failure sudo grep "Traceback" /tmp/error_${PID}_file
+    #config file should not be truncated and GLOBAL section should be found
+    sudo grep "GLOBAL" $CEPH_MNT/volumes/_nogroup/clone_$i/.meta
+  fi
+done
+
+# Hard cleanup
+ignore_failure sudo rm -rf $CEPH_MNT/_index/clone/*
+ignore_failure sudo rm -rf $CEPH_MNT/volumes/_nogroup/clone_*
+ignore_failure sudo rmdir $CEPH_MNT/volumes/_nogroup/sub_0/.snap/snap_0
+ignore_failure sudo rm -rf $CEPH_MNT/volumes/_nogroup/sub_0
+
+#Set the ratios back for other full tests to run
+ceph osd set-full-ratio 0.95
+ceph osd set-nearfull-ratio 0.95
+ceph osd set-backfillfull-ratio 0.95
+
+#After test
+echo "After test"
+df -h $CEPH_MNT
+ceph osd df
+
+echo OK
diff --git a/qa/workunits/fs/full/subvolume_rm.sh b/qa/workunits/fs/full/subvolume_rm.sh
new file mode 100755
index 000000000..a464e30f5
--- /dev/null
+++ b/qa/workunits/fs/full/subvolume_rm.sh
@@ -0,0 +1,72 @@
+#!/usr/bin/env bash
+set -ex
+
+# This testcase tests the scenario of the 'ceph fs subvolume rm' mgr command
+# when the osd is full. The command used to hang. The osd is of the size 1GB.
+# The subvolume is created and 500MB file is written. The full-ratios are
+# set below 500MB such that the osd is treated as full. Now the subvolume is
+# is removed. This should be successful with the introduction of FULL
+# capabilities which the mgr holds.
+
+set -e
+expect_failure() {
+	if "$@"; then return 1; else return 0; fi
+}
+
+ceph fs subvolume create cephfs sub_0
+subvol_path=$(ceph fs subvolume getpath cephfs sub_0 2>/dev/null)
+
+#For debugging
+echo "Before write"
+df -h
+ceph osd df
+
+sudo dd if=/dev/urandom of=$CEPH_MNT$subvol_path/500MB_file-1 status=progress bs=1M count=500
+
+ceph osd set-full-ratio 0.2
+ceph osd set-nearfull-ratio 0.16
+ceph osd set-backfillfull-ratio 0.18
+
+timeout=30
+while [ $timeout -gt 0 ]
+do
+  health=$(ceph health detail)
+  [[ $health = *"OSD_FULL"* ]] && echo "OSD is full" && break
+  echo "Wating for osd to be full: $timeout"
+  sleep 1
+  let "timeout-=1"
+done
+
+#For debugging
+echo "After ratio set"
+df -h
+ceph osd df
+
+#Delete subvolume
+ceph fs subvolume rm cephfs sub_0
+
+#Validate subvolume is deleted
+expect_failure ceph fs subvolume info cephfs sub_0
+
+#Wait for subvolume to delete data
+trashdir=$CEPH_MNT/volumes/_deleting
+timeout=30
+while [ $timeout -gt 0 ]
+do
+  [ -z "$(sudo ls -A $trashdir)" ] && echo "Trash directory $trashdir is empty" &&  break
+  echo "Wating for trash dir to be empty: $timeout"
+  sleep 1
+  let "timeout-=1"
+done
+
+#Set the ratios back for other full tests to run
+ceph osd set-full-ratio 0.95
+ceph osd set-nearfull-ratio 0.95
+ceph osd set-backfillfull-ratio 0.95
+
+#After test
+echo "After test"
+df -h
+ceph osd df
+
+echo OK
diff --git a/qa/workunits/fs/full/subvolume_snapshot_rm.sh b/qa/workunits/fs/full/subvolume_snapshot_rm.sh
new file mode 100755
index 000000000..f6d0add9f
--- /dev/null
+++ b/qa/workunits/fs/full/subvolume_snapshot_rm.sh
@@ -0,0 +1,86 @@
+#!/usr/bin/env bash
+set -ex
+
+# This testcase tests the  'ceph fs subvolume snapshot rm' when the osd is full.
+# The snapshot rm fails with 'MetadataMgrException: -28 (error in write)' and
+# truncates the config file of corresponding subvolume. Hence the subsequent
+# snapshot rm of the same snapshot fails with 'MetadataMgrException: -2 (section 'GLOBAL' does not exist)'
+# traceback.
+
+# The osd is of the size 1GB. The subvolume is created and 800MB file is written.
+# Then full-ratios are set below 500MB such that the osd is treated as full.
+# The subvolume snapshot is taken which succeeds as no extra space is required
+# for snapshot. Now, the removal of the snapshot fails with ENOSPACE as it
+# fails to remove the snapshot metadata set. The snapshot removal fails
+# but should not traceback and truncate the config file.
+
+set -e
+expect_failure() {
+	if "$@"; then return 1; else return 0; fi
+}
+
+ignore_failure() {
+	if "$@"; then return 0; else return 0; fi
+}
+
+ceph fs subvolume create cephfs sub_0
+subvol_path=$(ceph fs subvolume getpath cephfs sub_0 2>/dev/null)
+
+#For debugging
+echo "Before write"
+df $CEPH_MNT
+ceph osd df
+
+# Write 800MB file and set full ratio to around 200MB
+ignore_failure sudo dd if=/dev/urandom of=$CEPH_MNT$subvol_path/800MB_file-1 status=progress bs=1M count=800 conv=fdatasync
+
+ceph osd set-full-ratio 0.2
+ceph osd set-nearfull-ratio 0.16
+ceph osd set-backfillfull-ratio 0.18
+
+timeout=30
+while [ $timeout -gt 0 ]
+do
+  health=$(ceph health detail)
+  [[ $health = *"OSD_FULL"* ]] && echo "OSD is full" && break
+  echo "Wating for osd to be full: $timeout"
+  sleep 1
+  let "timeout-=1"
+done
+
+#Take snapshot
+ceph fs subvolume snapshot create cephfs sub_0 snap_0
+
+#Remove snapshot fails but should not throw traceback
+expect_failure ceph fs subvolume snapshot rm cephfs sub_0 snap_0 2>/tmp/error_${PID}_file
+cat /tmp/error_${PID}_file
+
+# No traceback should be found
+expect_failure grep "Traceback" /tmp/error_${PID}_file
+
+# Validate config file is not truncated and GLOBAL section exists
+sudo grep "GLOBAL" $CEPH_MNT/volumes/_nogroup/sub_0/.meta
+
+#For debugging
+echo "After write"
+df $CEPH_MNT
+ceph osd df
+
+# Snapshot removal with force option should succeed
+ceph fs subvolume snapshot rm cephfs sub_0 snap_0 --force
+
+#Cleanup from backend
+ignore_failure sudo rm -f /tmp/error_${PID}_file
+ignore_failure sudo rm -rf $CEPH_MNT/volumes/_nogroup/sub_0
+
+#Set the ratios back for other full tests to run
+ceph osd set-full-ratio 0.95
+ceph osd set-nearfull-ratio 0.95
+ceph osd set-backfillfull-ratio 0.95
+
+#After test
+echo "After test"
+df -h $CEPH_MNT
+ceph osd df
+
+echo OK