summary refs log tree commit diff stats
path: root/qa/workunits/rados/test_crash.sh
diff options
context:
space:
mode:
Diffstat (limited to 'qa/workunits/rados/test_crash.sh')
-rwxr-xr-x qa/workunits/rados/test_crash.sh 39
1 files changed, 39 insertions, 0 deletions
diff --git a/qa/workunits/rados/test_crash.sh b/qa/workunits/rados/test_crash.sh
new file mode 100755
index 000000000..6608d7872
--- /dev/null
+++ b/qa/workunits/rados/test_crash.sh
@@ -0,0 +1,39 @@
+#!/bin/sh
+# Aborts all ceph-osd daemons, then checks the crash dumps get registered.
+set -x # trace every command as it runs
+
+# run on a single-node three-OSD cluster
+# SIGABRT every ceph-osd process, then wait 5 seconds before continuing
+sudo killall -ABRT ceph-osd
+sleep 5
+
+# kill caused coredumps; find them and delete them, carefully, so as
+# not to disturb other coredumps, or else teuthology will see them
+# and assume test failure. sudos are because the core files are
+# root/600
+for f in $(find $TESTDIR/archive/coredump -type f); do
+ gdb_output=$(echo "quit" | sudo gdb /usr/bin/ceph-osd $f)
+ if expr match "$gdb_output" ".*generated.*ceph-osd.*" && \
+ ( \
+
+ expr match "$gdb_output" ".*terminated.*signal 6.*" || \
+ expr match "$gdb_output" ".*terminated.*signal SIGABRT.*" \
+ )
+ then
+ sudo rm $f
+ fi
+done
+
+# restart ceph-crash so that it finds the crashdumps on startup
+sudo systemctl restart ceph-crash
+sleep 30
+
+# expect 3 crashdumps, both registered and moved to crash/posted
+test $(ceph crash ls | wc -l) = 4 || exit 1 # 3 rows + 1 table header line
+test $(sudo find /var/lib/ceph/crash/posted/ -name meta | wc -l) = 3 || exit 1
+
+# RECENT_CRASH must be raised now, and be gone once all crashes are archived
+ceph health detail | grep RECENT_CRASH || exit 1
+ceph crash archive-all
+sleep 30
+ceph health detail | grep -c RECENT_CRASH | grep -x 0 # exactly 0, not 10/20