diff options
Diffstat (limited to 'qa/workunits/rados/test_crash.sh')
-rwxr-xr-x | qa/workunits/rados/test_crash.sh | 39 |
1 files changed, 39 insertions, 0 deletions
#!/bin/sh
#
# qa/workunits/rados/test_crash.sh
#
# Crash-reporting workunit: abort every ceph-osd, restart ceph-crash so it
# picks up the resulting crash dumps, then check that the dumps are
# registered, that a RECENT_CRASH health warning appears, and that the
# warning clears after "ceph crash archive-all".
#
# Run on a single-node three-OSD cluster.
#
# Environment: TESTDIR - directory that contains archive/coredump (read).
# Exit status: 0 when every check passes, 1 otherwise.

set -x

# Print a diagnostic on stderr and fail the workunit.
fail() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Succeed when the gdb report in $1 mentions a core "generated" by ceph-osd
# that was "terminated" by "signal 6" or "signal SIGABRT".
# Shell patterns are used instead of the GNU-only "expr match" so the script
# stays valid under /bin/sh and does not print match lengths into the log.
is_osd_abort_core() {
  case $1 in
    *generated*ceph-osd*) ;;
    *) return 1 ;;
  esac
  case $1 in
    *terminated*'signal 6'* | *terminated*'signal SIGABRT'*) return 0 ;;
  esac
  return 1
}

# Without aborted OSDs none of the checks below can pass, so stop right away
# instead of sleeping first.
sudo killall -ABRT ceph-osd || fail "could not send SIGABRT to ceph-osd"
sleep 5

# kill caused coredumps; find them and delete them, carefully, so as
# not to disturb other coredumps, or else teuthology will see them
# and assume test failure.  sudos are because the core files are
# root/600
find "$TESTDIR/archive/coredump" -type f | while IFS= read -r core; do
  gdb_output=$(echo "quit" | sudo gdb /usr/bin/ceph-osd "$core")
  if is_osd_abort_core "$gdb_output"; then
    # stdin is redirected so nothing can consume the remaining file list
    sudo rm -- "$core" </dev/null
  fi
done

# let daemon find crashdumps on startup
sudo systemctl restart ceph-crash
sleep 30

# must be 3 crashdumps registered and moved to crash/posted
crash_ls_lines=$(ceph crash ls | wc -l)
# 4 here bc of the table header
[ "$crash_ls_lines" -eq 4 ] \
  || fail "expected 4 lines from 'ceph crash ls', got $crash_ls_lines"

posted_metas=$(sudo find /var/lib/ceph/crash/posted/ -name meta | wc -l)
[ "$posted_metas" -eq 3 ] \
  || fail "expected 3 posted crash meta files, got $posted_metas"

# there should be a health warning
ceph health detail | grep RECENT_CRASH \
  || fail "expected a RECENT_CRASH health warning"

ceph crash archive-all || fail "ceph crash archive-all failed"
sleep 30

# should be gone!  Test for any match directly: piping "grep -c" into
# "grep 0" would also accept counts such as 10 or 20.
if ceph health detail | grep RECENT_CRASH; then
  fail "RECENT_CRASH still reported after archive-all"
fi

exit 0