summaryrefslogtreecommitdiffstats
path: root/qa/workunits/rados/test_crash.sh
diff options
context:
space:
mode:
Diffstat (limited to '')
-rwxr-xr-xqa/workunits/rados/test_crash.sh44
1 files changed, 44 insertions, 0 deletions
diff --git a/qa/workunits/rados/test_crash.sh b/qa/workunits/rados/test_crash.sh
new file mode 100755
index 000000000..26a4c9bdc
--- /dev/null
+++ b/qa/workunits/rados/test_crash.sh
@@ -0,0 +1,44 @@
+#!/bin/sh
+
+set -x
+
+# run on a single-node three-OSD cluster
+
+sudo killall -ABRT ceph-osd
+sleep 5
+
+# kill caused coredumps; find them and delete them, carefully, so as
+# not to disturb other coredumps, or else teuthology will see them
+# and assume test failure. sudos are because the core files are
+# root/600
+for f in $(find $TESTDIR/archive/coredump -type f); do
+ gdb_output=$(echo "quit" | sudo gdb /usr/bin/ceph-osd $f)
+ if expr match "$gdb_output" ".*generated.*ceph-osd.*" && \
+ ( \
+
+ expr match "$gdb_output" ".*terminated.*signal 6.*" || \
+ expr match "$gdb_output" ".*terminated.*signal SIGABRT.*" \
+ )
+ then
+ sudo rm $f
+ fi
+done
+
+# ceph-crash runs as the unprivileged "ceph" user, but when under test
+# the ceph osd daemons are running as root, so their crash files aren't
+# readable. let's chown them so they behave as they would in real life.
+sudo chown -R ceph:ceph /var/lib/ceph/crash
+
+# let daemon find crashdumps on startup
+sudo systemctl restart ceph-crash
+sleep 30
+
+# must be 3 crashdumps registered and moved to crash/posted
+[ $(ceph crash ls | wc -l) = 4 ] || exit 1 # 4 here bc of the table header
+[ $(sudo find /var/lib/ceph/crash/posted/ -name meta | wc -l) = 3 ] || exit 1
+
+# there should be a health warning
+ceph health detail | grep RECENT_CRASH || exit 1
+ceph crash archive-all
+sleep 30
+ceph health detail | grep -c RECENT_CRASH | grep 0 # should be gone!