summary refs log tree commit diff stats
path: root/qa/workunits/rados/test_crash.sh
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 18:45:59 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 18:45:59 +0000
commit 19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch)
tree 42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /qa/workunits/rados/test_crash.sh
parent Initial commit. (diff)
download ceph-upstream.tar.xz
ceph-upstream.zip
Adding upstream version 16.2.11+ds. upstream/16.2.11+ds upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rwxr-xr-x qa/workunits/rados/test_crash.sh 39
1 files changed, 39 insertions, 0 deletions
diff --git a/qa/workunits/rados/test_crash.sh b/qa/workunits/rados/test_crash.sh
new file mode 100755
index 000000000..6608d7872
--- /dev/null
+++ b/qa/workunits/rados/test_crash.sh
@@ -0,0 +1,39 @@
+#!/bin/sh
+# Workunit: abort the ceph-osd daemons so that crash handling can be checked.
+set -x
+
+# run on a single-node three-OSD cluster
+# send SIGABRT to every ceph-osd process, then pause for 5 seconds
+sudo killall -ABRT ceph-osd
+sleep 5
+
+# The kill left coredumps behind. Delete only the ones gdb attributes to a
+# ceph-osd terminated by signal 6 / SIGABRT; any other core must stay so
+# that teuthology still sees it and reports a test failure. sudo is needed
+# because the core files are root-owned with mode 600.
+# Paths are read one per line and quoted so none is word-split or globbed;
+# 'case' globs do the same substring tests as the GNU-only 'expr match'.
+find "$TESTDIR/archive/coredump" -type f | while IFS= read -r f; do
+  gdb_output=$(echo "quit" | sudo gdb /usr/bin/ceph-osd "$f")
+  case "$gdb_output" in
+    *generated*ceph-osd*) ;; # core was generated by ceph-osd: keep checking
+    *) continue ;;           # some other program's core: leave it alone
+  esac
+  case "$gdb_output" in
+    *terminated*"signal 6"* | *terminated*"signal SIGABRT"*) sudo rm "$f" ;;
+  esac
+done
+
+# restart ceph-crash so it finds the crash dumps on startup, then wait 30s
+sudo systemctl restart ceph-crash; sleep 30
+# 3 crashdumps must be registered (4 lines with the table header) and moved to crash/posted
+listed=$(ceph crash ls | wc -l)
+if ! [ $listed = 4 ]; then exit 1; fi
+posted=$(sudo find /var/lib/ceph/crash/posted/ -name meta | wc -l)
+if ! [ $posted = 3 ]; then exit 1; fi
+
+# there should be a health warning while the crashes are still unarchived
+ceph health detail | grep RECENT_CRASH || exit 1
+ceph crash archive-all; sleep 30
+# should be gone! compare the count exactly: a bare 'grep 0' also accepts 10, 20, ...
+[ "$(ceph health detail | grep -c RECENT_CRASH)" = 0 ] || exit 1