blob: 26a4c9bdc151f71fcd085b8f5938f0cb0ed3945f (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
|
#!/bin/sh
# Crash-reporting smoke test: run on a single-node three-OSD cluster.
# Abort all OSDs, scrub away the resulting coredumps (so teuthology does
# not flag them as a test failure), then verify that ceph-crash picks up
# the crash reports and that the health warning appears and can be
# archived away.
set -x
sudo killall -ABRT ceph-osd
sleep 5
# The ABRT above caused coredumps; find them and delete them, carefully,
# so as not to disturb other coredumps, or else teuthology will see them
# and assume test failure.  sudos are because the core files are
# root/600.  Use a while-read loop instead of for-in-$(find) so paths
# with whitespace are handled, and grep -q instead of the GNU-only
# "expr match" since this runs under /bin/sh.
find "$TESTDIR/archive/coredump" -type f | while IFS= read -r f; do
    # gdb prints "Core was generated by ... ceph-osd" and
    # "terminated with signal ..." when the core matches; feed it
    # "quit" so it exits immediately after loading the core.
    gdb_output=$(echo "quit" | sudo gdb /usr/bin/ceph-osd "$f")
    if echo "$gdb_output" | grep -q 'generated.*ceph-osd' && \
       echo "$gdb_output" | grep -Eq 'terminated.*signal (6|SIGABRT)'
    then
        sudo rm -- "$f"
    fi
done
# ceph-crash runs as the unprivileged "ceph" user, but when under test
# the ceph osd daemons are running as root, so their crash files aren't
# readable. let's chown them so they behave as they would in real life.
sudo chown -R ceph:ceph /var/lib/ceph/crash
# let daemon find crashdumps on startup
sudo systemctl restart ceph-crash
sleep 30
# must be 3 crashdumps registered and moved to crash/posted
[ "$(ceph crash ls | wc -l)" = 4 ] || exit 1 # 4 here bc of the table header
[ "$(sudo find /var/lib/ceph/crash/posted/ -name meta | wc -l)" = 3 ] || exit 1
# there should be a health warning
ceph health detail | grep RECENT_CRASH || exit 1
ceph crash archive-all
sleep 30
# warning should be gone: the RECENT_CRASH count must be exactly 0
# (match the whole line -- a bare "grep 0" would also accept "10").
ceph health detail | grep -c RECENT_CRASH | grep -qx 0
|