summary refs log tree commit diff stats
path: root/qa/workunits/rbd/krbd_watch_errors.sh
blob: f650d2a748944688d7aaa81d3f93129736696505 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#!/usr/bin/env bash

# Stop at the first failing command (including a failing stage of a
# pipeline) and trace every command as it is executed.
set -o errexit -o xtrace -o pipefail

# Write "1" to a mapped device's sysfs "refresh" attribute over and over,
# never returning on its own.
#
# Globals:   SYSFS_DIR (read) - directory holding the per-device sysfs dirs
# Arguments: $1 - device id, i.e. the name of the directory under SYSFS_DIR
# Outputs:   one "Refreshed N times" line on stdout per 100 writes
function refresh_loop() {
    local dev_id="$1"

    # Tracing every iteration would flood the log, so turn xtrace off here.
    set +x

    local refresh_attr="${SYSFS_DIR}/${dev_id}/refresh"
    local count=0
    while true; do
        count=$((count + 1))
        echo 1 | sudo tee "${refresh_attr}" > /dev/null
        if ((count % 100 != 0)); then
            continue
        fi
        echo "Refreshed ${count} times"
    done
}

# Where the kernel exposes per-device rbd attributes, and the name of the
# image this test creates.
readonly SYSFS_DIR="/sys/bus/rbd/devices"
readonly IMAGE_NAME="watch-errors-test"

# EXIT handler: if the script bails out early (e.g. fio fails under
# "set -e"), do not leave the infinite background refresh loop running.
# Best-effort only, so it never changes the script's exit status.
function stop_refresh_loop() {
    if [[ -n "${refresh_pid:-}" ]]; then
        kill "${refresh_pid}" 2> /dev/null || true
    fi
}

rbd create -s 1G --image-feature exclusive-lock "${IMAGE_NAME}"

# induce a watch error every 30 seconds
dev="$(sudo rbd device map -o osdkeepalive=60 "${IMAGE_NAME}")"
# /dev/rbdN -> N, the directory name used under SYSFS_DIR
dev_id="${dev#/dev/rbd}"

# constantly refresh, not just on watch errors
refresh_loop "${dev_id}" &
refresh_pid=$!
trap stop_refresh_loop EXIT

# start from an empty kernel log so only this run's messages are counted
sudo dmesg -C

# test that none of the above triggers a deadlock with a workload
fio --name test --filename="${dev}" --ioengine=libaio --direct=1 \
    --rw=randwrite --norandommap --randrepeat=0 --bs=512 --iodepth=128 \
    --time_based --runtime=1h --eta=never

# grep -c prints 0 but exits with status 1 when nothing matches; with
# errexit and pipefail that would abort right here, skipping both the
# cleanup and the "Too few watch errors" report below.  Accept status 1
# (no match) and keep failing on anything else (grep or dmesg errors).
num_errors="$(dmesg |
    { grep -c "rbd${dev_id}: encountered watch error" || [[ $? -eq 1 ]]; })"
echo "Recorded ${num_errors} watch errors"

# Deliberately strict: if the refresh loop already died, kill fails and
# so does the test.
kill "${refresh_pid}"
wait
# the loop is gone; keep the EXIT handler from signalling a stale pid
refresh_pid=""

sudo rbd device unmap "${dev}"

# A 1h run at one watch error per 30 seconds (see above) would yield about
# 120 of them; require at least half of that.
if ((num_errors < 60)); then
    echo "Too few watch errors"
    exit 1
fi

echo OK