#!/usr/bin/env bash
#
# qa/standalone/misc/network-ping.sh

source $CEPH_ROOT/qa/standalone/ceph-helpers.sh

# Prepare the environment shared by every test and execute the requested tests.
#
# Arguments:
#   $1    - test directory handed to setup, to each test function and to teardown
#   $2... - optional names of the test functions to run; when none are given,
#           every defined function whose name matches TEST_[0-9a-z_]* is run
# Globals:
#   CEPH_MON  (exported) - set to the monitor address used by this script
#   CEPH_ARGS (exported) - appended with the options common to all tests
# Returns:
#   1 as soon as setup, a test function or teardown fails; 0 when all passed
#
# NOTE(review): setup and teardown are not defined in this file; presumably
# they come from the sourced ceph-helpers.sh - confirm.
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "
    CEPH_ARGS+="--debug_disable_randomized_ping=true "
    # 5 second cycles of 1 second pings, so the tests do not have to wait for
    # real-time minutes of heartbeat history.
    CEPH_ARGS+="--debug_heartbeat_testing_span=5 "
    CEPH_ARGS+="--osd_heartbeat_interval=1 "

    # Run the tests named on the command line, or discover every TEST_*
    # function from the shell's own listing of definitions.
    local funcs="$*"
    if [ -z "$funcs" ]; then
        funcs=$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')
    fi

    # $funcs is deliberately unquoted: it is a whitespace-separated name list.
    local func
    for func in $funcs ; do
        setup "$dir" || return 1
        "$func" "$dir" || return 1
        teardown "$dir" || return 1
    done
}

# Run "dump_osd_network" on one daemon's admin socket and validate the reply.
#
# Arguments:
#   $1 - test directory; the reply is captured in $1/json
#   $2 - daemon name handed to get_asok_path, e.g. osd.0 or mgr.x
#   $3 - threshold passed to dump_osd_network; empty to pass none
#   $4 - value the reply's .threshold must have
#   $5 - optional: number of items the reply's .entries must have;
#        the entry count is not checked when this is omitted
# Returns: 0 when the reply matches, 1 otherwise
function check_dump_osd_network() {
    local dir=$1
    local daemon=$2
    local threshold=$3
    local expected_threshold=$4
    local expected_entries=${5:-}

    # CEPH_ARGS is blanked for this single command only.
    # NOTE(review): presumably so the cluster options accumulated in CEPH_ARGS
    # are not applied to the admin-socket client - confirm.
    CEPH_ARGS='' ceph daemon "$(get_asok_path "$daemon")" dump_osd_network \
        ${threshold:+"$threshold"} | tee "$dir/json"

    if [ -n "$expected_entries" ]; then
        test "$(jq '.entries | length' "$dir/json")" = "$expected_entries" || return 1
    fi
    test "$(jq '.threshold' "$dir/json")" = "$expected_threshold" || return 1
}

# Verify the dump_osd_network output of an OSD and of the mgr on a small
# cluster (1 mon, 1 mgr, 3 OSDs) after increasing amounts of ping history.
#
# Arguments:
#   $1 - test directory
# Returns: 1 on the first failed step or check, 0 otherwise
function TEST_network_ping_test1() {
    local dir=$1
    local delay

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    run_osd "$dir" 0 || return 1
    run_osd "$dir" 1 || return 1
    run_osd "$dir" 2 || return 1

    sleep 5

    create_pool foo 16

    # write some objects
    timeout 20 rados bench -p foo 10 write -b 4096 --no-cleanup || return 1

    # Each pass lets more ping history accumulate and then repeats the same
    # four checks:
    #   10s      - 1 cycle worth of ping data, the "1 minute" interval
    #   20s more - another 4 cycles, the "5 minute" interval
    #   50s more - another 10 cycles, the "15 minute" interval
    for delay in 10 20 50 ; do
        sleep "$delay"
        flush_pg_stats

        # Without an explicit threshold the reply reports threshold 1000 and
        # must not list any entry.
        check_dump_osd_network "$dir" osd.0 "" 1000 0 || return 1
        check_dump_osd_network "$dir" mgr.x "" 1000 0 || return 1

        # With threshold 0 the OSD must list 4 entries and the mgr 12.
        check_dump_osd_network "$dir" osd.0 0 0 4 || return 1
        check_dump_osd_network "$dir" mgr.x 0 0 12 || return 1
    done

    # Just check the threshold output matches the input
    check_dump_osd_network "$dir" mgr.x 99 99 || return 1
    check_dump_osd_network "$dir" osd.0 98 98 || return 1

    rm -f "$dir/json"
}

# Set mon_warn_on_slow_ping_time very low so that ordinary heartbeats are
# reported as slow, then verify the resulting health warnings, including the
# datacenter/rack labels of the OSDs involved.
#
# Arguments:
#   $1 - test directory; health output is captured in $1/health
# Globals:
#   EXTRA_OPTS (exported) - set to the low slow-ping threshold; it stays
#                           exported after this function returns
# Returns: 1 on the first failed step or check, 0 otherwise
#
# NOTE(review): EXTRA_OPTS is not read anywhere in this file; presumably the
# run_* helpers from ceph-helpers.sh pass it to the daemons - confirm.
function TEST_network_ping_test2() {
    local dir=$1
    local kind
    local i

    export CEPH_ARGS
    export EXTRA_OPTS=" --mon_warn_on_slow_ping_time=0.001"
    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    run_osd "$dir" 0 || return 1
    run_osd "$dir" 1 || return 1
    run_osd "$dir" 2 || return 1

    sleep 5

    # Build three parallel CRUSH branches
    #   default -> dcN -> rackN -> hostN -> osd.(N-1)
    # and stop at the first command that fails, so a broken topology is not
    # reported later as a confusing health-output mismatch.
    for kind in dc:datacenter rack:rack host:host ; do
        for i in 1 2 3 ; do
            # "${kind%%:*}" is the bucket name prefix, "${kind#*:}" its type
            ceph osd crush add-bucket "${kind%%:*}$i" "${kind#*:}" || return 1
        done
    done
    for i in 1 2 3 ; do
        ceph osd crush move "dc$i" root=default || return 1
    done
    for i in 1 2 3 ; do
        ceph osd crush move "rack$i" "datacenter=dc$i" || return 1
    done
    for i in 1 2 3 ; do
        ceph osd crush move "host$i" "rack=rack$i" || return 1
    done
    for i in 1 2 3 ; do
        ceph osd crush set "osd.$((i - 1))" 1.0 "host=host$i" || return 1
    done
    ceph osd crush rule create-simple myrule default host firstn || return 1

    create_pool foo 16 16 replicated myrule

    # write some objects
    timeout 20 rados bench -p foo 10 write -b 4096 --no-cleanup || return 1

    # Get at least 1 cycle of ping data (this test runs with 5 second cycles of 1 second pings)
    sleep 10
    flush_pg_stats

    ceph health | tee "$dir/health"
    grep -q "Slow OSD heartbeats" "$dir/health" || return 1

    ceph health detail | tee "$dir/health"
    grep -q "OSD_SLOW_PING_TIME_BACK" "$dir/health" || return 1
    grep -q "OSD_SLOW_PING_TIME_FRONT" "$dir/health" || return 1
    # The continuation line must start in column 0: inside the double quotes
    # any leading whitespace would become part of the pattern.
    grep -q "Slow OSD heartbeats on front from osd[.][0-2] [[]dc[1-3],rack[1-3][]] \
to osd[.][0-2] [[]dc[1-3],rack[1-3][]]" "$dir/health" || return 1
    rm -f "$dir/health"
}

# Start the suite, passing its name and the command-line arguments through.
# NOTE(review): main is not defined in this file; presumably it is provided by
# the sourced ceph-helpers.sh and ends up invoking run() - confirm.
main network-ping "$@"

# Local Variables:
# compile-command: "cd ../.. ; make -j4 && ../qa/run-standalone.sh network-ping.sh"
# End: