blob: 4745108c5babb98f1c2e435573e58a0096d3d5bf (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
|
#!/usr/bin/env bash
#
# Standalone test for the dump_osd_network admin-socket command and for the
# slow OSD heartbeat health warnings.
#
# Environment:
#   CEPH_ROOT - root of the source tree; the helper library is loaded from
#               $CEPH_ROOT/qa/standalone/ceph-helpers.sh
#
# NOTE(review): setup, teardown, run_mon, run_mgr, run_osd, create_pool,
# flush_pg_stats, get_asok_path and main are used below but not defined in
# this file; presumably they all come from the helper library - confirm.

# Quoted so that a checkout path containing whitespace is not word-split.
source "$CEPH_ROOT/qa/standalone/ceph-helpers.sh"
#######################################
# Configure the options shared by every test in this file, then run each
# requested test function between a setup and a teardown of the test directory.
# Globals:
#   CEPH_MON  (written, exported)     monitor address used by this file
#   CEPH_ARGS (appended to, exported) options shared by the ceph commands
# Arguments:
#   $1   - test directory, passed to setup, the test function and teardown
#   $2.. - optional names of the test functions to run; when omitted, every
#          defined function whose name matches TEST_[0-9a-z_]* is run
# Returns:
#   0 when every test passed; 1 as soon as setup, a test or teardown fails
#   (a failing test is NOT followed by teardown)
#######################################
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "
    CEPH_ARGS+="--debug_disable_randomized_ping=true "
    # The next two options are what the tests below rely on when they talk
    # about "5 second cycles of 1 second pings".
    CEPH_ARGS+="--debug_heartbeat_testing_span=5 "
    CEPH_ARGS+="--osd_heartbeat_interval=1 "

    # Explicit function names win; otherwise discover the TEST_* functions
    # from the shell's own listing of defined functions.
    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}

    # Keep the loop variable out of the caller's scope.
    local func
    # $funcs is intentionally unquoted: it is a whitespace-separated list of
    # function names. $dir is quoted so that it always stays a single argument.
    for func in $funcs; do
        setup "$dir" || return 1
        "$func" "$dir" || return 1
        teardown "$dir" || return 1
    done
}
#######################################
# Run dump_osd_network on one daemon's admin socket and check the reply.
# The reply is written to $dir/json and echoed to stdout (via tee).
# Arguments:
#   $1 - test directory
#   $2 - daemon name handed to get_asok_path (osd.0, mgr.x, ...)
#   $3 - threshold argument for dump_osd_network; empty means "pass none"
#   $4 - expected value of '.entries | length'; empty skips this check
#   $5 - expected value of '.threshold'
# Returns:
#   0 when every requested value matches, 1 otherwise
#######################################
function _expect_osd_network() {
    local dir=$1 daemon=$2 threshold=$3 want_entries=$4 want_threshold=$5
    local -a cmd

    cmd=(ceph daemon "$(get_asok_path "$daemon")" dump_osd_network)
    if [[ -n "$threshold" ]]; then
        cmd+=("$threshold")
    fi

    # CEPH_ARGS is emptied for this single invocation only.
    CEPH_ARGS='' "${cmd[@]}" | tee "$dir/json"

    if [[ -n "$want_entries" ]]; then
        test "$(jq '.entries | length' < "$dir/json")" = "$want_entries" || return 1
    fi
    test "$(jq '.threshold' < "$dir/json")" = "$want_threshold" || return 1
}

#######################################
# Check the four dump_osd_network replies expected after each wait:
#   - with no threshold argument, osd.0 and mgr.x report no entries and a
#     threshold of 1000;
#   - with threshold 0, osd.0 reports 4 entries and mgr.x reports 12.
# NOTE(review): 4 and 12 presumably follow from the 3-OSD cluster started by
# the test - confirm.
# Arguments:
#   $1 - test directory
# Returns:
#   0 when all four replies match, 1 at the first mismatch
#######################################
function _expect_ping_report() {
    local dir=$1

    _expect_osd_network "$dir" osd.0 '' 0 1000 || return 1
    _expect_osd_network "$dir" mgr.x '' 0 1000 || return 1
    _expect_osd_network "$dir" osd.0 0 4 0 || return 1
    _expect_osd_network "$dir" mgr.x 0 12 0 || return 1
}

#######################################
# Start a mon, a mgr and three OSDs, write some objects, and verify the
# dump_osd_network output of osd.0 and mgr.x after 10, a further 20 and a
# further 50 seconds, then verify that an explicit threshold is echoed back.
# Arguments:
#   $1 - test directory
# Returns:
#   0 on success, 1 at the first failed step or mismatch
#######################################
function TEST_network_ping_test1() {
    local dir=$1
    local osd wait_secs

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    for osd in 0 1 2; do
        run_osd "$dir" "$osd" || return 1
    done
    sleep 5

    create_pool foo 16

    # write some objects
    timeout 20 rados bench -p foo 10 write -b 4096 --no-cleanup || return 1

    # Three successive waits, described by the original comments as:
    #   10s - 1 cycle worth of ping data ("1 minute")
    #   20s - another 4 cycles ("5 minute interval")
    #   50s - another 10 cycles ("15 minute interval")
    # The same replies are expected after each of them.
    for wait_secs in 10 20 50; do
        sleep "$wait_secs"
        flush_pg_stats
        _expect_ping_report "$dir" || return 1
    done

    # Just check the threshold output matches the input
    _expect_osd_network "$dir" mgr.x 99 '' 99 || return 1
    _expect_osd_network "$dir" osd.0 98 '' 98 || return 1

    rm -f "$dir/json"
}
#######################################
# Set mon_warn_on_slow_ping_time very low so that slow-heartbeat health
# warnings are raised, and check that they are reported together with the
# datacenter and rack of both OSDs involved.
# Globals:
#   CEPH_ARGS  (exported)
#   EXTRA_OPTS (written, exported; NOT restored when the test ends)
# NOTE(review): EXTRA_OPTS is presumably consumed by the daemon start-up
# helpers (run_mon / run_mgr / run_osd) - confirm.
# Arguments:
#   $1 - test directory
# Returns:
#   0 when all expected warnings are present, 1 otherwise
#######################################
function TEST_network_ping_test2() {
    local dir=$1
    local n

    export CEPH_ARGS
    export EXTRA_OPTS=" --mon_warn_on_slow_ping_time=0.001"

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    for n in 0 1 2; do
        run_osd "$dir" "$n" || return 1
    done
    sleep 5

    # Build three parallel branches under the default root, one per OSD:
    #   dcN -> rackN -> hostN -> osd.(N-1)
    # The commands are issued in the same order as if written out one by one.
    for n in 1 2 3; do ceph osd crush add-bucket "dc${n}" datacenter; done
    for n in 1 2 3; do ceph osd crush add-bucket "rack${n}" rack; done
    for n in 1 2 3; do ceph osd crush add-bucket "host${n}" host; done
    for n in 1 2 3; do ceph osd crush move "dc${n}" root=default; done
    for n in 1 2 3; do ceph osd crush move "rack${n}" "datacenter=dc${n}"; done
    for n in 1 2 3; do ceph osd crush move "host${n}" "rack=rack${n}"; done
    for n in 0 1 2; do
        ceph osd crush set "osd.${n}" 1.0 "host=host$((n + 1))"
    done
    ceph osd crush rule create-simple myrule default host firstn

    create_pool foo 16 16 replicated myrule

    # write some objects
    timeout 20 rados bench -p foo 10 write -b 4096 --no-cleanup || return 1

    # Get at least 1 cycle of ping data (this test runs with 5 second cycles of 1 second pings)
    sleep 10
    flush_pg_stats

    ceph health | tee "$dir/health"
    grep -q "Slow OSD heartbeats" "$dir/health" || return 1

    ceph health detail | tee "$dir/health"
    grep -q "OSD_SLOW_PING_TIME_BACK" "$dir/health" || return 1
    grep -q "OSD_SLOW_PING_TIME_FRONT" "$dir/health" || return 1

    # Each end of a slow link is printed as "osd.N [dcM,rackK]"; the bracket
    # expressions [.] [[] and []] match a literal '.', '[' and ']'.
    local endpoint='osd[.][0-2] [[]dc[1-3],rack[1-3][]]'
    grep -q "Slow OSD heartbeats on front from ${endpoint} to ${endpoint}" \
        "$dir/health" || return 1

    rm -f "$dir/health"
}
# Hand control to main, passing the literal name "network-ping" followed by
# this script's own command-line arguments.
# NOTE(review): main is not defined in this file; presumably it is provided by
# the sourced ceph-helpers.sh and ends up calling run() above - confirm.
main network-ping "$@"
# Local Variables:
# compile-command: "cd ../.. ; make -j4 && ../qa/run-standalone.sh network-ping.sh"
# End:
|