1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
|
#!/usr/bin/env bash
#
# Copyright (C) 2017 Red Hat <contact@redhat.com>
#
# Author: David Zafman <dzafman@redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Library Public License for more details.
#
source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
# Prepare the environment shared by the tests in this file, then run each
# selected test function between setup and teardown.
#
# Arguments:
#   $1    - directory handed to setup, to the test function and to teardown
#   $2... - optional names of the functions to run; when none are given, every
#           name that `set` lists in the form "TEST_<lowercase/digits/_> ..."
#           is run
# Globals:  exports CEPH_MON, CEPH_ARGS (appended to), margin, objects, poolname
# Returns:  1 as soon as setup, a test or teardown fails, 0 otherwise
function run() {
    local dir=$1
    shift

    # Tunables read by the tests and by below_margin/above_margin.
    export margin=10 objects=200 poolname=test

    # Fix port????
    export CEPH_MON="127.0.0.1:7115" # git grep '\<7115\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "
    # so we will not force auth_log_shard to be acting_primary
    CEPH_ARGS+="--osd_force_auth_primary_missing_objects=1000000 "

    # Explicit function names win; otherwise discover them from the shell.
    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    for func in $funcs; do
        if ! setup $dir; then
            return 1
        fi
        if ! $func $dir; then
            return 1
        fi
        if ! teardown $dir; then
            return 1
        fi
    done
}
# Succeed when the observed value sits at or below the expected value, but no
# more than $margin below it: expected - margin <= observed <= expected.
#
# Arguments: $1 - observed value, $2 - expected value (both read as integers)
# Globals:   margin (read)
# Returns:   0 when the value is inside the window, 1 otherwise
function below_margin() {
    local -i observed=$1
    local -i expected=${2:-}
    if (( observed > expected )); then
        return 1
    fi
    if (( observed < expected - $margin )); then
        return 1
    fi
    return 0
}
# Succeed when the observed value sits at or above the expected value, but no
# more than $margin above it: expected <= observed <= expected + margin.
#
# Arguments: $1 - observed value, $2 - expected value (both read as integers)
# Globals:   margin (read)
# Returns:   0 when the value is inside the window, 1 otherwise
function above_margin() {
    local -i observed=$1
    local -i expected=${2:-}
    if (( observed < expected )); then
        return 1
    fi
    if (( observed > expected + $margin )); then
        return 1
    fi
    return 0
}
# Command templates kept as single-quoted strings and executed with eval in
# check(). Nothing is expanded here; at eval time they read the caller's
# $PG, $log, $which, $UPACT and $addp.
#
# FIND_UPACT: take the last "update_calc_stats" line logged for the PG while it
#   was recovering and print the bracketed text that follows ") ", stopping at
#   the first space or "p" (presumably the up/acting set - confirm against the
#   OSD log format).
# FIND_FIRST: among the "update_calc_stats $which" lines, keep those containing
#   " ${UPACT}${addp}" as a fixed string, drop any line containing "est", and
#   print the trailing number of the first remaining line.
# FIND_LAST: print the trailing number of the last "update_calc_stats $which"
#   line, without the UPACT filter.
FIND_UPACT='grep "pg[[]${PG}.*recovering.*update_calc_stats " $log | tail -1 | sed "s/.*[)] \([[][^ p]*\).*$/\1/"'
FIND_FIRST='grep "pg[[]${PG}.*recovering.*update_calc_stats $which " $log | grep -F " ${UPACT}${addp}" | grep -v est | head -1 | sed "s/.* \([0-9]*\)$/\1/"'
FIND_LAST='grep "pg[[]${PG}.*recovering.*update_calc_stats $which " $log | tail -1 | sed "s/.* \([0-9]*\)$/\1/"'
# Compare the recovery statistics logged by the primary OSD with the expected
# start and end values for one PG.
#
# For each statistic the first logged value must pass below_margin against the
# expected start and the last logged value must pass above_margin against the
# expected end.
#
# Arguments:
#   $1  - test directory containing osd.<primary>.log
#   $2  - PG id
#   $3  - primary OSD id
#   $4  - pool type; "erasure" changes the suffix matched after the up/acting text
#   $5  - expected degraded count at start
#   $6  - expected degraded count at end
#   $7  - expected misplaced count at start
#   $8  - expected misplaced count at end
#   $9  - optional expected "shard <primary>" count at start
#   $10 - optional expected "shard <primary>" count at end
# Globals:  FIND_UPACT, FIND_FIRST, FIND_LAST (read); UPACT, FIRST, LAST (written)
# Returns:  0 when every comparison passes, 1 on the first failure
#
# NOTE: the local names PG, log, which and addp are referenced by the eval'd
# FIND_* templates and must keep these exact names.
function check() {
    local dir=$1 PG=$2 primary=$3 type=$4
    local degraded_start=$5 degraded_end=$6
    local misplaced_start=$7 misplaced_end=$8
    local primary_start=${9:-} primary_end=${10:-}

    local log=$dir/osd.${primary}.log

    # Character expected right after the up/acting text in the log line.
    local addp=" "
    if [ "$type" = "erasure" ]; then
        addp="p"
    fi

    UPACT=$(eval $FIND_UPACT)

    # The starting value is the first line carrying the final up/acting text
    # (see FIND_FIRST); the original note cites false recovery starts as the
    # reason for not trusting the very first line in the log.
    local which="degraded"
    FIRST=$(eval $FIND_FIRST)
    if ! below_margin $FIRST $degraded_start; then
        return 1
    fi
    LAST=$(eval $FIND_LAST)
    if ! above_margin $LAST $degraded_end; then
        return 1
    fi

    # Same pair of checks for the misplaced count.
    which="misplaced"
    FIRST=$(eval $FIND_FIRST)
    if ! below_margin $FIRST $misplaced_start; then
        return 1
    fi
    LAST=$(eval $FIND_LAST)
    if ! above_margin $LAST $misplaced_end; then
        return 1
    fi

    # The per-shard check is optional.
    if [ -z "$primary_start" ]; then
        return 0
    fi

    # This is the value that is set into MISSING_ON_PRIMARY
    which="shard $primary"
    FIRST=$(eval $FIND_FIRST)
    if ! below_margin $FIRST $primary_start; then
        return 1
    fi
    LAST=$(eval $FIND_LAST)
    if ! above_margin $LAST $primary_end; then
        return 1
    fi
    return 0
}
# [1,0,?] -> [1,2,4]
# degraded 500 -> 0
# active+recovering+degraded
# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
# 1.0 500 0 500 0 0 0 500 500 active+recovering+degraded 2017-11-17 19:27:36.493828 28'500 32:603 [1,2,4] 1 [1,2,4] 1 0'0 2017-11-17 19:27:05.915467 0'0 2017-11-17 19:27:05.915467
# Shared body of the "one OSD killed and marked out" recovery tests.
#
# Flow: start a mon, a mgr and OSDs 0-5; create a one-PG pool of the requested
# type; write $objects objects; with norecover set, kill one non-primary OSD of
# obj1's PG and mark it down and out; clear norecover, kick recovery, wait for
# clean, then verify the logged stats: degraded $objects -> 0, misplaced 0 -> 0.
#
# Arguments: $1 - test directory, $2 - pool type ("erasure" or anything else
#            accepted by create_pool)
# Globals:   poolname, objects (read)
# Returns:   1 when a checked step fails, 0 otherwise
function do_recovery_out1() {
    local dir=$1
    shift
    local type=$1

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    local osd_num
    for osd_num in 0 1 2 3 4 5; do
        run_osd $dir $osd_num || return 1
    done

    if [ $type = "erasure" ]; then
        # An erasure-coded pool needs its profile defined first.
        ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd
        create_pool $poolname 1 1 $type myprofile
    else
        create_pool $poolname 1 1 $type
    fi

    if ! wait_for_clean; then
        return 1
    fi

    for i in $(seq 1 $objects); do
        rados -p $poolname put obj$i /dev/null
    done

    local primary=$(get_primary $poolname obj1)
    local PG=$(get_pg $poolname obj1)
    # The OSD to take away: whatever get_not_primary reports for obj1.
    local otherosd=$(get_not_primary $poolname obj1)

    # Hold recovery back until the OSD is fully down and out.
    ceph osd set norecover
    kill $(cat $dir/osd.${otherosd}.pid)
    ceph osd down osd.${otherosd}
    ceph osd out osd.${otherosd}
    ceph osd unset norecover
    ceph tell osd.$(get_primary $poolname obj1) debug kick_recovery_wq 0
    sleep 2

    if ! wait_for_clean; then
        return 1
    fi

    if ! check $dir $PG $primary $type $objects 0 0 0; then
        return 1
    fi

    delete_pool $poolname
    if ! kill_daemons $dir; then
        return 1
    fi
}
# Run the shared "one OSD out" scenario against a replicated pool.
# Arguments: $1 - test directory
# Returns:   1 when do_recovery_out1 fails, 0 otherwise
function TEST_recovery_replicated_out1() {
    local dir=$1

    if ! do_recovery_out1 $dir replicated; then
        return 1
    fi
}
# Run the shared "one OSD out" scenario against an erasure-coded pool.
# Arguments: $1 - test directory
# Returns:   1 when do_recovery_out1 fails, 0 otherwise
function TEST_recovery_erasure_out1() {
    local dir=$1

    if ! do_recovery_out1 $dir erasure; then
        return 1
    fi
}
# [0, 1] -> [2,3,4,5]
# degraded 1000 -> 0
# misplaced 1000 -> 0
# missing on primary 500 -> 0
# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
# 1.0 500 500 1000 1000 0 0 500 500 active+recovering+degraded 2017-10-27 09:38:37.453438 22'500 25:394 [2,4,3,5] 2 [2,4,3,5] 2 0'0 2017-10-27 09:37:58.046748 0'0 2017-10-27 09:37:58.046748
# Recovery statistics when both OSDs of a size-2 pool are marked out and the
# pool is grown to size 4 at the same time.
#
# Flow: start a mon, a mgr and OSDs 0-5; create a one-PG pool with size 2;
# write $objects objects; with norecover set, mark the primary and the other
# OSD of obj1's PG out and raise the pool size to 4; clear norecover, kick
# recovery on the new primary and wait for clean.
#
# Expected stats in the new primary's log:
#   degraded           2 * objects -> 0
#   misplaced          2 * objects -> 0
#   missing on primary     objects -> 0
#
# Arguments: $1 - test directory
# Globals:   poolname, objects (read)
# Returns:   1 when a checked step fails, 0 otherwise
function TEST_recovery_sizeup() {
    local dir=$1

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    local osd_num
    for osd_num in 0 1 2 3 4 5; do
        run_osd "$dir" "$osd_num" || return 1
    done

    create_pool "$poolname" 1 1
    ceph osd pool set "$poolname" size 2

    wait_for_clean || return 1

    local i
    for (( i = 1; i <= objects; i++ )); do
        rados -p "$poolname" put "obj$i" /dev/null
    done

    local primary=$(get_primary "$poolname" obj1)
    local PG=$(get_pg "$poolname" obj1)
    # The pool has size 2, so the PG holds exactly one OSD besides the primary.
    local otherosd=$(get_not_primary "$poolname" obj1)

    ceph osd set norecover
    ceph osd out "osd.$primary" "osd.$otherosd"
    # Use $poolname rather than a literal pool name so the test keeps working
    # if the name exported by run() ever changes.
    ceph osd pool set "$poolname" size 4
    ceph osd unset norecover
    # Get new primary
    primary=$(get_primary "$poolname" obj1)
    ceph tell "osd.${primary}" debug kick_recovery_wq 0
    sleep 2

    wait_for_clean || return 1

    local degraded=$(( objects * 2 ))
    local misplaced=$(( objects * 2 ))
    check "$dir" "$PG" "$primary" replicated "$degraded" 0 "$misplaced" 0 "$objects" 0 || return 1

    delete_pool "$poolname"
    kill_daemons "$dir" || return 1
}
# [0, 1, 2, 4] -> [3, 5]
# misplaced 1000 -> 0
# missing on primary 500 -> 0
# active+recovering+degraded
# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
# 1.0 500 500 0 1000 0 0 500 500 active+recovering+degraded 2017-10-27 09:34:50.012261 22'500 27:118 [3,5] 3 [3,5] 3 0'0 2017-10-27 09:34:08.617248 0'0 2017-10-27 09:34:08.617248
# Recovery statistics when every OSD of a size-4 pool is marked out and the
# pool is shrunk to size 2 at the same time.
#
# Flow: start a mon, a mgr and OSDs 0-5; create a one-PG pool with size 4;
# write $objects objects; with norecover set, mark every OSD returned by
# get_osds for obj1 out and lower the pool size to 2; clear norecover, kick
# recovery and wait for clean.
#
# Expected stats in the new primary's log:
#   degraded                     0 -> 0
#   misplaced          2 * objects -> 0
#   missing on primary     objects -> 0  (checked inline after check())
#
# Arguments: $1 - test directory
# Globals:   poolname, objects (read); UPACT, FIRST, LAST (written)
# Returns:   1 when a checked step fails, 0 otherwise
function TEST_recovery_sizedown() {
    local dir=$1

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    local osd_num
    for osd_num in 0 1 2 3 4 5; do
        run_osd "$dir" "$osd_num" || return 1
    done

    create_pool "$poolname" 1 1
    ceph osd pool set "$poolname" size 4

    wait_for_clean || return 1

    local i
    for (( i = 1; i <= objects; i++ )); do
        rados -p "$poolname" put "obj$i" /dev/null
    done

    local primary=$(get_primary "$poolname" obj1)
    local PG=$(get_pg "$poolname" obj1)
    # Every OSD currently holding obj1, as reported by get_osds.
    local allosds=$(get_osds "$poolname" obj1)

    ceph osd set norecover
    local osd
    # Deliberately unquoted: $allosds is a whitespace-separated list of ids.
    for osd in $allosds; do
        ceph osd out "osd.$osd"
    done

    # Use $poolname rather than a literal pool name so the test keeps working
    # if the name exported by run() ever changes.
    ceph osd pool set "$poolname" size 2
    ceph osd unset norecover
    ceph tell "osd.$(get_primary "$poolname" obj1)" debug kick_recovery_wq 0
    sleep 2

    wait_for_clean || return 1

    # Get new primary
    primary=$(get_primary "$poolname" obj1)

    local misplaced=$(( objects * 2 ))

    local log=$dir/osd.${primary}.log
    check "$dir" "$PG" "$primary" replicated 0 0 "$misplaced" 0 || return 1

    # Same extraction as FIND_UPACT: bracketed text after ") " on the last
    # update_calc_stats line for this PG.
    UPACT=$(grep "pg[[]${PG}.*recovering.*update_calc_stats " "$log" | tail -1 | sed "s/.*[)] \([[][^ p]*\).*$/\1/")

    # This is the value that is set into MISSING_ON_PRIMARY
    FIRST=$(grep "pg[[]${PG}.*recovering.*update_calc_stats shard $primary " "$log" | grep -F " $UPACT " | head -1 | sed "s/.* \([0-9]*\)$/\1/")
    below_margin "$FIRST" "$objects" || return 1
    LAST=$(grep "pg[[]${PG}.*recovering.*update_calc_stats shard $primary " "$log" | tail -1 | sed "s/.* \([0-9]*\)$/\1/")
    above_margin "$LAST" 0 || return 1

    delete_pool "$poolname"
    kill_daemons "$dir" || return 1
}
# [1] -> [1,2]
# degraded 300 -> 200
# active+recovering+undersized+degraded
# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
# 1.0 100 0 300 0 0 0 100 100 active+recovering+undersized+degraded 2017-11-17 17:16:15.302943 13'500 16:643 [1,2] 1 [1,2] 1 0'0 2017-11-17 17:15:34.985563 0'0 2017-11-17 17:15:34.985563
# Recovery statistics for a pool that stays undersized: a size-1 pool on three
# OSDs has one non-primary OSD marked out and is then grown to size 4.
#
# Flow: start a mon, a mgr and $osds OSDs; create a one-PG pool with size 1;
# write $objects objects; with norecover set, mark the first OSD that is not
# the primary out and raise the pool size to 4; clear norecover, kick recovery
# and poll `ceph pg dump pgs` until the PG line no longer says "recovering".
#
# Expected stats in the primary's log:
#   degraded   3 * objects -> 2 * objects
#   misplaced            0 -> 0
#
# Arguments: $1 - test directory
# Globals:   poolname, objects (read)
# Returns:   1 when a checked step fails or the wait times out, 0 otherwise
function TEST_recovery_undersized() {
    local dir=$1

    local osds=3
    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    local i
    for (( i = 0; i < osds; i++ )); do
        run_osd "$dir" "$i" || return 1
    done

    create_pool "$poolname" 1 1
    ceph osd pool set "$poolname" size 1 --yes-i-really-mean-it

    wait_for_clean || return 1

    for (( i = 1; i <= objects; i++ )); do
        rados -p "$poolname" put "obj$i" /dev/null
    done

    local primary=$(get_primary "$poolname" obj1)
    local PG=$(get_pg "$poolname" obj1)

    ceph osd set norecover
    # Mark any osd not the primary (only 1 replica so also has no replica)
    for (( i = 0; i < osds; i++ )); do
        if [ "$i" = "$primary" ]; then
            continue
        fi
        ceph osd out "osd.$i"
        break
    done
    # Use $poolname rather than a literal pool name so the test keeps working
    # if the name exported by run() ever changes.
    ceph osd pool set "$poolname" size 4
    ceph osd unset norecover
    ceph tell "osd.$(get_primary "$poolname" obj1)" debug kick_recovery_wq 0
    # Give extra sleep time because code below doesn't have the sophistication of wait_for_clean()
    sleep 10
    flush_pg_stats || return 1

    # Wait for recovery to finish
    # Can't use wait_for_clean() because state goes from active+recovering+undersized+degraded
    # to active+undersized+degraded
    local attempt
    for (( attempt = 1; attempt <= 300; attempt++ )); do
        if ceph pg dump pgs | grep "^$PG" | grep -qv recovering; then
            break
        fi
        if (( attempt == 300 )); then
            echo "Timeout waiting for recovery to finish"
            return 1
        fi
        sleep 1
    done

    # Get new primary
    primary=$(get_primary "$poolname" obj1)

    local first_degraded=$(( objects * 3 ))
    local last_degraded=$(( objects * 2 ))
    check "$dir" "$PG" "$primary" replicated "$first_degraded" "$last_degraded" 0 0 || return 1

    delete_pool "$poolname"
    kill_daemons "$dir" || return 1
}
# [1,0,2] -> [1,3,NONE]/[1,3,2]
# degraded 100 -> 0
# misplaced 100 -> 100
# active+recovering+degraded+remapped
# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
# 1.0 100 0 100 100 0 0 100 100 active+recovering+degraded+remapped 2017-11-27 21:24:20.851243 18'500 23:618 [1,3,NONE] 1 [1,3,2] 1 0'0 2017-11-27 21:23:39.395242 0'0 2017-11-27 21:23:39.395242
# Recovery statistics for an erasure-coded (k=2, m=1) PG that ends up remapped:
# one non-primary OSD is killed and marked out, and one further OSD is marked
# out without being stopped.
#
# Flow: start a mon, a mgr and OSDs 0-3; create a one-PG erasure pool with
# min_size 2; write $objects objects; with norecover set, kill one non-primary
# OSD and mark it down and out, then mark out the first OSD that is neither the
# primary nor the killed one; clear norecover, kick recovery, wait for clean.
#
# Expected stats in the primary's log:
#   degraded   objects -> 0
#   misplaced  objects -> objects
#
# Arguments: $1 - test directory
# Globals:   poolname, objects (read)
# Returns:   1 when a checked step fails, 0 otherwise
function TEST_recovery_erasure_remapped() {
    local dir=$1

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    local osd_num
    for osd_num in 0 1 2 3; do
        run_osd $dir $osd_num || return 1
    done

    ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd
    create_pool $poolname 1 1 erasure myprofile
    ceph osd pool set $poolname min_size 2

    if ! wait_for_clean; then
        return 1
    fi

    for i in $(seq 1 $objects); do
        rados -p $poolname put obj$i /dev/null
    done

    local primary=$(get_primary $poolname obj1)
    local PG=$(get_pg $poolname obj1)
    local otherosd=$(get_not_primary $poolname obj1)

    ceph osd set norecover
    # First victim: stopped, then marked down and out.
    kill $(cat $dir/osd.${otherosd}.pid)
    ceph osd down osd.${otherosd}
    ceph osd out osd.${otherosd}

    # Mark osd not the primary and not down/out osd as just out
    for i in 0 1 2 3; do
        if [ $i = $primary ] || [ $i = $otherosd ]; then
            continue
        fi
        ceph osd out osd.$i
        break
    done

    ceph osd unset norecover
    ceph tell osd.$(get_primary $poolname obj1) debug kick_recovery_wq 0
    sleep 2

    if ! wait_for_clean; then
        return 1
    fi

    local log=$dir/osd.${primary}.log
    if ! check $dir $PG $primary erasure $objects 0 $objects $objects; then
        return 1
    fi

    delete_pool $poolname
    if ! kill_daemons $dir; then
        return 1
    fi
}
# Recovery statistics when several OSDs miss different sets of objects.
#
# Flow: start a mon, a mgr and $osds OSDs; create a one-PG pool with size 3 and
# min_size 1; write obj1; with noout and norecover set, kill a non-primary OSD
# and write objects 2..objects/2; kill the primary, bring the first OSD back
# and write the remaining objects; clear noout, mark the original primary and a
# non-primary OSD of the last object out, bring the original primary back,
# raise the pool size to 4, clear norecover, kick recovery and wait for clean.
#
# Expected stats in the new primary's log (literal values; they appear to
# assume objects=200 - TODO confirm before changing $objects):
#   degraded           399 -> 0
#   misplaced          300 -> 0
#   missing on primary  99 -> 0
#
# Arguments: $1 - test directory
# Globals:   poolname, objects (read)
# Returns:   1 when a checked step fails, 0 otherwise
function TEST_recovery_multi() {
    local dir=$1

    local osds=6
    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    local i
    for (( i = 0; i < osds; i++ )); do
        run_osd "$dir" "$i" || return 1
    done

    create_pool "$poolname" 1 1
    ceph osd pool set "$poolname" size 3
    ceph osd pool set "$poolname" min_size 1

    wait_for_clean || return 1

    rados -p "$poolname" put obj1 /dev/null

    local primary=$(get_primary "$poolname" obj1)
    local otherosd=$(get_not_primary "$poolname" obj1)

    ceph osd set noout
    ceph osd set norecover
    kill $(cat "$dir/osd.${otherosd}.pid")
    ceph osd down "osd.${otherosd}"

    # First half of the objects is written while $otherosd is down.
    local half=$(( objects / 2 ))
    for (( i = 2; i <= half; i++ )); do
        rados -p "$poolname" put "obj$i" /dev/null
    done

    kill $(cat "$dir/osd.${primary}.pid")
    ceph osd down "osd.${primary}"
    activate_osd "$dir" "${otherosd}"
    sleep 3

    # Second half is written while the original primary is down.
    for (( i = half + 1; i <= objects; i++ )); do
        rados -p "$poolname" put "obj$i" /dev/null
    done

    local PG=$(get_pg "$poolname" obj1)
    otherosd=$(get_not_primary "$poolname" "obj$objects")

    ceph osd unset noout
    ceph osd out "osd.$primary" "osd.$otherosd"
    activate_osd "$dir" "${primary}"
    sleep 3

    # Use $poolname rather than a literal pool name so the test keeps working
    # if the name exported by run() ever changes.
    ceph osd pool set "$poolname" size 4
    ceph osd unset norecover
    ceph tell "osd.$(get_primary "$poolname" obj1)" debug kick_recovery_wq 0
    sleep 2

    wait_for_clean || return 1

    # Get new primary
    primary=$(get_primary "$poolname" obj1)

    check "$dir" "$PG" "$primary" replicated 399 0 300 0 99 0 || return 1

    delete_pool "$poolname"
    kill_daemons "$dir" || return 1
}
# Hand control to main, passing the suite name and the script's own arguments.
# main is not defined in this file; presumably it comes from the sourced
# ceph-helpers.sh and ends up calling run() above - confirm against that file.
main osd-recovery-stats "$@"
# Local Variables:
# compile-command: "make -j4 && ../qa/run-standalone.sh osd-recovery-stats.sh"
# End:
|