summaryrefslogtreecommitdiffstats
path: root/qa/standalone/osd/pg-split-merge.sh
blob: ad697a9fc3c88182ee764c2bef37bfaf7320bfef (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
#!/usr/bin/env bash
#
# Tests covering PG merge and split combined with offline
# ceph-objectstore-tool export / import / remove operations.
#
# Environment:
#   CEPH_ROOT - root of the source tree; used to locate ceph-helpers.sh
#
# The path is quoted so that a checkout location containing whitespace
# still resolves to a single file name.
source "$CEPH_ROOT/qa/standalone/ceph-helpers.sh"

# Set up the environment shared by all tests, then run the requested test
# functions one after another.
#
# Arguments:
#   $1   - working directory passed to every test function
#   $2.. - optional names of the functions to run; when none are given,
#          every defined function named TEST_[0-9a-z_]* is run
# Globals:
#   CEPH_MON  - exported; monitor address (the port must not be reused by
#               another test, see the inline note)
#   CEPH_ARGS - exported and appended to
# Returns:
#   0 if every function succeeds, 1 as soon as one of them fails
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7147" # git grep '\<7147\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON --mon_min_osdmap_epochs=50 --paxos_service_trim_min=10"

    # Explicitly named functions win; otherwise discover the TEST_* functions
    # from the shell's own listing of defined names.
    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    # Keep the loop variable out of the caller's scope.
    local func
    # $funcs is a whitespace-separated list and is split on purpose.
    for func in $funcs ; do
        "$func" "$dir" || return 1
    done
}

# Exercise a PG merge in which osd.2 holds pg 1.0 but only an empty
# (last_update=0'0) instance of pg 1.1, then check the pool ends up clean.
#
# Arguments:
#   $1 - working directory for the test cluster
# Returns:
#   0 if every checked step succeeds and the final `ceph pg ls` output
#   contains a line matching ' active.clean '; 1 otherwise
function TEST_a_merge_empty() {
    local dir=$1
    local obj

    setup "$dir" || return 1
    run_mon "$dir" a --osd_pool_default_size=3 || return 1
    run_mgr "$dir" x || return 1
    run_osd "$dir" 0 || return 1
    run_osd "$dir" 1 || return 1
    run_osd "$dir" 2 || return 1

    ceph osd pool create foo 2 || return 1
    ceph osd pool set foo pgp_num 1 || return 1

    wait_for_clean || return 1

    # note: we need 1.0 to have the same or more objects than 1.1
    # Everything below depends on these objects existing, so a failed write
    # aborts the test instead of being silently ignored.
    #  1.1
    for obj in foo1 foo2 foo3 foo4 ; do
        rados -p foo put "$obj" /etc/passwd || return 1
    done
    #  1.0
    for obj in foo5 foo6 foo8 foo10 foo11 foo12 foo16 ; do
        rados -p foo put "$obj" /etc/passwd || return 1
    done

    wait_for_clean || return 1

    ceph tell osd.1 config set osd_debug_no_purge_strays true
    ceph osd pool set foo size 2 || return 1
    wait_for_clean || return 1

    # with osd.2 stopped, remove pg 1.1 from its store offline
    kill_daemons "$dir" TERM osd.2 || return 1
    ceph-objectstore-tool --data-path "$dir/2" --op remove --pgid 1.1 --force || return 1
    activate_osd "$dir" 2 || return 1

    wait_for_clean || return 1

    # osd.2: now 1.0 is there but 1.1 is not

    # instantiate 1.1 on osd.2 with last_update=0'0 ('empty'), which is
    # the problematic state... then let it merge with 1.0
    ceph tell osd.2 config set osd_debug_no_acting_change true
    ceph osd out 0 1
    ceph osd pool set foo pg_num 1
    sleep 5
    ceph tell osd.2 config set osd_debug_no_acting_change false

    # go back to osd.1 being primary, and 3x so the osd.2 copy doesn't get
    # removed
    ceph osd in 0 1
    ceph osd pool set foo size 3

    wait_for_clean || return 1

    # scrub to ensure the osd.2 copy of 1.0 was incomplete (vs missing
    # half of its objects).
    ceph pg scrub 1.0
    sleep 10
    # dump the recent cluster log and PG listing for post-mortem reading
    ceph log last debug
    ceph pg ls
    ceph pg ls | grep ' active.clean ' || return 1
}

# Check how ceph-objectstore-tool import behaves for PGs exported before a
# merge, both right after the merge and after a gap in the map history.
#
# Arguments:
#   $1 - working directory for the test cluster
# Returns:
#   0 if every checked step behaves as expected (including the imports that
#   are required to fail), 1 otherwise
function TEST_import_after_merge_and_gap() {
    local dir=$1
    local i

    setup "$dir" || return 1
    run_mon "$dir" a --osd_pool_default_size=1 || return 1
    run_mgr "$dir" x || return 1
    run_osd "$dir" 0 || return 1

    ceph osd pool create foo 2 || return 1
    wait_for_clean || return 1
    rados -p foo bench 3 write -b 1024 --no-cleanup || return 1

    # export both PGs while they are still separate
    kill_daemons "$dir" TERM osd.0 || return 1
    ceph-objectstore-tool --data-path "$dir/0" --op export --pgid 1.1 --file "$dir/1.1" --force || return 1
    ceph-objectstore-tool --data-path "$dir/0" --op export --pgid 1.0 --file "$dir/1.0" --force || return 1
    activate_osd "$dir" 0 || return 1

    # Request the merge. If the request is rejected, stop here: the polling
    # loop below would otherwise keep matching and never terminate.
    ceph osd pool set foo pg_num 1 || return 1
    sleep 5
    # poll for as long as osd.0's numpg counter output still matches "2"
    while ceph daemon osd.0 perf dump | jq '.osd.numpg' | grep 2 ; do sleep 1 ; done
    wait_for_clean || return 1

    # replace the merged PG with the two pre-merge exports
    kill_daemons "$dir" TERM osd.0 || return 1
    ceph-objectstore-tool --data-path "$dir/0" --op remove --pgid 1.0 --force || return 1
    # this will import both halves of the original pg
    ceph-objectstore-tool --data-path "$dir/0" --op import --pgid 1.1 --file "$dir/1.1" || return 1
    ceph-objectstore-tool --data-path "$dir/0" --op import --pgid 1.0 --file "$dir/1.0" || return 1
    activate_osd "$dir" 0 || return 1

    wait_for_clean || return 1

    # make a map gap
    for (( i = 0; i < 50; i++ )) ; do
        ceph osd set nodown
        ceph osd unset nodown
    done

    # poke and prod to ensure last_epoch_clean is big, reported to mon, and
    # the osd is able to trim old maps
    rados -p foo bench 1 write -b 1024 --no-cleanup || return 1
    wait_for_clean || return 1
    ceph tell osd.0 send_beacon
    sleep 5
    ceph osd set nodown
    ceph osd unset nodown
    sleep 5

    kill_daemons "$dir" TERM osd.0 || return 1

    # this should fail.. 1.1 still doesn't exist
    ! ceph-objectstore-tool --data-path "$dir/0" --op import --pgid 1.1 --file "$dir/1.1" || return 1

    ceph-objectstore-tool --data-path "$dir/0" --op export-remove --pgid 1.0 --force --file "$dir/1.0.later" || return 1

    # this should fail too because of the gap
    ! ceph-objectstore-tool --data-path "$dir/0" --op import --pgid 1.1 --file "$dir/1.1" || return 1
    ! ceph-objectstore-tool --data-path "$dir/0" --op import --pgid 1.0 --file "$dir/1.0" || return 1

    # we can force it...
    ceph-objectstore-tool --data-path "$dir/0" --op import --pgid 1.1 --file "$dir/1.1" --force || return 1
    ceph-objectstore-tool --data-path "$dir/0" --op import --pgid 1.0 --file "$dir/1.0" --force || return 1

    # ...but the osd won't start, so remove it again.
    ceph-objectstore-tool --data-path "$dir/0" --op remove --pgid 1.0 --force || return 1
    ceph-objectstore-tool --data-path "$dir/0" --op remove --pgid 1.1 --force || return 1

    # restore the post-gap copy saved by export-remove above
    ceph-objectstore-tool --data-path "$dir/0" --op import --pgid 1.0 --file "$dir/1.0.later" --force || return 1


    activate_osd "$dir" 0 || return 1

    wait_for_clean || return 1
}

# Check that a PG exported before a split cannot be imported while the
# split child is present, and can be once the child has been removed.
#
# Arguments:
#   $1 - working directory for the test cluster
# Returns:
#   0 if every checked step behaves as expected (including the import that
#   is required to fail), 1 otherwise
function TEST_import_after_split() {
    local dir=$1

    setup "$dir" || return 1
    run_mon "$dir" a --osd_pool_default_size=1 || return 1
    run_mgr "$dir" x || return 1
    run_osd "$dir" 0 || return 1

    ceph osd pool create foo 1 || return 1
    wait_for_clean || return 1
    rados -p foo bench 3 write -b 1024 --no-cleanup || return 1

    # export the single PG before it is split
    kill_daemons "$dir" TERM osd.0 || return 1
    ceph-objectstore-tool --data-path "$dir/0" --op export --pgid 1.0 --file "$dir/1.0" --force || return 1
    activate_osd "$dir" 0 || return 1

    # Request the split. If the request is rejected, stop here: the polling
    # loop below would otherwise keep matching and never terminate.
    ceph osd pool set foo pg_num 2 || return 1
    sleep 5
    # poll for as long as osd.0's numpg counter output still matches "1"
    while ceph daemon osd.0 perf dump | jq '.osd.numpg' | grep 1 ; do sleep 1 ; done
    wait_for_clean || return 1

    kill_daemons "$dir" TERM osd.0 || return 1

    ceph-objectstore-tool --data-path "$dir/0" --op remove --pgid 1.0 --force || return 1

    # this should fail because 1.1 (split child) is there
    ! ceph-objectstore-tool --data-path "$dir/0" --op import --pgid 1.0 --file "$dir/1.0" || return 1

    ceph-objectstore-tool --data-path "$dir/0" --op remove --pgid 1.1 --force || return 1
    # now it will work (1.1 is gone)
    ceph-objectstore-tool --data-path "$dir/0" --op import --pgid 1.0 --file "$dir/1.0" || return 1

    activate_osd "$dir" 0 || return 1

    wait_for_clean || return 1
}


# Entry point: hand the test name and all command-line arguments to main.
# main is not defined in this file; presumably it comes from the sourced
# ceph-helpers.sh -- confirm there.
main pg-split-merge "$@"

# Local Variables:
# compile-command: "cd ../.. ; make -j4 && test/osd/pg-split-merge.sh"
# End: