summaryrefslogtreecommitdiffstats
path: root/qa/standalone/mon/mon-last-epoch-clean.sh
blob: 82243103e6e110fdccac13abdb9b3d282c089e2b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
#!/usr/bin/env bash

source $CEPH_ROOT/qa/standalone/ceph-helpers.sh


#
# Test driver: exports CEPH_MON, appends the fsid, auth and mon-host options
# to the exported CEPH_ARGS, then runs every selected test function between
# a setup and a teardown of the test directory.
#
# Arguments:
#   $1   - test directory, handed to setup, the test function and teardown
#   $2.. - optional names of the test functions to run; when omitted, every
#          function listed by `set` whose name matches TEST_[0-9a-z_]* is run
# Returns: 0 when every step succeeded, 1 as soon as one of them fails
#
# NOTE(review): setup and teardown are not defined in this file; presumably
# they come from the sourced ceph-helpers.sh.
#
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7302" # git grep '\<7302\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "

    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    # Keep the loop variable out of the global scope.
    local func
    # $funcs is split on purpose: it is a whitespace separated list of names.
    for func in $funcs ; do
        # Quoted so that a directory containing whitespace stays one argument.
        setup "$dir" || return 1
        "$func" "$dir" || return 1
        teardown "$dir" || return 1
    done
}


#
# Check the monitor's min_last_epoch_clean (lec) against the per-pool floors
# found in the output of `ceph report`.
#
# Arguments:
#   $1 - optional pool id. When given, the floor of that pool must be equal
#        to lec. When omitted, exactly two pools must have a floor equal to
#        lec.
# Outputs: the full report on stdout when the check does not hold
# Returns: 0 when the check holds, 1 otherwise
#
function check_lec_equals_pools() {

  local pool_id=$1
  local report lec floor
  local -a pools=()

  report=$(ceph report) || return 1
  lec=$(jq '.osdmap_clean_epochs.min_last_epoch_clean' <<< "$report")

  # [[ -eq ]] evaluates an empty string as 0, so a missing lec must be
  # rejected explicitly instead of being compared.
  if [[ ! $lec =~ ^[0-9]+$ ]]; then
    printf '%s\n' "$report"
    return 1
  fi

  if [[ -z "$pool_id" ]]; then
    # The whole filter is one jq argument; a second positional argument
    # would be taken as an input file name.
    pools=($(jq --argjson lec "$lec" '
      .osdmap_clean_epochs.last_epoch_clean.per_pool[]
      | select(.floor == $lec)
      | .poolid' <<< "$report"))

    # Failure is reported from the function itself: a return inside a
    # ( ... ) subshell would not leave the function.
    if [[ ${#pools[@]} -ne 2 ]]; then
      printf '%s\n' "$report"
      return 1
    fi
  else
    floor=$(jq --argjson pool_id "$pool_id" '
      .osdmap_clean_epochs.last_epoch_clean.per_pool[]
      | select(.poolid == $pool_id)
      | .floor' <<< "$report")

    # Anything but a single number (unknown pool, jq error) is a failure.
    if [[ ! $floor =~ ^[0-9]+$ || $lec -ne $floor ]]; then
      printf '%s\n' "$report"
      return 1
    fi
  fi
  return 0
}

#
# Check that the monitor's min_last_epoch_clean (lec) is strictly lower than
# the floor of one pool, both taken from the output of `ceph report`.
#
# Arguments:
#   $1 - id of the pool whose floor is compared with lec (required)
# Outputs: an error message or the full report on stdout on failure
# Returns: 0 when lec < floor, 1 otherwise
#
function check_lec_lower_than_pool() {

  local pool_id=$1
  local report lec floor

  # Fail from the function itself: an exit inside a ( ... ) subshell would
  # only terminate the subshell.
  if [[ -z "$pool_id" ]]; then
    echo "expected pool_id as parameter"
    return 1
  fi

  report=$(ceph report) || return 1
  lec=$(jq '.osdmap_clean_epochs.min_last_epoch_clean' <<< "$report")

  # The whole filter is one jq argument; a second positional argument would
  # be taken as an input file name.
  floor=$(jq --argjson pool_id "$pool_id" '
    .osdmap_clean_epochs.last_epoch_clean.per_pool[]
    | select(.poolid == $pool_id)
    | .floor' <<< "$report")

  # Both values must be numbers before they are compared, because [[ -lt ]]
  # evaluates an empty string as 0.
  if [[ $lec =~ ^[0-9]+$ && $floor =~ ^[0-9]+$ && $lec -lt $floor ]]; then
    return 0
  fi

  printf '%s\n' "$report"
  return 1
}

#
# Check that the floor of one pool is strictly greater than the floor of
# another pool, both taken from the output of `ceph report`.
#
# Arguments:
#   $1 - id of the pool expected to have the greater floor (required)
#   $2 - id of the pool expected to have the lower floor (required)
# Outputs: an error message or the full report on stdout on failure
# Returns: 0 when floor($1) > floor($2), 1 otherwise
#
function check_floor_pool_greater_than_pool() {

  local pool_a=$1
  # The second pool is the second argument; reading $1 twice would compare a
  # pool with itself.
  local pool_b=$2
  local report floor_a floor_b

  # Fail from the function itself: an exit inside a ( ... ) subshell would
  # only terminate the subshell.
  if [[ -z "$pool_a" ]]; then
    echo "expected id as first parameter"
    return 1
  fi
  if [[ -z "$pool_b" ]]; then
    echo "expected id as second parameter"
    return 1
  fi

  report=$(ceph report) || return 1

  # The whole filter is one jq argument; a second positional argument would
  # be taken as an input file name.
  floor_a=$(jq --argjson pool_id "$pool_a" '
    .osdmap_clean_epochs.last_epoch_clean.per_pool[]
    | select(.poolid == $pool_id)
    | .floor' <<< "$report")

  floor_b=$(jq --argjson pool_id "$pool_b" '
    .osdmap_clean_epochs.last_epoch_clean.per_pool[]
    | select(.poolid == $pool_id)
    | .floor' <<< "$report")

  # Both floors must be numbers before they are compared, because [[ -gt ]]
  # evaluates an empty string as 0.
  if [[ $floor_a =~ ^[0-9]+$ && $floor_b =~ ^[0-9]+$ &&
        $floor_a -gt $floor_b ]]; then
    return 0
  fi

  printf '%s\n' "$report"
  return 1
}

#
# Check the per-OSD epochs found in the output of `ceph report` against the
# monitor's min_last_epoch_clean (lec).
#
# Arguments:
#   $1 - optional OSD id. When given, that OSD must have exactly one entry
#        in osd_epochs and its epoch must be greater than or equal to lec.
#        When omitted, exactly three OSDs must have an epoch greater than or
#        equal to lec.
# Outputs: the full report on stdout when the check does not hold
# Returns: 0 when the check holds, 1 otherwise
#
function check_lec_honours_osd() {

  local osd=$1
  local report lec
  local -a osds=() epoch=()

  report=$(ceph report) || return 1
  lec=$(jq '.osdmap_clean_epochs.min_last_epoch_clean' <<< "$report")

  # [[ -ge ]] evaluates an empty string as 0, so a missing lec must be
  # rejected explicitly instead of being compared.
  if [[ ! $lec =~ ^[0-9]+$ ]]; then
    printf '%s\n' "$report"
    return 1
  fi

  if [[ -z "$osd" ]]; then
    # The whole filter is one jq argument; a second positional argument
    # would be taken as an input file name.
    osds=($(jq --argjson lec "$lec" '
      .osdmap_clean_epochs.osd_epochs[]
      | select(.epoch >= $lec)
      | .id' <<< "$report"))

    # Failure is reported from the function itself: a return inside a
    # ( ... ) subshell would not leave the function.
    if [[ ${#osds[@]} -ne 3 ]]; then
      printf '%s\n' "$report"
      return 1
    fi
  else
    # Select on the id handed in as $1.
    epoch=($(jq --argjson osd "$osd" '
      .osdmap_clean_epochs.osd_epochs[]
      | select(.id == $osd)
      | .epoch' <<< "$report"))

    if [[ ${#epoch[@]} -ne 1 || ! ${epoch[0]} =~ ^[0-9]+$ ||
          ${epoch[0]} -lt $lec ]]; then
      printf '%s\n' "$report"
      return 1
    fi
  fi

  return 0
}

#
# Check that the monitor's min_last_epoch_clean equals the first committed
# osdmap epoch, both taken from the output of `ceph report`.
#
# Outputs: the full report on stdout when the check does not hold
# Returns: 0 when both values are numbers and equal, 1 otherwise
#
function validate_fc() {
  local report lec osdm_fc

  report=$(ceph report) || return 1
  lec=$(jq '.osdmap_clean_epochs.min_last_epoch_clean' <<< "$report")
  osdm_fc=$(jq '.osdmap_first_committed' <<< "$report")

  # Both values must be numbers before they are compared, because [[ -eq ]]
  # evaluates an empty string as 0 and two missing values would look equal.
  if [[ $lec =~ ^[0-9]+$ && $osdm_fc =~ ^[0-9]+$ && $lec -eq $osdm_fc ]]; then
    return 0
  fi

  # Failure is reported from the function itself: a return inside a ( ... )
  # subshell would not leave the function.
  printf '%s\n' "$report"
  return 1
}

#
# Print the difference between the last and the first committed osdmap
# epochs found in the output of `ceph report`.
#
# Outputs: the difference on stdout; an error message on stderr on failure
# Returns: 0 on success, 1 when the report cannot be obtained or does not
#          carry both epochs as numbers (nothing is printed on stdout then)
#
function get_fc_lc_diff() {
  local report osdm_fc osdm_lc

  report=$(ceph report) || return 1
  # The report is passed quoted so that it is neither word-split nor
  # subject to pathname expansion on its way to jq.
  osdm_fc=$(jq '.osdmap_first_committed' <<< "$report")
  osdm_lc=$(jq '.osdmap_last_committed' <<< "$report")

  # Arithmetic expansion evaluates empty or non-numeric values as 0, which
  # would silently turn a broken report into a difference of 0.
  if [[ ! $osdm_fc =~ ^[0-9]+$ || ! $osdm_lc =~ ^[0-9]+$ ]]; then
    echo "no first/last committed osdmap epoch in report" >&2
    return 1
  fi

  echo $((osdm_lc - osdm_fc))
}

#
# Print the id of a pool, looked up by name in the osdmap section of the
# output of `ceph report`.
#
# Arguments:
#   $1 - pool name (required)
# Outputs: the pool id on stdout; error messages on stderr, so that a caller
#          capturing stdout never mistakes a message for an id
# Returns: 0 when a numeric id was found, 1 otherwise
#
function get_pool_id() {

  local pn=$1
  local report pool_id

  # Fail from the function itself: an exit inside a ( ... ) subshell would
  # only terminate the subshell.
  if [[ -z "$pn" ]]; then
    echo "expected pool name as argument" >&2
    return 1
  fi

  report=$(ceph report) || return 1
  # The name is handed to jq as a variable instead of being spliced into the
  # filter text.
  pool_id=$(jq --arg pn "$pn" \
    '.osdmap.pools[] | select(.pool_name == $pn) | .pool' <<< "$report")

  # An unknown pool yields no output at all; [[ -ge 0 ]] would accept that
  # because an empty string evaluates as 0.
  if [[ ! $pool_id =~ ^[0-9]+$ ]]; then
    echo "unexpected pool id for pool '$pn': $pool_id" >&2
    return 1
  fi

  echo "$pool_id"
  return 0
}

#
# Poll get_fc_lc_diff until the value it prints is no longer greater than
# the requested bound, sleeping between polls.
#
# Arguments:
#   $1 - highest acceptable value of get_fc_lc_diff
# Outputs: "maps were not trimmed" on stdout when the delays are exhausted
# Returns: 0 once the value is within the bound, 1 when every delay has been
#          used up while the value is still above it
#
# NOTE(review): the list of delays comes from get_timeout_delays, which is
# not defined in this file; presumably ceph-helpers.sh provides it.
#
function wait_for_total_num_maps() {
  # borrowed from wait_for_health, because that is easier than
  # deduplicating the code
  local -i max_diff=$1
  local -i attempt=0
  local -a backoff=($(get_timeout_delays $TIMEOUT .1))

  while [[ $(get_fc_lc_diff) -gt $max_diff ]]; do
    # Out of delays: give up.
    if [[ $attempt -ge ${#backoff[@]} ]]; then
      echo "maps were not trimmed"
      return 1
    fi
    sleep ${backoff[attempt]}
    attempt=$((attempt + 1))
  done
}

#
# Scenario test: starts one monitor, one manager and three OSDs, creates the
# pools 'foo' and 'bar', then changes pool sizes and takes osd.2 down and
# back up while checking min_last_epoch_clean, the first committed osdmap
# epoch and the number of committed maps after every step.
#
# Arguments:
#   $1 - test directory handed to the run_* / activate_osd helpers
# Returns: 0 when every step and check succeeded, 1 at the first failure.
#          Failures are reported with return rather than exit so that the
#          caller keeps control after a failed test.
#
function TEST_mon_last_clean_epoch() {

  local dir=$1
  local osd_pid foo_id bar_id
  local pre_map_diff post_map_diff diff_ver
  local i

  run_mon "$dir" a || return 1
  run_mgr "$dir" x --mon-warn-on-pool-no-app=false || return 1
  run_osd "$dir" 0 || return 1
  run_osd "$dir" 1 || return 1
  run_osd "$dir" 2 || return 1
  osd_pid=$(cat "$dir/osd.2.pid") || return 1

  sleep 5

  ceph tell 'osd.*' injectargs '--osd-beacon-report-interval 10' || return 1
  ceph tell 'mon.*' injectargs \
    '--mon-min-osdmap-epochs 2 --paxos-service-trim-min 1' || return 1

  create_pool foo 32
  create_pool bar 32

  foo_id=$(get_pool_id "foo")
  bar_id=$(get_pool_id "bar")

  # Anything but a plain number means the lookup failed. The check is done
  # in the function itself: an exit inside a ( ... ) subshell would only
  # terminate the subshell.
  if [[ ! $foo_id =~ ^[0-9]+$ ]]; then
    echo "couldn't find pool 'foo' id"
    return 1
  fi
  if [[ ! $bar_id =~ ^[0-9]+$ ]]; then
    echo "couldn't find pool 'bar' id"
    return 1
  fi

  # no real clue why we are getting these warnings, but let's make them go
  # away so we can be happy.

  ceph osd set-full-ratio 0.97
  ceph osd set-backfillfull-ratio 0.97

  wait_for_health_ok || return 1

  pre_map_diff=$(get_fc_lc_diff)
  # The result of the wait is not checked here; only the comparison below
  # decides whether this step passed.
  wait_for_total_num_maps 2
  post_map_diff=$(get_fc_lc_diff)

  [[ $post_map_diff -le $pre_map_diff ]] || return 1

  pre_map_diff=$post_map_diff

  ceph osd pool set foo size 3
  ceph osd pool set bar size 3

  wait_for_health_ok || return 1

  check_lec_equals_pools || return 1
  check_lec_honours_osd || return 1
  validate_fc || return 1

  # down osd.2; expected result (because all pools' size equals 3):
  # - number of committed maps increase over 2
  # - lec equals fc
  # - lec equals osd.2's epoch
  # - all pools have floor equal to lec

  # Keep signalling the daemon until kill reports that it is gone.
  while kill "$osd_pid" ; do sleep 1 ; done
  ceph osd out 2
  sleep 5 # seriously, just to make sure things settle; we may not need this.

  # generate some maps
  for ((i=0; i <= 10; ++i)); do
    ceph osd set noup
    sleep 1
    ceph osd unset noup
    sleep 1
  done

  post_map_diff=$(get_fc_lc_diff)
  [[ $post_map_diff -gt 2 ]] || return 1

  validate_fc || return 1
  check_lec_equals_pools || return 1
  check_lec_honours_osd 2 || return 1

  # adjust pool 'bar' size to 2; expect:
  # - number of committed maps still over 2
  # - lec equals fc
  # - lec equals pool 'foo' floor
  # - pool 'bar' floor greater than pool 'foo'

  ceph osd pool set bar size 2

  diff_ver=$(get_fc_lc_diff)
  [[ $diff_ver -gt 2 ]] || return 1

  validate_fc || return 1

  check_lec_equals_pools "$foo_id" || return 1
  check_lec_lower_than_pool "$bar_id" || return 1

  check_floor_pool_greater_than_pool "$bar_id" "$foo_id" || return 1

  # set pool 'foo' size to 2; expect:
  # - health_ok
  # - lec equals pools
  # - number of committed maps decreases
  # - lec equals fc

  pre_map_diff=$(get_fc_lc_diff)

  ceph osd pool set foo size 2 || return 1
  wait_for_clean || return 1

  check_lec_equals_pools || return 1
  validate_fc || return 1

  if ! wait_for_total_num_maps 2 ; then
    post_map_diff=$(get_fc_lc_diff)
    # number of maps is decreasing though, right?
    [[ $post_map_diff -lt $pre_map_diff ]] || return 1
  fi

  # bring back osd.2; expect:
  # - health_ok
  # - lec equals fc
  # - number of committed maps equals 2
  # - all pools have floor equal to lec

  pre_map_diff=$(get_fc_lc_diff)

  activate_osd "$dir" 2 || return 1
  wait_for_health_ok || return 1
  validate_fc || return 1
  check_lec_equals_pools || return 1

  if ! wait_for_total_num_maps 2 ; then
    post_map_diff=$(get_fc_lc_diff)
    # number of maps is decreasing though, right?
    [[ $post_map_diff -lt $pre_map_diff ]] || return 1
  fi

  return 0
}

# Script entry point: pass a fixed name for this test plus the command-line
# arguments to main. NOTE(review): main is not defined in this file;
# presumably it comes from the sourced ceph-helpers.sh and ends up calling
# run() above — confirm there.
main mon-last-clean-epoch "$@"