summary | refs | log | tree | commit | diff | stats
path: root/qa/standalone/osd
diff options
context:
space:
mode:
Diffstat (limited to 'qa/standalone/osd')
-rwxr-xr-x qa/standalone/osd/bad-inc-map.sh 62
-rwxr-xr-x qa/standalone/osd/divergent-priors.sh 855
-rwxr-xr-x qa/standalone/osd/ec-error-rollforward.sh 66
-rwxr-xr-x qa/standalone/osd/osd-bench.sh 97
-rwxr-xr-x qa/standalone/osd/osd-bluefs-volume-ops.sh 497
-rwxr-xr-x qa/standalone/osd/osd-config.sh 97
-rwxr-xr-x qa/standalone/osd/osd-copy-from.sh 68
-rwxr-xr-x qa/standalone/osd/osd-dup.sh 30
-rwxr-xr-x qa/standalone/osd/osd-fast-mark-down.sh 111
-rwxr-xr-x qa/standalone/osd/osd-force-create-pg.sh 53
-rwxr-xr-x qa/standalone/osd/osd-markdown.sh 149
-rwxr-xr-x qa/standalone/osd/osd-reactivate.sh 56
-rwxr-xr-x qa/standalone/osd/osd-recovery-prio.sh 542
-rwxr-xr-x qa/standalone/osd/osd-recovery-space.sh 176
-rwxr-xr-x qa/standalone/osd/osd-recovery-stats.sh 512
-rwxr-xr-x qa/standalone/osd/osd-rep-recov-eio.sh 422
-rwxr-xr-x qa/standalone/osd/osd-reuse-id.sh 53
-rwxr-xr-x qa/standalone/osd/pg-split-merge.sh 203
-rwxr-xr-x qa/standalone/osd/repeer-on-acting-back.sh 129
-rwxr-xr-x qa/standalone/osd/repro_long_log.sh 197
20 files changed, 4375 insertions, 0 deletions
diff --git a/qa/standalone/osd/bad-inc-map.sh b/qa/standalone/osd/bad-inc-map.sh
new file mode 100755
index 000000000..cc3cf27cc
--- /dev/null
+++ b/qa/standalone/osd/bad-inc-map.sh
@@ -0,0 +1,62 @@
+#!/usr/bin/env bash
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+mon_port=$(get_unused_port)
+
+# Test driver for bad-inc-map.sh (presumably invoked by main from
+# ceph-helpers.sh).
+#
+# Arguments: $1   - scratch directory handed to setup, each test and teardown
+#            $2.. - optional names of the TEST_* functions to run
+# Globals:   exports CEPH_MON and appends to the exported CEPH_ARGS
+# Returns:   1 as soon as setup, a test function or teardown fails
+function run() {
+    local dir=$1
+    shift
+
+    export CEPH_MON="127.0.0.1:$mon_port"
+    export CEPH_ARGS
+    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none --mon-host=$CEPH_MON "
+    set -e
+
+    # Without explicit names, run every TEST_* function defined in this shell.
+    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+    for func in $funcs ; do
+        if ! setup $dir ; then
+            return 1
+        fi
+        if ! $func $dir ; then
+            return 1
+        fi
+        if ! teardown $dir ; then
+            return 1
+        fi
+    done
+}
+
+# Check that OSDs stay up when osd.2 is told to fail the CRC check on every
+# incremental osdmap it receives.
+#
+# osd.2 gets osd_inject_bad_map_crc_probability=1, the osdmap is churned by
+# creating a pool and changing its min_size twice, and the test then requires
+# that all three OSDs are still up and that osd.2's log contains both the
+# injection message and the "bailing because last" message.
+#
+# Arguments: $1 - test directory holding the daemons' data and logs
+# Returns:   1 on the first failing step, 0 otherwise
+function TEST_bad_inc_map() {
+    local dir=$1
+
+    # run() turns on "set -e", but bash ignores errexit inside a function
+    # that is called on the left-hand side of "||" (which is how run() calls
+    # this test), so every step that must succeed is checked explicitly.
+    run_mon $dir a || return 1
+    run_mgr $dir x || return 1
+    run_osd $dir 0 || return 1
+    run_osd $dir 1 || return 1
+    run_osd $dir 2 || return 1
+
+    ceph config set osd.2 osd_inject_bad_map_crc_probability 1 || return 1
+
+    # osd map churn
+    create_pool foo 8 || return 1
+    ceph osd pool set foo min_size 1 || return 1
+    ceph osd pool set foo min_size 2 || return 1
+
+    sleep 5
+
+    # make sure all the OSDs are still up
+    TIMEOUT=10 wait_for_osd up 0 || return 1
+    TIMEOUT=10 wait_for_osd up 1 || return 1
+    TIMEOUT=10 wait_for_osd up 2 || return 1
+
+    # check for the signature in the log
+    grep "injecting map crc failure" $dir/osd.2.log || return 1
+    grep "bailing because last" $dir/osd.2.log || return 1
+
+    echo success
+
+    delete_pool foo || return 1
+    kill_daemons $dir || return 1
+}
+
+main bad-inc-map "$@"
+
+# Local Variables:
+# compile-command: "make -j4 && ../qa/run-standalone.sh bad-inc-map.sh"
+# End:
diff --git a/qa/standalone/osd/divergent-priors.sh b/qa/standalone/osd/divergent-priors.sh
new file mode 100755
index 000000000..40d72544d
--- /dev/null
+++ b/qa/standalone/osd/divergent-priors.sh
@@ -0,0 +1,855 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2019 Red Hat <contact@redhat.com>
+#
+# Author: David Zafman <dzafman@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+# Test driver for divergent-priors.sh (presumably invoked by main from
+# ceph-helpers.sh).
+#
+# Exports the sizing knobs shared by the TEST_divergent* functions, points the
+# ceph CLI at a private monitor and shrinks the PG log so that only a handful
+# of entries are kept, then runs each requested TEST_* function (all of them
+# by default) between setup and teardown.
+#
+# Arguments: $1   - scratch directory handed to setup, each test and teardown
+#            $2.. - optional names of the TEST_* functions to run
+# Returns:   1 as soon as setup, a test function or teardown fails
+function run() {
+    local dir=$1
+    shift
+
+    # loglen should be a multiple of 6: it is halved to get trim, and trim is
+    # then divided by divisor (3) to size each group of divergent operations.
+    export loglen=12
+    export divisor=3
+    export trim=$((loglen / 2))
+    export DIVERGENT_WRITE=$((trim / divisor))
+    export DIVERGENT_REMOVE=$((trim / divisor))
+    export DIVERGENT_CREATE=$((trim / divisor))
+    export poolname=test
+    export testobjects=100
+    # Fix port????
+    export CEPH_MON="127.0.0.1:7115" # git grep '\<7115\>' : there must be only one
+    export CEPH_ARGS
+    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none --mon-host=$CEPH_MON "
+    # so we will not force auth_log_shard to be acting_primary
+    CEPH_ARGS+="--osd_force_auth_primary_missing_objects=1000000 "
+    CEPH_ARGS+="--osd_debug_pg_log_writeout=true "
+    CEPH_ARGS+="--osd_min_pg_log_entries=$loglen --osd_max_pg_log_entries=$loglen --osd_pg_log_trim_min=$trim "
+
+    # Without explicit names, run every TEST_* function defined in this shell.
+    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+    for func in $funcs ; do
+        if ! setup $dir ; then
+            return 1
+        fi
+        if ! $func $dir ; then
+            return 1
+        fi
+        if ! teardown $dir ; then
+            return 1
+        fi
+    done
+}
+
+
+# Special case divergence test
+# Test handling of divergent entries with prior_version
+# prior to log_tail
+# based on qa/tasks/divergent_prior.py
+#
+# Arguments: $1 - test directory used for daemon data, logs and scratch files
+# Globals read: poolname, testobjects, DIVERGENT_WRITE, DIVERGENT_REMOVE
+# (all exported by run())
+# Returns: 1 when a checked step fails or when no OSD log contains the
+# "_merge_object_divergent_entries ... cannot roll back, removing and adding
+# to missing" message; otherwise the status of the final kill_daemons.
+function TEST_divergent() {
+ local dir=$1
+
+ # something that is always there
+ local dummyfile='/etc/fstab'
+ local dummyfile2='/etc/resolv.conf'
+
+ # one mon, one mgr and OSDs 0..num_osds-1
+ local num_osds=3
+ local osds="$(seq 0 $(expr $num_osds - 1))"
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ for i in $osds
+ do
+ run_osd $dir $i || return 1
+ done
+
+ # The test drives the up/down and in/out state by hand further down.
+ ceph osd set noout
+ ceph osd set noin
+ ceph osd set nodown
+ create_pool $poolname 1 1
+ ceph osd pool set $poolname size 3
+ ceph osd pool set $poolname min_size 2
+
+ flush_pg_stats || return 1
+ wait_for_clean || return 1
+
+ # determine primary
+ # (up_primary of the first entry in the JSON pg dump)
+ local divergent="$(ceph pg dump pgs --format=json | jq '.pg_stats[0].up_primary')"
+ echo "primary and soon to be divergent is $divergent"
+ ceph pg dump pgs
+ # every other OSD id goes into the space-separated non_divergent list
+ local non_divergent=""
+ for i in $osds
+ do
+ if [ "$i" = "$divergent" ]; then
+ continue
+ fi
+ non_divergent="$non_divergent $i"
+ done
+
+ echo "writing initial objects"
+ # write a bunch of objects
+ for i in $(seq 1 $testobjects)
+ do
+ rados -p $poolname put existing_$i $dummyfile
+ done
+
+ WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean
+
+ # NOTE(review): pgid is only referenced by the commented-out
+ # _objectstore_tool_nodown call below.
+ local pgid=$(get_pg $poolname existing_1)
+
+ # blackhole non_divergent
+ # (set through each OSD's admin socket; CEPH_ARGS is emptied for the call)
+ echo "blackholing osds $non_divergent"
+ ceph pg dump pgs
+ for i in $non_divergent
+ do
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) config set objectstore_blackhole 1
+ done
+
+ local case5=$testobjects
+ local case3=$(expr $testobjects - 1)
+ # Write some soon to be divergent
+ # Both rados clients run in the background and are killed after 10 seconds.
+ echo 'writing divergent object'
+ rados -p $poolname put existing_$case5 $dummyfile &
+ echo 'create missing divergent object'
+ inject_eio rep data $poolname existing_$case3 $dir 0 || return 1
+ rados -p $poolname get existing_$case3 $dir/existing &
+ sleep 10
+ killall -9 rados
+
+ # kill all the osds but leave divergent in
+ # (every OSD is killed and marked down; only the non-divergent ones are
+ # marked out)
+ echo 'killing all the osds'
+ ceph pg dump pgs
+ kill_daemons $dir KILL osd || return 1
+ for i in $osds
+ do
+ ceph osd down osd.$i
+ done
+ for i in $non_divergent
+ do
+ ceph osd out osd.$i
+ done
+
+ # bring up non-divergent
+ echo "bringing up non_divergent $non_divergent"
+ ceph pg dump pgs
+ for i in $non_divergent
+ do
+ activate_osd $dir $i || return 1
+ done
+ for i in $non_divergent
+ do
+ ceph osd in osd.$i
+ done
+
+ WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean
+
+ # write 1 non-divergent object (ensure that old divergent one is divergent)
+ objname="existing_$(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)"
+ echo "writing non-divergent object $objname"
+ ceph pg dump pgs
+ rados -p $poolname put $objname $dummyfile2
+
+ # ensure no recovery of up osds first
+ echo 'delay recovery'
+ ceph pg dump pgs
+ for i in $non_divergent
+ do
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) set_recovery_delay 100000
+ done
+
+ # bring in our divergent friend
+ # noup is set while the daemon starts so the recovery delay can be applied
+ # before the OSD is marked up again.
+ echo "revive divergent $divergent"
+ ceph pg dump pgs
+ ceph osd set noup
+ activate_osd $dir $divergent
+ sleep 5
+
+ echo 'delay recovery divergent'
+ ceph pg dump pgs
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) set_recovery_delay 100000
+
+ ceph osd unset noup
+
+ wait_for_osd up 0
+ wait_for_osd up 1
+ wait_for_osd up 2
+
+ ceph pg dump pgs
+ echo 'wait for peering'
+ ceph pg dump pgs
+ rados -p $poolname put foo $dummyfile
+
+ # restart the divergent OSD once more (KILL, then activate)
+ echo "killing divergent $divergent"
+ ceph pg dump pgs
+ kill_daemons $dir KILL osd.$divergent
+ #_objectstore_tool_nodown $dir $divergent --op log --pgid $pgid
+ echo "reviving divergent $divergent"
+ ceph pg dump pgs
+ activate_osd $dir $divergent
+
+ sleep 20
+
+ echo "allowing recovery"
+ ceph pg dump pgs
+ # Set osd_recovery_delay_start back to 0 and kick the queue
+ for i in $osds
+ do
+ ceph tell osd.$i debug kick_recovery_wq 0
+ done
+
+ # every object in the divergent range must be readable again
+ echo 'reading divergent objects'
+ ceph pg dump pgs
+ for i in $(seq 1 $(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE))
+ do
+ rados -p $poolname get existing_$i $dir/existing || return 1
+ done
+ rm -f $dir/existing
+
+ # print all merge messages for the record (result not checked) ...
+ grep _merge_object_divergent_entries $(find $dir -name '*osd*log')
+ # Check for _merge_object_divergent_entries for case #5
+ if ! grep -q "_merge_object_divergent_entries.*cannot roll back, removing and adding to missing" $(find $dir -name '*osd*log')
+ then
+ echo failure
+ return 1
+ fi
+ echo "success"
+
+ delete_pool $poolname
+ kill_daemons $dir || return 1
+}
+
+# Erasure-coded variant of the divergence test: same kill/revive sequence as
+# TEST_divergent, but on a pool created with create_ec_pool (k=2 m=1) and with
+# a pool snapshot taken between the divergent writes.
+#
+# Arguments: $1 - test directory used for daemon data, logs and scratch files
+# Globals read: poolname, testobjects, DIVERGENT_WRITE, DIVERGENT_REMOVE
+# (all exported by run())
+# Returns: 1 when a checked step fails or when no OSD log contains a
+# "_merge_object_divergent_entries ... rolled back" message; otherwise the
+# status of the final kill_daemons.
+function TEST_divergent_ec() {
+ local dir=$1
+
+ # something that is always there
+ local dummyfile='/etc/fstab'
+ local dummyfile2='/etc/resolv.conf'
+
+ # one mon, one mgr and OSDs 0..num_osds-1
+ local num_osds=3
+ local osds="$(seq 0 $(expr $num_osds - 1))"
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ for i in $osds
+ do
+ run_osd $dir $i || return 1
+ done
+
+ # The test drives the up/down and in/out state by hand further down.
+ ceph osd set noout
+ ceph osd set noin
+ ceph osd set nodown
+ create_ec_pool $poolname true k=2 m=1 || return 1
+
+ flush_pg_stats || return 1
+ wait_for_clean || return 1
+
+ # determine primary
+ # (up_primary of the first entry in the JSON pg dump)
+ local divergent="$(ceph pg dump pgs --format=json | jq '.pg_stats[0].up_primary')"
+ echo "primary and soon to be divergent is $divergent"
+ ceph pg dump pgs
+ # every other OSD id goes into the space-separated non_divergent list
+ local non_divergent=""
+ for i in $osds
+ do
+ if [ "$i" = "$divergent" ]; then
+ continue
+ fi
+ non_divergent="$non_divergent $i"
+ done
+
+ echo "writing initial objects"
+ # write a bunch of objects
+ for i in $(seq 1 $testobjects)
+ do
+ rados -p $poolname put existing_$i $dummyfile
+ done
+
+ WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean
+
+ # pgid is used by the --op log dumps below
+ local pgid=$(get_pg $poolname existing_1)
+
+ # blackhole non_divergent
+ # (set through each OSD's admin socket; CEPH_ARGS is emptied for the call)
+ echo "blackholing osds $non_divergent"
+ ceph pg dump pgs
+ for i in $non_divergent
+ do
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) config set objectstore_blackhole 1
+ done
+
+ # Write some soon to be divergent
+ # Two background writes to the last object, a pool snapshot, then a
+ # background write to the second-to-last object; all rados clients are
+ # killed after 10 seconds.
+ echo 'writing divergent object'
+ rados -p $poolname put existing_$testobjects $dummyfile2 &
+ sleep 1
+ rados -p $poolname put existing_$testobjects $dummyfile &
+ rados -p $poolname mksnap snap1
+ rados -p $poolname put existing_$(expr $testobjects - 1) $dummyfile &
+ sleep 10
+ killall -9 rados
+
+ # kill all the osds but leave divergent in
+ # (every OSD is killed and marked down; only the non-divergent ones are
+ # marked out)
+ echo 'killing all the osds'
+ ceph pg dump pgs
+ kill_daemons $dir KILL osd || return 1
+ for i in $osds
+ do
+ ceph osd down osd.$i
+ done
+ for i in $non_divergent
+ do
+ ceph osd out osd.$i
+ done
+
+ # bring up non-divergent
+ echo "bringing up non_divergent $non_divergent"
+ ceph pg dump pgs
+ for i in $non_divergent
+ do
+ activate_osd $dir $i || return 1
+ done
+ for i in $non_divergent
+ do
+ ceph osd in osd.$i
+ done
+
+ # a fixed sleep is used here instead of the disabled wait_for_clean
+ sleep 5
+ #WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean
+
+ # write 1 non-divergent object (ensure that old divergent one is divergent)
+ objname="existing_$(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)"
+ echo "writing non-divergent object $objname"
+ ceph pg dump pgs
+ rados -p $poolname put $objname $dummyfile2
+
+ WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean
+
+ # Dump logs
+ # Each non-divergent OSD is killed, its PG log dumped, and restarted; the
+ # divergent OSD is still down from the kill above, so its log is dumped
+ # directly.
+ for i in $non_divergent
+ do
+ kill_daemons $dir KILL osd.$i || return 1
+ _objectstore_tool_nodown $dir $i --op log --pgid $pgid
+ activate_osd $dir $i || return 1
+ done
+ _objectstore_tool_nodown $dir $divergent --op log --pgid $pgid
+
+ WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean
+
+ # ensure no recovery of up osds first
+ echo 'delay recovery'
+ ceph pg dump pgs
+ for i in $non_divergent
+ do
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) set_recovery_delay 100000
+ done
+
+ # bring in our divergent friend
+ # noup is set while the daemon starts so the recovery delay can be applied
+ # before the OSD is marked up again.
+ echo "revive divergent $divergent"
+ ceph pg dump pgs
+ ceph osd set noup
+ activate_osd $dir $divergent
+ sleep 5
+
+ echo 'delay recovery divergent'
+ ceph pg dump pgs
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) set_recovery_delay 100000
+
+ ceph osd unset noup
+
+ wait_for_osd up 0
+ wait_for_osd up 1
+ wait_for_osd up 2
+
+ ceph pg dump pgs
+ echo 'wait for peering'
+ ceph pg dump pgs
+ rados -p $poolname put foo $dummyfile
+
+ # restart the divergent OSD once more (KILL, then activate)
+ echo "killing divergent $divergent"
+ ceph pg dump pgs
+ kill_daemons $dir KILL osd.$divergent
+ #_objectstore_tool_nodown $dir $divergent --op log --pgid $pgid
+ echo "reviving divergent $divergent"
+ ceph pg dump pgs
+ activate_osd $dir $divergent
+
+ sleep 20
+
+ echo "allowing recovery"
+ ceph pg dump pgs
+ # Set osd_recovery_delay_start back to 0 and kick the queue
+ for i in $osds
+ do
+ ceph tell osd.$i debug kick_recovery_wq 0
+ done
+
+ # every object in the divergent range must be readable again
+ echo 'reading divergent objects'
+ ceph pg dump pgs
+ for i in $(seq 1 $(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE))
+ do
+ rados -p $poolname get existing_$i $dir/existing || return 1
+ done
+ rm -f $dir/existing
+
+ # print all merge messages for the record (result not checked)
+ grep _merge_object_divergent_entries $(find $dir -name '*osd*log')
+ # Check for _merge_object_divergent_entries for case #3
+ # XXX: Not reproducing this case
+# if ! grep -q "_merge_object_divergent_entries.* missing, .* adjusting" $(find $dir -name '*osd*log')
+# then
+# echo failure
+# return 1
+# fi
+ # Check for _merge_object_divergent_entries for case #4
+ if ! grep -q "_merge_object_divergent_entries.*rolled back" $(find $dir -name '*osd*log')
+ then
+ echo failure
+ return 1
+ fi
+ echo "success"
+
+ delete_pool $poolname
+ kill_daemons $dir || return 1
+}
+
+# Special case divergence test with ceph-objectstore-tool export/remove/import
+# Test handling of divergent entries with prior_version
+# prior to log_tail and a ceph-objectstore-tool export/import
+# based on qa/tasks/divergent_prior2.py
+#
+# Arguments: $1 - test directory used for daemon data, logs and scratch files
+# Globals read: poolname, testobjects, DIVERGENT_WRITE, DIVERGENT_REMOVE,
+#               DIVERGENT_CREATE (all exported by run())
+# Returns: 1 when a checked step fails or when the OSD logs lack the
+#          "_merge_object_divergent_entries" messages for case #1 or case #2;
+#          otherwise the status of the final kill_daemons.
+function TEST_divergent_2() {
+    local dir=$1
+
+    # something that is always there
+    local dummyfile='/etc/fstab'
+    local dummyfile2='/etc/resolv.conf'
+
+    # one mon, one mgr and OSDs 0..num_osds-1
+    local num_osds=3
+    local osds="$(seq 0 $(expr $num_osds - 1))"
+    run_mon $dir a || return 1
+    run_mgr $dir x || return 1
+    for i in $osds
+    do
+        run_osd $dir $i || return 1
+    done
+
+    # The test drives the up/down and in/out state by hand further down.
+    ceph osd set noout
+    ceph osd set noin
+    ceph osd set nodown
+    create_pool $poolname 1 1
+    ceph osd pool set $poolname size 3
+    ceph osd pool set $poolname min_size 2
+
+    flush_pg_stats || return 1
+    wait_for_clean || return 1
+
+    # determine primary
+    local divergent="$(ceph pg dump pgs --format=json | jq '.pg_stats[0].up_primary')"
+    echo "primary and soon to be divergent is $divergent"
+    ceph pg dump pgs
+    local non_divergent=""
+    for i in $osds
+    do
+        if [ "$i" = "$divergent" ]; then
+            continue
+        fi
+        non_divergent="$non_divergent $i"
+    done
+
+    echo "writing initial objects"
+    # write a bunch of objects
+    for i in $(seq 1 $testobjects)
+    do
+        rados -p $poolname put existing_$i $dummyfile
+    done
+
+    WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean
+
+    local pgid=$(get_pg $poolname existing_1)
+
+    # blackhole non_divergent
+    echo "blackholing osds $non_divergent"
+    ceph pg dump pgs
+    for i in $non_divergent
+    do
+        CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) config set objectstore_blackhole 1
+    done
+
+    # All divergent operations below run in the background and are killed
+    # after 10 seconds.
+    # Do some creates to hit case 2
+    echo 'create new divergent objects'
+    for i in $(seq 1 $DIVERGENT_CREATE)
+    do
+        rados -p $poolname create newobject_$i &
+    done
+    # Write some soon to be divergent
+    echo 'writing divergent objects'
+    for i in $(seq 1 $DIVERGENT_WRITE)
+    do
+        rados -p $poolname put existing_$i $dummyfile2 &
+    done
+    # Remove some soon to be divergent
+    echo 'remove divergent objects'
+    local rmi
+    for i in $(seq 1 $DIVERGENT_REMOVE)
+    do
+        rmi=$(expr $i + $DIVERGENT_WRITE)
+        rados -p $poolname rm existing_$rmi &
+    done
+    sleep 10
+    killall -9 rados
+
+    # kill all the osds but leave divergent in
+    # (every OSD is killed and marked down; only the non-divergent ones are
+    # marked out)
+    echo 'killing all the osds'
+    ceph pg dump pgs
+    kill_daemons $dir KILL osd || return 1
+    for i in $osds
+    do
+        ceph osd down osd.$i
+    done
+    for i in $non_divergent
+    do
+        ceph osd out osd.$i
+    done
+
+    # bring up non-divergent
+    echo "bringing up non_divergent $non_divergent"
+    ceph pg dump pgs
+    for i in $non_divergent
+    do
+        activate_osd $dir $i || return 1
+    done
+    for i in $non_divergent
+    do
+        ceph osd in osd.$i
+    done
+
+    WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean
+
+    # write 1 non-divergent object (ensure that old divergent one is divergent)
+    local objname="existing_$(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)"
+    echo "writing non-divergent object $objname"
+    ceph pg dump pgs
+    rados -p $poolname put $objname $dummyfile2
+
+    WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean
+
+    # ensure no recovery of up osds first
+    echo 'delay recovery'
+    ceph pg dump pgs
+    for i in $non_divergent
+    do
+        CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) set_recovery_delay 100000
+    done
+
+    # bring in our divergent friend
+    # noup is set while the daemon starts so the recovery delay can be applied
+    # before the OSD is marked up again.
+    echo "revive divergent $divergent"
+    ceph pg dump pgs
+    ceph osd set noup
+    activate_osd $dir $divergent
+    sleep 5
+
+    echo 'delay recovery divergent'
+    ceph pg dump pgs
+    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) set_recovery_delay 100000
+
+    ceph osd unset noup
+
+    wait_for_osd up 0
+    wait_for_osd up 1
+    wait_for_osd up 2
+
+    ceph pg dump pgs
+    echo 'wait for peering'
+    ceph pg dump pgs
+    rados -p $poolname put foo $dummyfile
+
+    # At this point the divergent_priors should have been detected
+
+    echo "killing divergent $divergent"
+    ceph pg dump pgs
+    kill_daemons $dir KILL osd.$divergent
+
+    # export a pg
+    # (export-remove followed by import, with the divergent OSD stopped)
+    local expfile=$dir/exp.$$.out
+    _objectstore_tool_nodown $dir $divergent --op export-remove --pgid $pgid --file $expfile
+    _objectstore_tool_nodown $dir $divergent --op import --file $expfile
+
+    echo "reviving divergent $divergent"
+    ceph pg dump pgs
+    activate_osd $dir $divergent
+    wait_for_osd up $divergent
+
+    sleep 20
+    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) dump_ops_in_flight
+
+    echo "allowing recovery"
+    ceph pg dump pgs
+    # Set osd_recovery_delay_start back to 0 and kick the queue
+    for i in $osds
+    do
+        ceph tell osd.$i debug kick_recovery_wq 0
+    done
+
+    echo 'reading divergent objects'
+    ceph pg dump pgs
+    for i in $(seq 1 $(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE))
+    do
+        rados -p $poolname get existing_$i $dir/existing || return 1
+    done
+    # NOTE(review): the status of these reads is not checked, presumably
+    # because the divergent creates may not have survived - confirm.
+    for i in $(seq 1 $DIVERGENT_CREATE)
+    do
+        rados -p $poolname get newobject_$i $dir/existing
+    done
+    rm -f $dir/existing
+
+    # print all merge messages for the record (result not checked)
+    grep _merge_object_divergent_entries $(find $dir -name '*osd*log')
+    # Check for _merge_object_divergent_entries for case #1
+    if ! grep -q "_merge_object_divergent_entries: more recent entry found:" $(find $dir -name '*osd*log')
+    then
+        echo failure
+        return 1
+    fi
+    # Check for _merge_object_divergent_entries for case #2
+    if ! grep -q "_merge_object_divergent_entries.*prior_version or op type indicates creation" $(find $dir -name '*osd*log')
+    then
+        echo failure
+        return 1
+    fi
+    echo "success"
+
+    # expfile already starts with $dir, so it must not be prefixed again
+    rm -f $expfile
+
+    delete_pool $poolname
+    kill_daemons $dir || return 1
+}
+
+# this is the same as case _2 above, except we enable pg autoscaling in order
+# to reproduce https://tracker.ceph.com/issues/41816
+#
+# Arguments: $1 - test directory used for daemon data, logs and scratch files
+# Globals read: poolname, testobjects, DIVERGENT_WRITE, DIVERGENT_REMOVE,
+#               DIVERGENT_CREATE (all exported by run())
+# Returns: 1 when a checked step fails, when no primary is found within
+#          max_duration seconds, or when the OSD logs lack the
+#          "_merge_object_divergent_entries" messages for case #1 or case #2;
+#          otherwise the status of the final kill_daemons.
+function TEST_divergent_3() {
+    local dir=$1
+
+    # something that is always there
+    local dummyfile='/etc/fstab'
+    local dummyfile2='/etc/resolv.conf'
+
+    # one mon, one mgr and OSDs 0..num_osds-1
+    local num_osds=3
+    local osds="$(seq 0 $(expr $num_osds - 1))"
+    run_mon $dir a || return 1
+    run_mgr $dir x || return 1
+    for i in $osds
+    do
+        run_osd $dir $i || return 1
+    done
+
+    # The test drives the up/down and in/out state by hand further down.
+    ceph osd set noout
+    ceph osd set noin
+    ceph osd set nodown
+    create_pool $poolname 1 1
+    ceph osd pool set $poolname size 3
+    ceph osd pool set $poolname min_size 2
+
+    # reproduce https://tracker.ceph.com/issues/41816
+    ceph osd pool set $poolname pg_autoscale_mode on
+
+    local divergent=-1
+    local start_time=$(date +%s)
+    local max_duration=300
+    local current_time
+    local elapsed_time
+
+    # Poll until the pg dump reports a usable primary, giving up after
+    # max_duration seconds.
+    while [ "$divergent" -le -1 ]
+    do
+        flush_pg_stats || return 1
+        wait_for_clean || return 1
+
+        # determine primary
+        divergent="$(ceph pg dump pgs --format=json | jq '.pg_stats[0].up_primary')"
+        echo "primary and soon to be divergent is $divergent"
+        ceph pg dump pgs
+
+        # jq prints "null" when the path is missing, and the value is empty
+        # when the dump fails.  Either would make the numeric test above
+        # error out and leave the loop with a bogus OSD id, so anything that
+        # is not a plain non-negative integer counts as "not found yet".
+        case "$divergent" in
+            ''|*[!0-9]*) divergent=-1 ;;
+        esac
+
+        current_time=$(date +%s)
+        elapsed_time=$(expr $current_time - $start_time)
+        if [ "$elapsed_time" -gt "$max_duration" ]; then
+            echo "timed out waiting for divergent"
+            return 1
+        fi
+    done
+
+    local non_divergent=""
+    for i in $osds
+    do
+        if [ "$i" = "$divergent" ]; then
+            continue
+        fi
+        non_divergent="$non_divergent $i"
+    done
+
+    echo "writing initial objects"
+    # write a bunch of objects
+    for i in $(seq 1 $testobjects)
+    do
+        rados -p $poolname put existing_$i $dummyfile
+    done
+
+    WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean
+
+    local pgid=$(get_pg $poolname existing_1)
+
+    # blackhole non_divergent
+    echo "blackholing osds $non_divergent"
+    ceph pg dump pgs
+    for i in $non_divergent
+    do
+        CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) config set objectstore_blackhole 1
+    done
+
+    # All divergent operations below run in the background and are killed
+    # after 10 seconds.
+    # Do some creates to hit case 2
+    echo 'create new divergent objects'
+    for i in $(seq 1 $DIVERGENT_CREATE)
+    do
+        rados -p $poolname create newobject_$i &
+    done
+    # Write some soon to be divergent
+    echo 'writing divergent objects'
+    for i in $(seq 1 $DIVERGENT_WRITE)
+    do
+        rados -p $poolname put existing_$i $dummyfile2 &
+    done
+    # Remove some soon to be divergent
+    echo 'remove divergent objects'
+    local rmi
+    for i in $(seq 1 $DIVERGENT_REMOVE)
+    do
+        rmi=$(expr $i + $DIVERGENT_WRITE)
+        rados -p $poolname rm existing_$rmi &
+    done
+    sleep 10
+    killall -9 rados
+
+    # kill all the osds but leave divergent in
+    # (every OSD is killed and marked down; only the non-divergent ones are
+    # marked out)
+    echo 'killing all the osds'
+    ceph pg dump pgs
+    kill_daemons $dir KILL osd || return 1
+    for i in $osds
+    do
+        ceph osd down osd.$i
+    done
+    for i in $non_divergent
+    do
+        ceph osd out osd.$i
+    done
+
+    # bring up non-divergent
+    echo "bringing up non_divergent $non_divergent"
+    ceph pg dump pgs
+    for i in $non_divergent
+    do
+        activate_osd $dir $i || return 1
+    done
+    for i in $non_divergent
+    do
+        ceph osd in osd.$i
+    done
+
+    WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean
+
+    # write 1 non-divergent object (ensure that old divergent one is divergent)
+    local objname="existing_$(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)"
+    echo "writing non-divergent object $objname"
+    ceph pg dump pgs
+    rados -p $poolname put $objname $dummyfile2
+
+    WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean
+
+    # ensure no recovery of up osds first
+    echo 'delay recovery'
+    ceph pg dump pgs
+    for i in $non_divergent
+    do
+        CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) set_recovery_delay 100000
+    done
+
+    # bring in our divergent friend
+    # noup is set while the daemon starts so the recovery delay can be applied
+    # before the OSD is marked up again.
+    echo "revive divergent $divergent"
+    ceph pg dump pgs
+    ceph osd set noup
+    activate_osd $dir $divergent
+    sleep 5
+
+    echo 'delay recovery divergent'
+    ceph pg dump pgs
+    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) set_recovery_delay 100000
+
+    ceph osd unset noup
+
+    wait_for_osd up 0
+    wait_for_osd up 1
+    wait_for_osd up 2
+
+    ceph pg dump pgs
+    echo 'wait for peering'
+    ceph pg dump pgs
+    rados -p $poolname put foo $dummyfile
+
+    # At this point the divergent_priors should have been detected
+
+    echo "killing divergent $divergent"
+    ceph pg dump pgs
+    kill_daemons $dir KILL osd.$divergent
+
+    # export a pg
+    # (export-remove followed by import, with the divergent OSD stopped)
+    local expfile=$dir/exp.$$.out
+    _objectstore_tool_nodown $dir $divergent --op export-remove --pgid $pgid --file $expfile
+    _objectstore_tool_nodown $dir $divergent --op import --file $expfile
+
+    echo "reviving divergent $divergent"
+    ceph pg dump pgs
+    activate_osd $dir $divergent
+    wait_for_osd up $divergent
+
+    sleep 20
+    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) dump_ops_in_flight
+
+    echo "allowing recovery"
+    ceph pg dump pgs
+    # Set osd_recovery_delay_start back to 0 and kick the queue
+    for i in $osds
+    do
+        ceph tell osd.$i debug kick_recovery_wq 0
+    done
+
+    echo 'reading divergent objects'
+    ceph pg dump pgs
+    for i in $(seq 1 $(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE))
+    do
+        rados -p $poolname get existing_$i $dir/existing || return 1
+    done
+    # NOTE(review): the status of these reads is not checked, presumably
+    # because the divergent creates may not have survived - confirm.
+    for i in $(seq 1 $DIVERGENT_CREATE)
+    do
+        rados -p $poolname get newobject_$i $dir/existing
+    done
+    rm -f $dir/existing
+
+    # print all merge messages for the record (result not checked)
+    grep _merge_object_divergent_entries $(find $dir -name '*osd*log')
+    # Check for _merge_object_divergent_entries for case #1
+    if ! grep -q "_merge_object_divergent_entries: more recent entry found:" $(find $dir -name '*osd*log')
+    then
+        echo failure
+        return 1
+    fi
+    # Check for _merge_object_divergent_entries for case #2
+    if ! grep -q "_merge_object_divergent_entries.*prior_version or op type indicates creation" $(find $dir -name '*osd*log')
+    then
+        echo failure
+        return 1
+    fi
+    echo "success"
+
+    # expfile already starts with $dir, so it must not be prefixed again
+    rm -f $expfile
+
+    delete_pool $poolname
+    kill_daemons $dir || return 1
+}
+
+
+main divergent-priors "$@"
+
+# Local Variables:
+# compile-command: "make -j4 && ../qa/run-standalone.sh divergent-priors.sh"
+# End:
diff --git a/qa/standalone/osd/ec-error-rollforward.sh b/qa/standalone/osd/ec-error-rollforward.sh
new file mode 100755
index 000000000..621e6b13f
--- /dev/null
+++ b/qa/standalone/osd/ec-error-rollforward.sh
@@ -0,0 +1,66 @@
+#!/usr/bin/env bash
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ # Fix port????
+ export CEPH_MON="127.0.0.1:7132" # git grep '\<7132\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+ export margin=10
+ export objects=200
+ export poolname=test
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+# Issue removals against a 2+2 erasure-coded pool while osd.2 is frozen with
+# SIGSTOP, kill every OSD mid-flight, restart them and require the cluster to
+# become clean again.
+#
+# Arguments: $1 - test directory holding daemon data and pid files
+# Returns:   1 when a daemon fails to start or restart, or when
+#            wait_for_clean fails
+function TEST_ec_error_rollforward() {
+    local dir=$1
+    local osd_num
+    local victim
+
+    run_mon $dir a || return 1
+    run_mgr $dir x || return 1
+    for osd_num in 0 1 2 3 ; do
+        run_osd $dir $osd_num || return 1
+    done
+
+    ceph osd erasure-code-profile set ec-profile m=2 k=2 crush-failure-domain=osd
+    ceph osd pool create ec 1 1 erasure ec-profile
+
+    rados -p ec put foo /etc/passwd
+
+    # Freeze osd.2 so the removals below cannot complete on every shard.
+    kill -STOP $(cat $dir/osd.2.pid)
+
+    # Remove the real object first, then three names that were never written;
+    # the client pids are collected so they can be killed afterwards.
+    rados -p ec rm foo &
+    pids="$!"
+    sleep 1
+    for victim in a b c ; do
+        rados -p ec rm $victim &
+        pids+=" $!"
+    done
+    sleep 1
+    # Use SIGKILL so stopped osd.2 will terminate
+    # and kill_daemons waits for daemons to die
+    kill_daemons $dir KILL osd
+    kill $pids
+    wait
+
+    for osd_num in 0 1 2 3 ; do
+        activate_osd $dir $osd_num || return 1
+    done
+
+    wait_for_clean || return 1
+}
+
+main ec-error-rollforward "$@"
diff --git a/qa/standalone/osd/osd-bench.sh b/qa/standalone/osd/osd-bench.sh
new file mode 100755
index 000000000..eb1a6a440
--- /dev/null
+++ b/qa/standalone/osd/osd-bench.sh
@@ -0,0 +1,97 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
+# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com>
+#
+# Author: Loic Dachary <loic@dachary.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+# Test driver for osd-bench.sh (presumably invoked by main from
+# ceph-helpers.sh).
+#
+# Arguments: $1   - scratch directory handed to setup, each test and teardown
+#            $2.. - optional names of the TEST_* functions to run
+# Globals:   exports CEPH_MON and appends to the exported CEPH_ARGS
+# Returns:   1 as soon as setup, a test function or teardown fails
+function run() {
+    local dir=$1
+    shift
+
+    export CEPH_MON="127.0.0.1:7106" # git grep '\<7106\>' : there must be only one
+    export CEPH_ARGS
+    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none --mon-host=$CEPH_MON "
+    CEPH_ARGS+="--debug-bluestore 20 "
+
+    # Without explicit names, run every TEST_* function defined in this shell.
+    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+    for func in $funcs ; do
+        if ! setup $dir ; then
+            return 1
+        fi
+        if ! $func $dir ; then
+            return 1
+        fi
+        if ! teardown $dir ; then
+            return 1
+        fi
+    done
+}
+
+# Exercise the limits of "ceph tell osd.0 bench": requests that exceed the
+# configured block size, small-block IOPS or large-block throughput limits
+# must be rejected, while the defaults and two object_size < block_size
+# invocations are run afterwards.
+#
+# Arguments: $1 - test directory
+# Returns:   1 when a daemon fails to start, an expect_failure check fails or
+#            the default bench fails; otherwise the status of the last bench
+function TEST_bench() {
+    local dir=$1
+
+    run_mon $dir a || return 1
+    run_mgr $dir x || return 1
+    run_osd $dir 0 || return 1
+
+    # Read the limits from ceph-conf, with CEPH_ARGS emptied for each call.
+    local osd_bench_small_size_max_iops
+    local osd_bench_large_size_max_throughput
+    local osd_bench_max_block_size
+    local osd_bench_duration
+    osd_bench_small_size_max_iops=$(CEPH_ARGS='' ceph-conf --show-config-value osd_bench_small_size_max_iops)
+    osd_bench_large_size_max_throughput=$(CEPH_ARGS='' ceph-conf --show-config-value osd_bench_large_size_max_throughput)
+    osd_bench_max_block_size=$(CEPH_ARGS='' ceph-conf --show-config-value osd_bench_max_block_size)
+    osd_bench_duration=$(CEPH_ARGS='' ceph-conf --show-config-value osd_bench_duration)
+
+    #
+    # block size too high
+    #
+    local oversized_block=$((osd_bench_max_block_size + 1))
+    expect_failure $dir osd_bench_max_block_size \
+        ceph tell osd.0 bench 1024 $oversized_block || return 1
+
+    #
+    # count too high for small (< 1MB) block sizes
+    #
+    local small_bsize=1024
+    local small_max_count=$(($small_bsize * $osd_bench_duration * $osd_bench_small_size_max_iops))
+    expect_failure $dir bench_small_size_max_iops \
+        ceph tell osd.0 bench $(($small_max_count + 1)) $small_bsize || return 1
+
+    #
+    # count too high for large (>= 1MB) block sizes
+    #
+    local large_bsize=$((1024 * 1024 + 1))
+    local large_max_count=$(($osd_bench_large_size_max_throughput * $osd_bench_duration))
+    expect_failure $dir osd_bench_large_size_max_throughput \
+        ceph tell osd.0 bench $(($large_max_count + 1)) $large_bsize || return 1
+
+    #
+    # default values should work
+    #
+    ceph tell osd.0 bench || return 1
+
+    #
+    # test object_size < block_size
+    #
+    ceph tell osd.0 bench 10 14456 4444 3
+
+    #
+    # test object_size < block_size & object_size = 0(default value)
+    #
+    ceph tell osd.0 bench 1 14456
+}
+
+main osd-bench "$@"
+
+# Local Variables:
+# compile-command: "make -j4 && ../qa/run-standalone.sh osd-bench.sh"
+# End:
diff --git a/qa/standalone/osd/osd-bluefs-volume-ops.sh b/qa/standalone/osd/osd-bluefs-volume-ops.sh
new file mode 100755
index 000000000..aedfbc9b5
--- /dev/null
+++ b/qa/standalone/osd/osd-bluefs-volume-ops.sh
@@ -0,0 +1,497 @@
+#!/usr/bin/env bash
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+[ `uname` = FreeBSD ] && exit 0
+
+function run() {
+    local dir=$1
+    shift
+
+    # Run the tests named on the command line; when none are given, run
+    # every TEST_* function listed by `set`.
+    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+    for func in $funcs ; do
+        # stop at the first step (setup, test body, teardown) that fails
+        setup $dir && $func $dir && teardown $dir || return 1
+    done
+}
+
+#
+# Exercise ceph-bluestore-tool volume operations (bluefs-bdev-expand,
+# bluefs-bdev-migrate, bluefs-bdev-new-db, bluefs-bdev-new-wal) on four
+# OSDs created with separate DB and WAL volumes. Between rounds of
+# offline operations the OSDs are reactivated and written to with
+# rados bench. Returns 1 as soon as a checked step fails.
+#
+# Arguments: $1 - test directory holding the OSD data dirs and pid files
+# Globals:   CEPH_MON, CEPH_ARGS (exported), osd_pid0..osd_pid3 (written)
+#
+function TEST_bluestore() {
+ local dir=$1
+
+ local flimit=$(ulimit -n)
+ if [ $flimit -lt 1536 ]; then
+ echo "Low open file limit ($flimit), test may fail. Increase to 1536 or higher and retry if that happens."
+ fi
+ export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+ # 2GB main device, 1GB DB volume, 512MB WAL volume
+ CEPH_ARGS+="--bluestore_block_size=2147483648 "
+ CEPH_ARGS+="--bluestore_block_db_create=true "
+ CEPH_ARGS+="--bluestore_block_db_size=1073741824 "
+ CEPH_ARGS+="--bluestore_block_wal_size=536870912 "
+ CEPH_ARGS+="--bluestore_block_wal_create=true "
+ CEPH_ARGS+="--bluestore_fsck_on_mount=true "
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ # start four OSDs, remembering each pid so it can be killed later
+ run_osd $dir 0 || return 1
+ osd_pid0=$(cat $dir/osd.0.pid)
+ run_osd $dir 1 || return 1
+ osd_pid1=$(cat $dir/osd.1.pid)
+ run_osd $dir 2 || return 1
+ osd_pid2=$(cat $dir/osd.2.pid)
+ run_osd $dir 3 || return 1
+ osd_pid3=$(cat $dir/osd.3.pid)
+
+ sleep 5
+
+ create_pool foo 16
+
+ # write some objects
+ # (the "|| return 1" is commented out, so the bench status is ignored)
+ timeout 60 rados bench -p foo 30 write -b 4096 --no-cleanup #|| return 1
+
+ echo "after bench"
+
+ # kill
+ # each loop keeps signalling the pid until kill itself fails
+ while kill $osd_pid0; do sleep 1 ; done
+ ceph osd down 0
+ while kill $osd_pid1; do sleep 1 ; done
+ ceph osd down 1
+ while kill $osd_pid2; do sleep 1 ; done
+ ceph osd down 2
+ while kill $osd_pid3; do sleep 1 ; done
+ ceph osd down 3
+
+ # expand slow devices
+ ceph-bluestore-tool --path $dir/0 fsck || return 1
+ ceph-bluestore-tool --path $dir/1 fsck || return 1
+ ceph-bluestore-tool --path $dir/2 fsck || return 1
+ ceph-bluestore-tool --path $dir/3 fsck || return 1
+
+ # grow each backing file to a different size, then expand into it
+ truncate $dir/0/block -s 4294967296 # 4GB
+ ceph-bluestore-tool --path $dir/0 bluefs-bdev-expand || return 1
+ truncate $dir/1/block -s 4311744512 # 4GB + 16MB
+ ceph-bluestore-tool --path $dir/1 bluefs-bdev-expand || return 1
+ truncate $dir/2/block -s 4295099392 # 4GB + 129KB
+ ceph-bluestore-tool --path $dir/2 bluefs-bdev-expand || return 1
+ truncate $dir/3/block -s 4293918720 # 4GB - 1MB
+ ceph-bluestore-tool --path $dir/3 bluefs-bdev-expand || return 1
+
+ # slow, DB, WAL -> slow, DB
+ ceph-bluestore-tool --path $dir/0 fsck || return 1
+ ceph-bluestore-tool --path $dir/1 fsck || return 1
+ ceph-bluestore-tool --path $dir/2 fsck || return 1
+ ceph-bluestore-tool --path $dir/3 fsck || return 1
+
+ ceph-bluestore-tool --path $dir/0 bluefs-bdev-sizes
+
+ ceph-bluestore-tool --path $dir/0 \
+ --devs-source $dir/0/block.wal \
+ --dev-target $dir/0/block.db \
+ --command bluefs-bdev-migrate || return 1
+
+ ceph-bluestore-tool --path $dir/0 fsck || return 1
+
+ # slow, DB, WAL -> slow, WAL
+ ceph-bluestore-tool --path $dir/1 \
+ --devs-source $dir/1/block.db \
+ --dev-target $dir/1/block \
+ --command bluefs-bdev-migrate || return 1
+
+ ceph-bluestore-tool --path $dir/1 fsck || return 1
+
+ # slow, DB, WAL -> slow
+ ceph-bluestore-tool --path $dir/2 \
+ --devs-source $dir/2/block.wal \
+ --devs-source $dir/2/block.db \
+ --dev-target $dir/2/block \
+ --command bluefs-bdev-migrate || return 1
+
+ ceph-bluestore-tool --path $dir/2 fsck || return 1
+
+ # slow, DB, WAL -> slow, WAL (negative case)
+ ceph-bluestore-tool --path $dir/3 \
+ --devs-source $dir/3/block.db \
+ --dev-target $dir/3/block.wal \
+ --command bluefs-bdev-migrate
+
+ # Migration to WAL is unsupported
+ # ($? is the status of the migrate command directly above)
+ if [ $? -eq 0 ]; then
+ return 1
+ fi
+ ceph-bluestore-tool --path $dir/3 fsck || return 1
+
+ # slow, DB, WAL -> slow, DB (WAL to slow then slow to DB)
+ ceph-bluestore-tool --path $dir/3 \
+ --devs-source $dir/3/block.wal \
+ --dev-target $dir/3/block \
+ --command bluefs-bdev-migrate || return 1
+
+ ceph-bluestore-tool --path $dir/3 fsck || return 1
+
+ ceph-bluestore-tool --path $dir/3 \
+ --devs-source $dir/3/block \
+ --dev-target $dir/3/block.db \
+ --command bluefs-bdev-migrate || return 1
+
+ ceph-bluestore-tool --path $dir/3 fsck || return 1
+
+ # bring the OSDs back and refresh the remembered pids
+ activate_osd $dir 0 || return 1
+ osd_pid0=$(cat $dir/osd.0.pid)
+ activate_osd $dir 1 || return 1
+ osd_pid1=$(cat $dir/osd.1.pid)
+ activate_osd $dir 2 || return 1
+ osd_pid2=$(cat $dir/osd.2.pid)
+ activate_osd $dir 3 || return 1
+ osd_pid3=$(cat $dir/osd.3.pid)
+
+ wait_for_clean || return 1
+
+ # write some objects
+ timeout 60 rados bench -p foo 30 write -b 4096 --no-cleanup #|| return 1
+
+ # kill
+ while kill $osd_pid0; do sleep 1 ; done
+ ceph osd down 0
+ while kill $osd_pid1; do sleep 1 ; done
+ ceph osd down 1
+ while kill $osd_pid2; do sleep 1 ; done
+ ceph osd down 2
+ while kill $osd_pid3; do sleep 1 ; done
+ ceph osd down 3
+
+ # slow, DB -> slow, DB, WAL
+ ceph-bluestore-tool --path $dir/0 fsck || return 1
+
+ # 512MB zero-filled file used as the new WAL volume
+ dd if=/dev/zero of=$dir/0/wal count=512 bs=1M
+ ceph-bluestore-tool --path $dir/0 \
+ --dev-target $dir/0/wal \
+ --command bluefs-bdev-new-wal || return 1
+
+ ceph-bluestore-tool --path $dir/0 fsck || return 1
+
+ # slow, WAL -> slow, DB, WAL
+ ceph-bluestore-tool --path $dir/1 fsck || return 1
+
+ # 1GB zero-filled file used as the new DB volume
+ dd if=/dev/zero of=$dir/1/db count=1024 bs=1M
+ ceph-bluestore-tool --path $dir/1 \
+ --dev-target $dir/1/db \
+ --command bluefs-bdev-new-db || return 1
+
+ ceph-bluestore-tool --path $dir/1 \
+ --devs-source $dir/1/block \
+ --dev-target $dir/1/block.db \
+ --command bluefs-bdev-migrate || return 1
+
+ ceph-bluestore-tool --path $dir/1 fsck || return 1
+
+ # slow -> slow, DB, WAL
+ ceph-bluestore-tool --path $dir/2 fsck || return 1
+
+ # no --dev-target is given for the new volumes of OSD 2 and OSD 3
+ ceph-bluestore-tool --path $dir/2 \
+ --command bluefs-bdev-new-db || return 1
+
+ ceph-bluestore-tool --path $dir/2 \
+ --command bluefs-bdev-new-wal || return 1
+
+ ceph-bluestore-tool --path $dir/2 \
+ --devs-source $dir/2/block \
+ --dev-target $dir/2/block.db \
+ --command bluefs-bdev-migrate || return 1
+
+ ceph-bluestore-tool --path $dir/2 fsck || return 1
+
+ # slow, DB -> slow, WAL
+ ceph-bluestore-tool --path $dir/3 fsck || return 1
+
+ ceph-bluestore-tool --path $dir/3 \
+ --command bluefs-bdev-new-wal || return 1
+
+ ceph-bluestore-tool --path $dir/3 \
+ --devs-source $dir/3/block.db \
+ --dev-target $dir/3/block \
+ --command bluefs-bdev-migrate || return 1
+
+ ceph-bluestore-tool --path $dir/3 fsck || return 1
+
+ activate_osd $dir 0 || return 1
+ osd_pid0=$(cat $dir/osd.0.pid)
+ activate_osd $dir 1 || return 1
+ osd_pid1=$(cat $dir/osd.1.pid)
+ activate_osd $dir 2 || return 1
+ osd_pid2=$(cat $dir/osd.2.pid)
+ activate_osd $dir 3 || return 1
+ osd_pid3=$(cat $dir/osd.3.pid)
+
+ # write some objects
+ timeout 60 rados bench -p foo 30 write -b 4096 --no-cleanup #|| return 1
+
+ # kill
+ while kill $osd_pid0; do sleep 1 ; done
+ ceph osd down 0
+ while kill $osd_pid1; do sleep 1 ; done
+ ceph osd down 1
+ while kill $osd_pid2; do sleep 1 ; done
+ ceph osd down 2
+ while kill $osd_pid3; do sleep 1 ; done
+ ceph osd down 3
+
+ # slow, DB1, WAL -> slow, DB2, WAL
+ ceph-bluestore-tool --path $dir/0 fsck || return 1
+
+ dd if=/dev/zero of=$dir/0/db2 count=1024 bs=1M
+ ceph-bluestore-tool --path $dir/0 \
+ --devs-source $dir/0/block.db \
+ --dev-target $dir/0/db2 \
+ --command bluefs-bdev-migrate || return 1
+
+ ceph-bluestore-tool --path $dir/0 fsck || return 1
+
+ # slow, DB, WAL1 -> slow, DB, WAL2
+
+ dd if=/dev/zero of=$dir/0/wal2 count=512 bs=1M
+ ceph-bluestore-tool --path $dir/0 \
+ --devs-source $dir/0/block.wal \
+ --dev-target $dir/0/wal2 \
+ --command bluefs-bdev-migrate || return 1
+ # the WAL file created earlier is removed once its content is migrated
+ rm -rf $dir/0/wal
+
+ ceph-bluestore-tool --path $dir/0 fsck || return 1
+
+ # slow, DB + WAL -> slow, DB2 -> slow
+ ceph-bluestore-tool --path $dir/1 fsck || return 1
+
+ dd if=/dev/zero of=$dir/1/db2 count=1024 bs=1M
+ ceph-bluestore-tool --path $dir/1 \
+ --devs-source $dir/1/block.db \
+ --devs-source $dir/1/block.wal \
+ --dev-target $dir/1/db2 \
+ --command bluefs-bdev-migrate || return 1
+
+ rm -rf $dir/1/db
+
+ ceph-bluestore-tool --path $dir/1 fsck || return 1
+
+ ceph-bluestore-tool --path $dir/1 \
+ --devs-source $dir/1/block.db \
+ --dev-target $dir/1/block \
+ --command bluefs-bdev-migrate || return 1
+
+ rm -rf $dir/1/db2
+
+ ceph-bluestore-tool --path $dir/1 fsck || return 1
+
+ # slow -> slow, DB (negative case)
+ # (this fsck goes through ceph-objectstore-tool rather than ceph-bluestore-tool)
+ ceph-objectstore-tool --type bluestore --data-path $dir/2 \
+ --op fsck --no-mon-config || return 1
+
+ dd if=/dev/zero of=$dir/2/db2 count=1024 bs=1M
+ ceph-bluestore-tool --path $dir/2 \
+ --devs-source $dir/2/block \
+ --dev-target $dir/2/db2 \
+ --command bluefs-bdev-migrate
+
+ # Migration from slow-only to new device is unsupported
+ # ($? is the status of the migrate command directly above)
+ if [ $? -eq 0 ]; then
+ return 1
+ fi
+ ceph-bluestore-tool --path $dir/2 fsck || return 1
+
+ # slow + DB + WAL -> slow, DB2
+ dd if=/dev/zero of=$dir/2/db2 count=1024 bs=1M
+
+ ceph-bluestore-tool --path $dir/2 \
+ --devs-source $dir/2/block \
+ --devs-source $dir/2/block.db \
+ --devs-source $dir/2/block.wal \
+ --dev-target $dir/2/db2 \
+ --command bluefs-bdev-migrate || return 1
+
+ ceph-bluestore-tool --path $dir/2 fsck || return 1
+
+ # slow + WAL -> slow2, WAL2
+ dd if=/dev/zero of=$dir/3/wal2 count=1024 bs=1M
+
+ ceph-bluestore-tool --path $dir/3 \
+ --devs-source $dir/3/block \
+ --devs-source $dir/3/block.wal \
+ --dev-target $dir/3/wal2 \
+ --command bluefs-bdev-migrate || return 1
+
+ ceph-bluestore-tool --path $dir/3 fsck || return 1
+
+ activate_osd $dir 0 || return 1
+ osd_pid0=$(cat $dir/osd.0.pid)
+ activate_osd $dir 1 || return 1
+ osd_pid1=$(cat $dir/osd.1.pid)
+ activate_osd $dir 2 || return 1
+ osd_pid2=$(cat $dir/osd.2.pid)
+ activate_osd $dir 3 || return 1
+ osd_pid3=$(cat $dir/osd.3.pid)
+
+ # write some objects
+ timeout 60 rados bench -p foo 30 write -b 4096 --no-cleanup #|| return 1
+
+ wait_for_clean || return 1
+}
+
+#
+# Fill an OSD that has a DB volume but no WAL volume with omap data
+# until bluefs reports bytes on the slow device, then migrate the DB
+# volume back into the main device offline and restart the OSD.
+#
+# Arguments: $1 - test directory holding the OSD data dir and pid file
+# Globals:   CEPH_MON, CEPH_ARGS (exported), osd_pid0 (written)
+# Returns:   1 if no spillover was observed or a checked step failed
+#
+function TEST_bluestore2() {
+    local dir=$1
+
+    local flimit=$(ulimit -n)
+    if [ $flimit -lt 1536 ]; then
+        echo "Low open file limit ($flimit), test may fail. Increase to 1536 or higher and retry if that happens."
+    fi
+    export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one
+    export CEPH_ARGS
+    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+    CEPH_ARGS+="--mon-host=$CEPH_MON "
+    CEPH_ARGS+="--bluestore_block_size=4294967296 "
+    CEPH_ARGS+="--bluestore_block_db_create=true "
+    CEPH_ARGS+="--bluestore_block_db_size=1073741824 "
+    CEPH_ARGS+="--bluestore_block_wal_create=false "
+    CEPH_ARGS+="--bluestore_fsck_on_mount=true "
+    CEPH_ARGS+="--osd_pool_default_size=1 "
+    CEPH_ARGS+="--osd_pool_default_min_size=1 "
+    CEPH_ARGS+="--bluestore_debug_enforce_settings=ssd "
+
+    run_mon $dir a || return 1
+    run_mgr $dir x || return 1
+    run_osd $dir 0 || return 1
+    osd_pid0=$(cat $dir/osd.0.pid)
+
+    sleep 5
+    create_pool foo 16
+
+    # Plain assignment: no spaces around '=' (with spaces the shell
+    # would try to run a command named "retry").
+    local retry=0
+    local db_used
+    local spilled_over
+    while [[ $retry -le 5 ]]; do
+        # write some objects
+        # (the "|| return 1" is commented out, so the bench status is ignored)
+        timeout 60 rados bench -p foo 10 write --write-omap --no-cleanup #|| return 1
+
+        #give RocksDB some time to cooldown and put files to slow level(s)
+        sleep 10
+
+        db_used=$( ceph tell osd.0 perf dump bluefs | jq ".bluefs.db_used_bytes" )
+        spilled_over=$( ceph tell osd.0 perf dump bluefs | jq ".bluefs.slow_used_bytes" )
+        ((retry+=1))
+        # stop retrying as soon as some bytes are reported on the slow device
+        test $spilled_over -eq 0 || break
+    done
+    test $spilled_over -gt 0 || return 1
+
+    # keep signalling the OSD until kill itself fails
+    while kill $osd_pid0; do sleep 1 ; done
+    ceph osd down 0
+
+    ceph-bluestore-tool --path $dir/0 \
+        --devs-source $dir/0/block.db \
+        --dev-target $dir/0/block \
+        --command bluefs-bdev-migrate || return 1
+
+    ceph-bluestore-tool --path $dir/0 \
+        --command bluefs-bdev-sizes || return 1
+
+    ceph-bluestore-tool --path $dir/0 \
+        --command fsck || return 1
+
+    activate_osd $dir 0 || return 1
+    osd_pid0=$(cat $dir/osd.0.pid)
+
+    wait_for_clean || return 1
+}
+
+#
+# Write to a single OSD, stop it, run bluefs-bdev-expand on its main
+# device offline, restart it, and check the bluefs slow-device totals
+# reported before and after.
+#
+# Arguments: $1 - test directory holding the OSD data dir and pid file
+# Globals:   CEPH_MON, CEPH_ARGS (exported), osd_pid0 and the
+#            *_space_* bookkeeping variables (written)
+# Returns:   1 if a checked step fails, if the reported total differs
+#            from the requested size, or if more than 128KB of the
+#            added space is reported as used
+#
+function TEST_bluestore_expand() {
+    local dir=$1
+
+    local flimit=$(ulimit -n)
+    if [ $flimit -lt 1536 ]; then
+        echo "Low open file limit ($flimit), test may fail. Increase to 1536 or higher and retry if that happens."
+    fi
+    export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one
+    export CEPH_ARGS
+    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+    CEPH_ARGS+="--mon-host=$CEPH_MON "
+    CEPH_ARGS+="--bluestore_block_size=4294967296 "
+    CEPH_ARGS+="--bluestore_block_db_create=true "
+    CEPH_ARGS+="--bluestore_block_db_size=1073741824 "
+    CEPH_ARGS+="--bluestore_block_wal_create=false "
+    CEPH_ARGS+="--bluestore_fsck_on_mount=true "
+    CEPH_ARGS+="--osd_pool_default_size=1 "
+    CEPH_ARGS+="--osd_pool_default_min_size=1 "
+    CEPH_ARGS+="--bluestore_debug_enforce_settings=ssd "
+
+    run_mon $dir a || return 1
+    run_mgr $dir x || return 1
+    run_osd $dir 0 || return 1
+    osd_pid0=$(cat $dir/osd.0.pid)
+
+    sleep 5
+    create_pool foo 16
+
+    # write some objects
+    # (the "|| return 1" is commented out, so the bench status is ignored)
+    timeout 60 rados bench -p foo 30 write -b 4096 --no-cleanup #|| return 1
+    sleep 5
+
+    # record the slow-device totals before the expansion
+    total_space_before=$( ceph tell osd.0 perf dump bluefs | jq ".bluefs.slow_total_bytes" )
+    free_space_before=$(ceph tell osd.0 bluestore bluefs device info | grep "BDEV_SLOW" -A 2 | grep free | cut -d':' -f 2 | cut -d"," -f 1 | cut -d' ' -f 2)
+
+    # kill (keep signalling the OSD until kill itself fails)
+    while kill $osd_pid0; do sleep 1 ; done
+    ceph osd down 0
+
+    # destage allocation to file before expand (in case fast-shutdown skipped that step)
+    ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 allocmap || return 1
+
+    # expand slow devices
+    ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 fsck || return 1
+
+    requested_space=4294967296 # 4GB
+    truncate $dir/0/block -s $requested_space
+    ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 bluefs-bdev-expand || return 1
+
+    # slow, DB, WAL -> slow, DB
+    ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 fsck || return 1
+
+    # compare allocation-file with RocksDB state
+    ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 qfsck || return 1
+
+    ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 bluefs-bdev-sizes
+
+    activate_osd $dir 0 || return 1
+    osd_pid0=$(cat $dir/osd.0.pid)
+
+    wait_for_clean || return 1
+
+    total_space_after=$( ceph tell osd.0 perf dump bluefs | jq ".bluefs.slow_total_bytes" )
+    free_space_after=$(ceph tell osd.0 bluestore bluefs device info | grep "BDEV_SLOW" -A 2 | grep free | cut -d':' -f 2 | cut -d"," -f 1 | cut -d' ' -f 2)
+
+    # '[' is a command: it needs spaces around its arguments, otherwise
+    # the comparison is never evaluated. Operands are quoted so an empty
+    # value cannot break the test expression.
+    if [ "$total_space_after" != "$requested_space" ]; then
+        echo "total_space_after = $total_space_after"
+        echo "requested_space   = $requested_space"
+        return 1;
+    fi
+
+    total_space_added=$((total_space_after - total_space_before))
+    free_space_added=$((free_space_after - free_space_before))
+
+    # space that was added but is not reported as free
+    new_used_space=$((total_space_added - free_space_added))
+    echo $new_used_space
+    # allow up to 128KB to be consumed
+    if [ $new_used_space -gt 131072 ]; then
+        echo "total_space_added = $total_space_added"
+        echo "free_space_added  = $free_space_added"
+        return 1;
+    fi
+
+    # kill
+    while kill $osd_pid0; do sleep 1 ; done
+    ceph osd down 0
+
+    ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 qfsck || return 1
+}
+
+main osd-bluefs-volume-ops "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && test/osd/osd-bluefs-volume-ops.sh"
+# End:
diff --git a/qa/standalone/osd/osd-config.sh b/qa/standalone/osd/osd-config.sh
new file mode 100755
index 000000000..126c2f7de
--- /dev/null
+++ b/qa/standalone/osd/osd-config.sh
@@ -0,0 +1,97 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
+# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com>
+#
+# Author: Loic Dachary <loic@dachary.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+    local dir=$1
+    shift
+
+    # Monitor address and common arguments for every ceph command below.
+    export CEPH_MON="127.0.0.1:7100" # git grep '\<7100\>' : there must be only one
+    export CEPH_ARGS
+    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none --mon-host=$CEPH_MON "
+
+    # Run the tests named on the command line; when none are given, run
+    # every TEST_* function listed by `set`.
+    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+    for func in $funcs ; do
+        # stop at the first step (setup, test body, teardown) that fails
+        setup $dir && $func $dir && teardown $dir || return 1
+    done
+}
+
+function TEST_config_init() {
+    local dir=$1
+    local stale=1000
+    local cache=500
+    # message expected in the OSD log for this stale/cache combination
+    local warning='is not > osd_pg_epoch_persisted_max_stale'
+
+    run_mon $dir a || return 1
+    run_mgr $dir x || return 1
+    # start the OSD with a max-stale value larger than the map cache size
+    if ! run_osd $dir 0 \
+        --osd-map-cache-size=$cache \
+        --osd-pg-epoch-persisted-max-stale=$stale
+    then
+        return 1
+    fi
+    # flush the log through the admin socket before searching it
+    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log flush || return 1
+    grep "$warning" $dir/osd.0.log || return 1
+}
+
+function TEST_config_track() {
+    local dir=$1
+    local warning='is not > osd_pg_epoch_persisted_max_stale'
+    local osd_log=$dir/osd.0.log
+
+    run_mon $dir a || return 1
+    run_mgr $dir x || return 1
+    run_osd $dir 0 || return 1
+
+    local osd_map_cache_size
+    local osd_pg_epoch_persisted_max_stale
+    osd_map_cache_size=$(CEPH_ARGS='' ceph-conf \
+        --show-config-value osd_map_cache_size)
+    osd_pg_epoch_persisted_max_stale=$(CEPH_ARGS='' ceph-conf \
+        --show-config-value osd_pg_epoch_persisted_max_stale)
+
+    # the warning must not be in the log before the value is changed
+    if grep "$warning" $osd_log ; then
+        return 1
+    fi
+
+    # increase the osd_pg_epoch_persisted_max_stale above the default cache_size
+    local stale=$(($osd_map_cache_size * 2))
+    ceph tell osd.0 injectargs "--osd-pg-epoch-persisted-max-stale $stale" || return 1
+    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log flush || return 1
+    # ...and must be there afterwards
+    grep "$warning" $osd_log || return 1
+
+    # drop the log file and ask the OSD to reopen it
+    rm $osd_log
+    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log reopen || return 1
+}
+
+function TEST_default_adjustment() {
+    # common prefix of every query below
+    local -a show=(ceph-osd --no-mon-config --show-config-value)
+
+    # string option: --default-<name> changes the reported value
+    a=$("${show[@]}" rgw_torrent_origin)
+    b=$("${show[@]}" rgw_torrent_origin --default-rgw-torrent-origin default)
+    c=$("${show[@]}" rgw_torrent_origin --default-rgw-torrent-origin arg)
+    if [ "$a" = "default" ]; then
+        return 1
+    fi
+    if [ "$b" != "default" ]; then
+        return 1
+    fi
+    if [ "$c" != "arg" ]; then
+        return 1
+    fi
+
+    # boolean option: an explicit --log-to-file wins over the adjusted default
+    a=$("${show[@]}" log_to_file)
+    b=$("${show[@]}" log_to_file --default-log-to-file=false)
+    c=$("${show[@]}" log_to_file --default-log-to-file=false --log-to-file)
+    if [ "$a" != "true" ]; then
+        return 1
+    fi
+    if [ "$b" != "false" ]; then
+        return 1
+    fi
+    if [ "$c" != "true" ]; then
+        return 1
+    fi
+}
+
+main osd-config "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && test/osd/osd-config.sh"
+# End:
diff --git a/qa/standalone/osd/osd-copy-from.sh b/qa/standalone/osd/osd-copy-from.sh
new file mode 100755
index 000000000..8ac0ab541
--- /dev/null
+++ b/qa/standalone/osd/osd-copy-from.sh
@@ -0,0 +1,68 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
+# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com>
+#
+# Author: Loic Dachary <loic@dachary.org>
+# Author: Sage Weil <sage@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+    local dir=$1
+    shift
+
+    # Monitor address and common arguments for every ceph command below.
+    export CEPH_MON="127.0.0.1:7111" # git grep '\<7111\>' : there must be only one
+    export CEPH_ARGS
+    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none --mon-host=$CEPH_MON "
+
+    # Run the tests named on the command line; when none are given, run
+    # every TEST_* function listed by `set`.
+    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+    for func in $funcs ; do
+        # stop at the first step (setup, test body, teardown) that fails
+        setup $dir && $func $dir && teardown $dir || return 1
+    done
+}
+
+#
+# Check `rados cp` in three phases: a plain copy, a copy while the
+# osd-debug-inject-copyfrom-error option is set on all OSDs, and a copy
+# after that option has been cleared again.
+#
+# Arguments: $1 - test directory
+# Returns:   1 as soon as a step does not have the expected outcome
+#
+function TEST_copy_from() {
+    local dir=$1
+
+    run_mon $dir a || return 1
+    run_mgr $dir x || return 1
+    run_osd $dir 0 || return 1
+    run_osd $dir 1 || return 1
+    create_rbd_pool || return 1
+
+    # success
+    rados -p rbd put foo $(which rados) || return 1
+    rados -p rbd cp foo foo2 || return 1
+    rados -p rbd stat foo2 || return 1
+
+    # failure: with the error injected, the copy must fail and must not
+    # leave a destination object behind
+    ceph tell osd.\* injectargs -- --osd-debug-inject-copyfrom-error
+    ! rados -p rbd cp foo foo3 || return 1
+    ! rados -p rbd stat foo3 || return 1
+
+    # success again: once the injection is cleared the copy must work
+    # (this copy is expected to succeed, so it is not negated)
+    ceph tell osd.\* injectargs -- --no-osd-debug-inject-copyfrom-error
+    rados -p rbd cp foo foo3 || return 1
+    rados -p rbd stat foo3 || return 1
+}
+
+main osd-copy-from "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && test/osd/osd-copy-from.sh"
+# End:
diff --git a/qa/standalone/osd/osd-dup.sh b/qa/standalone/osd/osd-dup.sh
new file mode 100755
index 000000000..ab442c538
--- /dev/null
+++ b/qa/standalone/osd/osd-dup.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+[ `uname` = FreeBSD ] && exit 0
+
+function run() {
+    local dir=$1
+    shift
+
+    # NOTE(review): port 7146 is also used by osd-bluefs-volume-ops.sh,
+    # although the comment below asks for a unique port - confirm.
+    export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one
+    export CEPH_ARGS
+    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none --mon-host=$CEPH_MON "
+    # avoid running out of fds in rados bench
+    local fs
+    for fs in xfs btrfs ; do
+        CEPH_ARGS+="--filestore_wbthrottle_${fs}_ios_hard_limit=900 "
+    done
+
+    # Run the tests named on the command line; when none are given, run
+    # every TEST_* function listed by `set`.
+    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+    for func in $funcs ; do
+        # stop at the first step (setup, test body, teardown) that fails
+        setup $dir && $func $dir && teardown $dir || return 1
+    done
+}
+
+main osd-dup "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && test/osd/osd-dup.sh"
+# End:
diff --git a/qa/standalone/osd/osd-fast-mark-down.sh b/qa/standalone/osd/osd-fast-mark-down.sh
new file mode 100755
index 000000000..0ef9d8ce4
--- /dev/null
+++ b/qa/standalone/osd/osd-fast-mark-down.sh
@@ -0,0 +1,111 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2016 Piotr Dałek <git@predictor.org.pl>
+# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com>
+#
+# Author: Piotr Dałek <git@predictor.org.pl>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+MAX_PROPAGATION_TIME=30
+
+#
+# Run test_fast_kill twice: first with
+# osd-fast-fail-on-connection-refused disabled, where it is expected to
+# fail, then with the option enabled over the async messenger, where it
+# must succeed.
+#
+# Arguments: $1 - test directory
+# Globals:   CEPH_MON, CEPH_ARGS (exported), OLD_ARGS (written)
+# Returns:   0 when both phases behave as expected, 1 otherwise
+#
+function run() {
+    local dir=$1
+    shift
+    rm -f $dir/*.pid
+    export CEPH_MON="127.0.0.1:7126" # git grep '\<7126\>' : there must be only one
+    export CEPH_ARGS
+    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+
+    OLD_ARGS=$CEPH_ARGS
+    CEPH_ARGS+="--osd-fast-fail-on-connection-refused=false "
+    echo "Ensuring old behavior is there..."
+    # NOTE(review): CEPH_ARGS carries no --mon-host during this phase (it
+    # is only appended for the async run below) - confirm the cluster can
+    # start here, otherwise this check passes vacuously.
+    # A plain `if` is used so that `return` leaves run(); inside a
+    # ( ... ) subshell it would only end the subshell.
+    if test_fast_kill $dir ; then
+        echo "OSDs died too early! Old behavior doesn't work."
+        return 1
+    fi
+
+    CEPH_ARGS=$OLD_ARGS"--osd-fast-fail-on-connection-refused=true "
+    OLD_ARGS=$CEPH_ARGS
+
+    CEPH_ARGS=$OLD_ARGS"--ms_type=async --mon-host=$CEPH_MON"
+    echo "Testing async msgr..."
+    test_fast_kill $dir || return 1
+
+    return 0
+
+}
+
+#
+# Start a 3-OSD cluster, then kill -9 two different OSDs one after the
+# other. After each kill, wait up to MAX_PROPAGATION_TIME seconds for
+# "ms_handle_refused" to show up in an OSD log and for `ceph osd tree`
+# to list exactly as many down OSDs as have been killed so far.
+#
+# Arguments: $1 - test directory (falls back to the caller's $dir)
+# Globals:   MAX_PROPAGATION_TIME (read)
+# Returns:   0 on success, 1 if setup fails, too many OSDs are down, or
+#            a killed OSD is not marked down in time
+#
+function test_fast_kill() {
+    local dir=${1:-$dir}
+    local -a pids=()
+    local oi i killid previd time_left down_osds
+
+    # create cluster with 3 osds
+    setup $dir || return 1
+    run_mon $dir a --osd_pool_default_size=3 || return 1
+    run_mgr $dir x || return 1
+    for oi in {0..2}; do
+        run_osd $dir $oi || return 1
+        pids[$oi]=$(cat $dir/osd.$oi.pid)
+    done
+
+    create_rbd_pool || return 1
+
+    # write some objects to ensure connectivity between osds
+    timeout 20 rados -p rbd bench 10 write -b 4096 --max-objects 128 --no-cleanup || return 1
+    sleep 1
+
+    killid=0
+    previd=0
+
+    # kill random osd and see if after max MAX_PROPAGATION_TIME, the osd count decreased.
+    for i in {1..2}; do
+        # pick a pid that differs from the one killed in the previous round
+        while [ $killid -eq $previd ]; do
+            killid=${pids[$RANDOM%${#pids[@]}]}
+        done
+        previd=$killid
+
+        kill -9 $killid
+        time_left=$MAX_PROPAGATION_TIME
+        down_osds=0
+
+        while [ $time_left -gt 0 ]; do
+            sleep 1
+            time_left=$((time_left - 1))
+
+            # nothing to check until some osd has logged a refused connection
+            if ! grep -q -F "ms_handle_refused" $dir/osd.*.log ; then
+                continue
+            fi
+
+            down_osds=$(ceph osd tree | grep -c down)
+            if [ $down_osds -lt $i ]; then
+                # osds not marked down yet, try again in a second
+                continue
+            elif [ $down_osds -gt $i ]; then
+                echo Too many \($down_osds\) osds died!
+                return 1
+            else
+                break
+            fi
+        done
+
+        if [ $down_osds -lt $i ]; then
+            echo Killed the OSD, yet it is not marked down
+            ceph osd tree
+            return 1
+        fi
+    done
+    pkill -SIGTERM rados
+    teardown $dir || return 1
+}
+
+main osd-fast-mark-down "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && test/osd/osd-fast-mark-down.sh"
+# End:
diff --git a/qa/standalone/osd/osd-force-create-pg.sh b/qa/standalone/osd/osd-force-create-pg.sh
new file mode 100755
index 000000000..ca4b0239e
--- /dev/null
+++ b/qa/standalone/osd/osd-force-create-pg.sh
@@ -0,0 +1,53 @@
+#!/usr/bin/env bash
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+    local dir=$1
+    shift
+
+    # Monitor address and common arguments for every ceph command below.
+    export CEPH_MON="127.0.0.1:7145" # git grep '\<7145\>' : there must be only one
+    export CEPH_ARGS
+    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none --mon-host=$CEPH_MON "
+
+    # Run the tests named on the command line; when none are given, run
+    # every TEST_* function listed by `set`.
+    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+    for func in $funcs ; do
+        # stop at the first step (setup, test body, teardown) that fails
+        setup $dir && $func $dir && teardown $dir || return 1
+    done
+}
+
+function TEST_reuse_id() {
+    local dir=$1
+    local osd_num
+
+    run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
+    run_mgr $dir x || return 1
+    for osd_num in 0 1 2 ; do
+        run_osd $dir $osd_num || return 1
+    done
+
+    ceph osd pool create foo 50 || return 1
+    wait_for_clean || return 1
+
+    # stop all three OSDs, then remove PG 1.0 from each data dir offline;
+    # the status of these steps is not checked
+    for osd_num in 0 1 2 ; do
+        kill_daemons $dir TERM osd.$osd_num
+    done
+    for osd_num in 0 1 2 ; do
+        ceph-objectstore-tool --data-path $dir/$osd_num --op remove --pgid 1.0 --force
+    done
+    for osd_num in 0 1 2 ; do
+        activate_osd $dir $osd_num || return 1
+    done
+    sleep 10
+    # the removed PG is expected to be listed as stale
+    ceph pg ls | grep 1.0 | grep stale || return 1
+
+    # recreate the PG and wait for the cluster to become clean
+    ceph osd force-create-pg 1.0 --yes-i-really-mean-it || return 1
+    wait_for_clean || return 1
+}
+
+main osd-force-create-pg "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && test/osd/osd-force-create-pg.sh"
+# End:
diff --git a/qa/standalone/osd/osd-markdown.sh b/qa/standalone/osd/osd-markdown.sh
new file mode 100755
index 000000000..5c4a78440
--- /dev/null
+++ b/qa/standalone/osd/osd-markdown.sh
@@ -0,0 +1,149 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2015 Intel <contact@intel.com.com>
+# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com>
+#
+# Author: Xiaoxi Chen <xiaoxi.chen@intel.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+    local dir=$1
+    shift
+
+    # Monitor address and common arguments for every ceph command below.
+    export CEPH_MON="127.0.0.1:7108" # git grep '\<7108\>' : there must be only one
+    export CEPH_ARGS
+    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none --mon-host=$CEPH_MON "
+
+    # Run the tests named on the command line; when none are given, run
+    # every TEST_* function listed by `set`.
+    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+    for func in $funcs ; do
+        # stop at the first step (setup, test body, teardown) that fails
+        setup $dir && $func $dir && teardown $dir || return 1
+    done
+}
+
+#
+# Mark osd.0 down repeatedly, checking before each round that it is
+# reported up.
+#
+# Arguments: $1 - number of times to mark osd.0 down
+#            $2 - total time (accepted but not used by this function)
+#            $3 - seconds to sleep after each `ceph osd down`
+# Returns:   1 if osd.0 is not reachable or not up at the start of a
+#            round; otherwise the status of the last command run
+#
+function markdown_N_impl() {
+    # locals, so the caller's and the global namespace are left untouched
+    local markdown_times=$1
+    local total_time=$2
+    local sleeptime=$3
+    local i
+    for ((i = 1; i <= markdown_times; i++))
+    do
+        # check the OSD is UP
+        ceph tell osd.0 get_latest_osdmap || return 1
+        ceph osd tree
+        ceph osd tree | grep osd.0 |grep up || return 1
+        # mark the OSD down.
+        # override any dup setting in the environment to ensure we do this
+        # exactly once (modulo messenger failures, at least; we can't *actually*
+        # provide exactly-once semantics for mon commands).
+        ( unset CEPH_CLI_TEST_DUP_COMMAND ; ceph osd down 0 )
+        sleep $sleeptime
+    done
+}
+
+
+function TEST_markdown_exceed_maxdown_count() {
+    local dir=$1
+    local osd_num
+
+    run_mon $dir a || return 1
+    run_mgr $dir x || return 1
+    for osd_num in 0 1 2 ; do
+        run_osd $dir $osd_num || return 1
+    done
+
+    create_rbd_pool || return 1
+
+    # 3+1 times within 300s, osd should stay dead on the 4th time
+    local count=3
+    local sleeptime=10
+    local period=300
+    ceph tell osd.0 injectargs "--osd_max_markdown_count $count" || return 1
+    ceph tell osd.0 injectargs "--osd_max_markdown_period $period" || return 1
+
+    markdown_N_impl $((count + 1)) $period $sleeptime
+    # down N+1 times ,the osd.0 should die
+    ceph osd tree | grep down | grep osd.0 || return 1
+}
+
+function TEST_markdown_boot() {
+    local dir=$1
+    local osd_num
+
+    run_mon $dir a || return 1
+    run_mgr $dir x || return 1
+    for osd_num in 0 1 2 ; do
+        run_osd $dir $osd_num || return 1
+    done
+
+    create_rbd_pool || return 1
+
+    # 3 times within 120s, should stay up
+    local count=3
+    local sleeptime=10
+    local period=120
+    ceph tell osd.0 injectargs "--osd_max_markdown_count $count" || return 1
+    ceph tell osd.0 injectargs "--osd_max_markdown_period $period" || return 1
+
+    markdown_N_impl $count $period $sleeptime
+    #down N times, osd.0 should be up
+    sleep 15  # give osd plenty of time to notice and come back up
+    ceph tell osd.0 get_latest_osdmap || return 1
+    ceph osd tree | grep up | grep osd.0 || return 1
+}
+
+function TEST_markdown_boot_exceed_time() {
+    local dir=$1
+    local osd_num
+
+    run_mon $dir a || return 1
+    run_mgr $dir x || return 1
+    for osd_num in 0 1 2 ; do
+        run_osd $dir $osd_num || return 1
+    done
+
+    create_rbd_pool || return 1
+
+    # 3+1 times, but over 40s, > 20s, so should stay up
+    local count=3
+    local period=20
+    local sleeptime=10
+    ceph tell osd.0 injectargs "--osd_max_markdown_count $count" || return 1
+    ceph tell osd.0 injectargs "--osd_max_markdown_period $period" || return 1
+
+    markdown_N_impl $((count + 1)) $period $sleeptime
+    sleep 15  # give osd plenty of time to notice and come back up
+    ceph tell osd.0 get_latest_osdmap || return 1
+    ceph osd tree | grep up | grep osd.0 || return 1
+}
+
+function TEST_osd_stop() {
+
+    local dir=$1
+    local osd_num
+
+    run_mon $dir a || return 1
+    run_mgr $dir x || return 1
+    for osd_num in 0 1 2 ; do
+        run_osd $dir $osd_num || return 1
+    done
+    # the osd.0 process must exist before the stop request
+    osd_0_pid=$(cat $dir/osd.0.pid)
+    ps -p $osd_0_pid || return 1
+
+    ceph osd tree | grep osd.0 | grep up || return 1
+    ceph osd stop osd.0
+    sleep 15 # give osd plenty of time to notice and exit
+    ceph osd tree | grep down | grep osd.0 || return 1
+    # ...and must be gone afterwards
+    if ps -p $osd_0_pid ; then
+        return 1
+    fi
+}
+
+main osd-markdown "$@"
diff --git a/qa/standalone/osd/osd-reactivate.sh b/qa/standalone/osd/osd-reactivate.sh
new file mode 100755
index 000000000..6d6438629
--- /dev/null
+++ b/qa/standalone/osd/osd-reactivate.sh
@@ -0,0 +1,56 @@
+#!/usr/bin/env bash
+#
+# Author: Vicente Cheng <freeze.bilsted@gmail.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+#
+# Configure the environment for this test file, then run either the test
+# functions named in the remaining arguments or every TEST_* function.
+#
+function run() {
+    local dir=$1
+    shift
+
+    export CEPH_MON="127.0.0.1:7122" # git grep '\<7122\>' : there must be only one
+    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none --mon-host=$CEPH_MON "
+    export CEPH_ARGS
+
+    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+    for func in $funcs ; do
+        # Each test gets a fresh setup/teardown; stop at the first failure.
+        setup $dir && $func $dir && teardown $dir || return 1
+    done
+}
+
+#
+# Stop osd.0, remove its "ready" and "active" marker paths and activate it
+# again, which must succeed.
+#
+# $1 - test directory
+#
+function TEST_reactivate() {
+    local dir=$1
+    # Marker paths inside the osd.0 data directory; kept local so they do
+    # not leak into the global namespace.
+    local ready_path="$dir/0/ready"
+    local activate_path="$dir/0/active"
+
+    run_mon $dir a || return 1
+    run_mgr $dir x || return 1
+    run_osd $dir 0 || return 1
+
+    kill_daemons $dir TERM osd || return 1
+
+    # trigger mkfs again
+    # Quoted and terminated with -- so an odd test directory name cannot
+    # make rm -rf act on unintended paths.
+    rm -rf -- "$ready_path" "$activate_path"
+    activate_osd $dir 0 || return 1
+
+}
+
+# Entry point: pass the test name and the command-line arguments to main
+# (not defined in this file; presumably provided by the sourced ceph-helpers.sh).
+main osd-reactivate "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && test/osd/osd-reactivate.sh"
+# End:
diff --git a/qa/standalone/osd/osd-recovery-prio.sh b/qa/standalone/osd/osd-recovery-prio.sh
new file mode 100755
index 000000000..02b65f67a
--- /dev/null
+++ b/qa/standalone/osd/osd-recovery-prio.sh
@@ -0,0 +1,542 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2019 Red Hat <contact@redhat.com>
+#
+# Author: David Zafman <dzafman@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+#
+# Configure the environment shared by the recovery priority tests, then run
+# either the test functions named in the remaining arguments or every
+# TEST_* function.
+#
+function run() {
+    local dir=$1
+    shift
+
+    # Fix port????
+    export CEPH_MON="127.0.0.1:7114" # git grep '\<7114\>' : there must be only one
+    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+    CEPH_ARGS+="--mon-host=$CEPH_MON --osd_max_backfills=1 --debug_reserver=20 "
+    # Set osd op queue = wpq for the tests. Recovery priority is not
+    # considered by mclock_scheduler leading to unexpected results.
+    CEPH_ARGS+="--osd-op-queue=wpq "
+    export CEPH_ARGS
+
+    export objects=200
+    export poolprefix=test
+    # See OSD_RECOVERY_PRIORITY_FORCED
+    export FORCE_PRIO="255"
+    # See OSD_RECOVERY_PRIORITY_BASE + 10
+    export NORMAL_PRIO="190"
+
+    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+    for func in $funcs ; do
+        if ! setup $dir ; then
+            return 1
+        fi
+        if ! $func $dir ; then
+            return 1
+        fi
+        if ! teardown $dir ; then
+            return 1
+        fi
+    done
+}
+
+
+#
+# Exercise recovery reservation priorities with force-recovery and
+# cancel-force-recovery.
+#
+# Three pools whose PG <pool>.0 share the same primary but have different
+# second replicas are selected.  With norecover set, the reservation state
+# of the shared primary is inspected through dump_recovery_reservations
+# after each priority change.
+#
+# $1 - test directory
+#
+# Returns 1 if setup or an admin socket query fails, otherwise the number
+# of failed priority checks (0 means the test passed).
+#
+function TEST_recovery_priority() {
+    local dir=$1
+    local pools=10
+    local OSDS=5
+    local max_tries=10
+
+    run_mon $dir a || return 1
+    run_mgr $dir x || return 1
+    export CEPH_ARGS
+
+    for osd in $(seq 0 $(expr $OSDS - 1))
+    do
+      run_osd $dir $osd || return 1
+    done
+
+    for p in $(seq 1 $pools)
+    do
+      create_pool "${poolprefix}$p" 1 1
+      ceph osd pool set "${poolprefix}$p" size 2
+    done
+    sleep 5
+
+    wait_for_clean || return 1
+
+    ceph pg dump pgs
+
+    # Find 3 pools with a pg with the same primaries but second
+    # replica on another osd.
+    local PG1
+    local POOLNUM1
+    local pool1
+    local chk_osd1_1
+    local chk_osd1_2
+
+    local PG2
+    local POOLNUM2
+    local pool2
+    local chk_osd2
+
+    local PG3
+    local POOLNUM3
+    local pool3
+
+    for p in $(seq 1 $pools)
+    do
+      ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting
+      local test_osd1=$(head -1 $dir/acting)
+      local test_osd2=$(tail -1 $dir/acting)
+      if [ -z "$PG1" ];
+      then
+        PG1="${p}.0"
+        POOLNUM1=$p
+        pool1="${poolprefix}$p"
+        chk_osd1_1=$test_osd1
+        chk_osd1_2=$test_osd2
+      elif [ -z "$PG2" -a $chk_osd1_1 = $test_osd1 -a $chk_osd1_2 != $test_osd2 ];
+      then
+        PG2="${p}.0"
+        POOLNUM2=$p
+        pool2="${poolprefix}$p"
+        chk_osd2=$test_osd2
+      elif [ -n "$PG2" -a $chk_osd1_1 = $test_osd1 -a $chk_osd1_2 != $test_osd2 -a "$chk_osd2" != $test_osd2 ];
+      then
+        PG3="${p}.0"
+        POOLNUM3=$p
+        pool3="${poolprefix}$p"
+        break
+      fi
+    done
+    rm -f $dir/acting
+
+    # Both additional pools are required.  The variable must be expanded
+    # here: comparing the literal string "pool3" can never detect a
+    # missing third pool.
+    if [ "$pool2" = "" -o "$pool3" = "" ];
+    then
+      echo "Failure to find appropriate PGs"
+      return 1
+    fi
+
+    # Drop every pool that was not selected above.
+    for p in $(seq 1 $pools)
+    do
+      if [ $p != $POOLNUM1 -a $p != $POOLNUM2 -a $p != $POOLNUM3 ];
+      then
+        delete_pool ${poolprefix}$p
+      fi
+    done
+
+    ceph osd pool set $pool2 size 1 --yes-i-really-mean-it
+    ceph osd pool set $pool3 size 1 --yes-i-really-mean-it
+    wait_for_clean || return 1
+
+    dd if=/dev/urandom of=$dir/data bs=1M count=10
+    p=1
+    for pname in $pool1 $pool2 $pool3
+    do
+      for i in $(seq 1 $objects)
+      do
+        rados -p ${pname} put obj${i}-p${p} $dir/data
+      done
+      p=$(expr $p + 1)
+    done
+
+    local otherosd=$(get_not_primary $pool1 obj1-p1)
+
+    ceph pg dump pgs
+    ERRORS=0
+
+    ceph osd set norecover
+    ceph osd set noout
+
+    # Get a pg to want to recover and quickly force it
+    # to be preempted.
+    ceph osd pool set $pool3 size 2
+    sleep 2
+    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1
+
+    # 3. Item is in progress, adjust priority with no higher priority waiting
+    for i in $(seq 1 $max_tries)
+    do
+      if ! ceph pg force-recovery $PG3 2>&1 | grep -q "doesn't require recovery"; then
+        break
+      fi
+      if [ "$i" = "$max_tries" ]; then
+        echo "ERROR: Didn't appear to be able to force-recovery"
+        ERRORS=$(expr $ERRORS + 1)
+      fi
+      sleep 2
+    done
+    flush_pg_stats || return 1
+    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1
+
+    ceph osd out osd.$chk_osd1_2
+    sleep 2
+    flush_pg_stats || return 1
+    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1
+    ceph pg dump pgs
+
+    ceph osd pool set $pool2 size 2
+    sleep 2
+    flush_pg_stats || return 1
+    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1
+    cat $dir/out
+    ceph pg dump pgs
+
+    PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG1}\")).prio")
+    if [ "$PRIO" != "$NORMAL_PRIO" ];
+    then
+      echo "The normal PG ${PG1} doesn't have prio $NORMAL_PRIO queued waiting"
+      ERRORS=$(expr $ERRORS + 1)
+    fi
+
+    # Using eval will strip double-quotes from item
+    eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item')
+    if [ "$ITEM" != ${PG3} ];
+    then
+      echo "The first force-recovery PG $PG3 didn't become the in progress item"
+      ERRORS=$(expr $ERRORS + 1)
+    else
+      PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio')
+      if [ "$PRIO" != $FORCE_PRIO ];
+      then
+        echo "The first force-recovery PG ${PG3} doesn't have prio $FORCE_PRIO"
+        ERRORS=$(expr $ERRORS + 1)
+      fi
+    fi
+
+    # 1. Item is queued, re-queue with new priority
+    for i in $(seq 1 $max_tries)
+    do
+      if ! ceph pg force-recovery $PG2 2>&1 | grep -q "doesn't require recovery"; then
+        break
+      fi
+      if [ "$i" = "$max_tries" ]; then
+        echo "ERROR: Didn't appear to be able to force-recovery"
+        ERRORS=$(expr $ERRORS + 1)
+      fi
+      sleep 2
+    done
+    sleep 2
+    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1
+    cat $dir/out
+    PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio")
+    if [ "$PRIO" != "$FORCE_PRIO" ];
+    then
+      echo "The second force-recovery PG ${PG2} doesn't have prio $FORCE_PRIO"
+      ERRORS=$(expr $ERRORS + 1)
+    fi
+    flush_pg_stats || return 1
+
+    # 4. Item is in progress, if higher priority items waiting prempt item
+    #ceph osd unset norecover
+    ceph pg cancel-force-recovery $PG3 || return 1
+    sleep 2
+    #ceph osd set norecover
+    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1
+    cat $dir/out
+    PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG3}\")).prio")
+    if [ "$PRIO" != "$NORMAL_PRIO" ];
+    then
+      echo "After cancel-recovery PG ${PG3} doesn't have prio $NORMAL_PRIO"
+      ERRORS=$(expr $ERRORS + 1)
+    fi
+
+    eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item')
+    if [ "$ITEM" != ${PG2} ];
+    then
+      echo "The force-recovery PG $PG2 didn't become the in progress item"
+      ERRORS=$(expr $ERRORS + 1)
+    else
+      PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio')
+      if [ "$PRIO" != $FORCE_PRIO ];
+      then
+        echo "The first force-recovery PG ${PG2} doesn't have prio $FORCE_PRIO"
+        ERRORS=$(expr $ERRORS + 1)
+      fi
+    fi
+
+    ceph pg cancel-force-recovery $PG2 || return 1
+    sleep 5
+    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1
+
+    # 2. Item is queued, re-queue and preempt because new priority higher than an in progress item
+    flush_pg_stats || return 1
+    ceph pg force-recovery $PG3 || return 1
+    sleep 2
+
+    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1
+    cat $dir/out
+    # The queued item inspected here is PG2, so the message reports PG2.
+    PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio")
+    if [ "$PRIO" != "$NORMAL_PRIO" ];
+    then
+      echo "After cancel-force-recovery PG ${PG2} doesn't have prio $NORMAL_PRIO"
+      ERRORS=$(expr $ERRORS + 1)
+    fi
+
+    eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item')
+    if [ "$ITEM" != ${PG3} ];
+    then
+      echo "The force-recovery PG $PG3 didn't get promoted to an in progress item"
+      ERRORS=$(expr $ERRORS + 1)
+    else
+      # The in-progress item verified above is PG3, so the message reports PG3.
+      PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio')
+      if [ "$PRIO" != $FORCE_PRIO ];
+      then
+        echo "The force-recovery PG ${PG3} doesn't have prio $FORCE_PRIO"
+        ERRORS=$(expr $ERRORS + 1)
+      fi
+    fi
+
+    ceph osd unset noout
+    ceph osd unset norecover
+
+    wait_for_clean "CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations" || return 1
+
+    ceph pg dump pgs
+
+    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_pgstate_history
+
+    if [ $ERRORS != "0" ];
+    then
+      echo "$ERRORS error(s) found"
+    else
+      echo TEST PASSED
+    fi
+
+    delete_pool $pool1
+    delete_pool $pool2
+    delete_pool $pool3
+    kill_daemons $dir || return 1
+    return $ERRORS
+}
+
+#
+# Show that pool recovery_priority is added to recovery priority
+#
+# Create 2 pools with 2 OSDs with different primaries
+# pool 1 with recovery_priority 1
+# pool 2 with recovery_priority 2
+#
+# Start recovery by changing the pool sizes from 1 to 2
+# Use dump_recovery_reservations to verify priorities
+#
+# $1 - test directory
+#
+# Picks two pools whose PG <pool>.0 have different primaries, assigns them
+# recovery_priority 1 and 2, starts recovery on both and compares the
+# in-progress local and remote reservations reported by
+# dump_recovery_reservations against NORMAL_PRIO plus the pool priority.
+#
+# Returns 1 on setup failure or if recovery never starts, otherwise the
+# number of failed priority checks (0 means the test passed).
+#
+function TEST_recovery_pool_priority() {
+    local dir=$1
+    local pools=3 # Don't assume the first 2 pools are exact what we want
+    local OSDS=2
+
+    run_mon $dir a || return 1
+    run_mgr $dir x || return 1
+    export CEPH_ARGS
+
+    for osd in $(seq 0 $(expr $OSDS - 1))
+    do
+      run_osd $dir $osd || return 1
+    done
+
+    for p in $(seq 1 $pools)
+    do
+      create_pool "${poolprefix}$p" 1 1
+      ceph osd pool set "${poolprefix}$p" size 2
+    done
+    sleep 5
+
+    wait_for_clean || return 1
+
+    ceph pg dump pgs
+
+    # Find 2 pools with different primaries which
+    # means the replica must be on another osd.
+    local PG1
+    local POOLNUM1
+    local pool1
+    local chk_osd1_1
+    local chk_osd1_2
+
+    local PG2
+    local POOLNUM2
+    local pool2
+    local chk_osd2_1
+    local chk_osd2_2
+
+    for p in $(seq 1 $pools)
+    do
+      ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting
+      local test_osd1=$(head -1 $dir/acting)
+      local test_osd2=$(tail -1 $dir/acting)
+      if [ -z "$PG1" ];
+      then
+        PG1="${p}.0"
+        POOLNUM1=$p
+        pool1="${poolprefix}$p"
+        chk_osd1_1=$test_osd1
+        chk_osd1_2=$test_osd2
+      elif [ $chk_osd1_1 != $test_osd1 ];
+      then
+        PG2="${p}.0"
+        POOLNUM2=$p
+        pool2="${poolprefix}$p"
+        chk_osd2_1=$test_osd1
+        chk_osd2_2=$test_osd2
+        break
+      fi
+    done
+    rm -f $dir/acting
+
+    if [ "$pool2" = "" ];
+    then
+      echo "Failure to find appropriate PGs"
+      return 1
+    fi
+
+    # Drop every pool that was not selected above.
+    for p in $(seq 1 $pools)
+    do
+      if [ $p != $POOLNUM1 -a $p != $POOLNUM2 ];
+      then
+        delete_pool ${poolprefix}$p
+      fi
+    done
+
+    pool1_extra_prio=1
+    pool2_extra_prio=2
+    pool1_prio=$(expr $NORMAL_PRIO + $pool1_extra_prio)
+    pool2_prio=$(expr $NORMAL_PRIO + $pool2_extra_prio)
+
+    ceph osd pool set $pool1 size 1 --yes-i-really-mean-it
+    ceph osd pool set $pool1 recovery_priority $pool1_extra_prio
+    ceph osd pool set $pool2 size 1 --yes-i-really-mean-it
+    ceph osd pool set $pool2 recovery_priority $pool2_extra_prio
+    wait_for_clean || return 1
+
+    dd if=/dev/urandom of=$dir/data bs=1M count=10
+    p=1
+    for pname in $pool1 $pool2
+    do
+      for i in $(seq 1 $objects)
+      do
+        rados -p ${pname} put obj${i}-p${p} $dir/data
+      done
+      p=$(expr $p + 1)
+    done
+
+    local otherosd=$(get_not_primary $pool1 obj1-p1)
+
+    ceph pg dump pgs
+    ERRORS=0
+
+    ceph osd pool set $pool1 size 2
+    ceph osd pool set $pool2 size 2
+
+    # Wait for both PGs to be in recovering state
+    ceph pg dump pgs
+
+    # Wait for recovery to start
+    set -o pipefail
+    count=0
+    while true
+    do
+      if test $(ceph --format json pg dump pgs |
+        jq '.pg_stats | .[] | .state | contains("recovering")' | grep -c true) == "2"
+      then
+        break
+      fi
+      sleep 2
+      if test "$count" -eq "10"
+      then
+        echo "Recovery never started on both PGs"
+        # Restore the shell option before bailing out so it does not
+        # stay enabled for whatever runs after this test.
+        set +o pipefail
+        return 1
+      fi
+      count=$(expr $count + 1)
+    done
+    set +o pipefail
+    ceph pg dump pgs
+
+    # Only two dump files are written, one per OSD of pool1's PG; the
+    # pool2 checks below read dump.${chk_osd2_1}.out and
+    # dump.${chk_osd2_2}.out, which are expected to be these same files
+    # because the cluster has just 2 OSDs.
+    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/dump.${chk_osd1_1}.out
+    echo osd.${chk_osd1_1}
+    cat $dir/dump.${chk_osd1_1}.out
+    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_2}) dump_recovery_reservations > $dir/dump.${chk_osd1_2}.out
+    echo osd.${chk_osd1_2}
+    cat $dir/dump.${chk_osd1_2}.out
+
+    # Using eval will strip double-quotes from item
+    eval ITEM=$(cat $dir/dump.${chk_osd1_1}.out | jq '.local_reservations.in_progress[0].item')
+    if [ "$ITEM" != ${PG1} ];
+    then
+      echo "The primary PG for $pool1 didn't become the in progress item"
+      ERRORS=$(expr $ERRORS + 1)
+    else
+      PRIO=$(cat $dir/dump.${chk_osd1_1}.out | jq '.local_reservations.in_progress[0].prio')
+      if [ "$PRIO" != $pool1_prio ];
+      then
+        echo "The primary PG ${PG1} doesn't have prio $pool1_prio"
+        ERRORS=$(expr $ERRORS + 1)
+      fi
+    fi
+
+    # Using eval will strip double-quotes from item
+    eval ITEM=$(cat $dir/dump.${chk_osd1_2}.out | jq '.remote_reservations.in_progress[0].item')
+    if [ "$ITEM" != ${PG1} ];
+    then
+      echo "The primary PG for $pool1 didn't become the in progress item on remote"
+      ERRORS=$(expr $ERRORS + 1)
+    else
+      PRIO=$(cat $dir/dump.${chk_osd1_2}.out | jq '.remote_reservations.in_progress[0].prio')
+      if [ "$PRIO" != $pool1_prio ];
+      then
+        echo "The primary PG ${PG1} doesn't have prio $pool1_prio on remote"
+        ERRORS=$(expr $ERRORS + 1)
+      fi
+    fi
+
+    # Using eval will strip double-quotes from item
+    eval ITEM=$(cat $dir/dump.${chk_osd2_1}.out | jq '.local_reservations.in_progress[0].item')
+    if [ "$ITEM" != ${PG2} ];
+    then
+      echo "The primary PG for $pool2 didn't become the in progress item"
+      ERRORS=$(expr $ERRORS + 1)
+    else
+      PRIO=$(cat $dir/dump.${chk_osd2_1}.out | jq '.local_reservations.in_progress[0].prio')
+      if [ "$PRIO" != $pool2_prio ];
+      then
+        echo "The primary PG ${PG2} doesn't have prio $pool2_prio"
+        ERRORS=$(expr $ERRORS + 1)
+      fi
+    fi
+
+    # Using eval will strip double-quotes from item
+    eval ITEM=$(cat $dir/dump.${chk_osd2_2}.out | jq '.remote_reservations.in_progress[0].item')
+    if [ "$ITEM" != ${PG2} ];
+    then
+      echo "The primary PG $PG2 didn't become the in progress item on remote"
+      ERRORS=$(expr $ERRORS + 1)
+    else
+      PRIO=$(cat $dir/dump.${chk_osd2_2}.out | jq '.remote_reservations.in_progress[0].prio')
+      if [ "$PRIO" != $pool2_prio ];
+      then
+        echo "The primary PG ${PG2} doesn't have prio $pool2_prio on remote"
+        ERRORS=$(expr $ERRORS + 1)
+      fi
+    fi
+
+    wait_for_clean || return 1
+
+    if [ $ERRORS != "0" ];
+    then
+      echo "$ERRORS error(s) found"
+    else
+      echo TEST PASSED
+    fi
+
+    delete_pool $pool1
+    delete_pool $pool2
+    kill_daemons $dir || return 1
+    return $ERRORS
+}
+
+# Entry point: pass the test name and the command-line arguments to main
+# (not defined in this file; presumably provided by the sourced ceph-helpers.sh).
+main osd-recovery-prio "$@"
+
+# Local Variables:
+# compile-command: "make -j4 && ../qa/run-standalone.sh osd-recovery-prio.sh"
+# End:
diff --git a/qa/standalone/osd/osd-recovery-space.sh b/qa/standalone/osd/osd-recovery-space.sh
new file mode 100755
index 000000000..3bafc5138
--- /dev/null
+++ b/qa/standalone/osd/osd-recovery-space.sh
@@ -0,0 +1,176 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2018 Red Hat <contact@redhat.com>
+#
+# Author: David Zafman <dzafman@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+#
+# Configure the environment shared by the recovery space tests, then run
+# either the test functions named in the remaining arguments or every
+# TEST_* function.
+#
+function run() {
+    local dir=$1
+    shift
+
+    export CEPH_MON="127.0.0.1:7221" # git grep '\<7221\>' : there must be only one
+    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none --mon-host=$CEPH_MON "
+    CEPH_ARGS+="--osd_max_backfills=10 --osd_mclock_override_recovery_settings=true "
+    export CEPH_ARGS
+    export objects=600 poolprefix=test
+
+    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+    for func in $funcs ; do
+        # Fresh setup/teardown around every test; stop at the first failure.
+        setup $dir && $func $dir && teardown $dir || return 1
+    done
+}
+
+
+#
+# Print how many PGs have a state string containing $1, based on
+# "ceph pg dump pgs" JSON output.
+#
+function get_num_in_state() {
+    local wanted=$1
+    local filter="select(contains(\"${wanted}\"))"
+
+    ceph --format json pg dump pgs 2>/dev/null |
+        jq ".pg_stats | [.[] | .state | $filter] | length"
+}
+
+
+#
+# Wait until at least one PG reports a state containing $1.
+#
+# $1 - state substring to look for
+# $2 - timeout handed to get_timeout_delays to build the retry delays
+#
+# Returns 0 once a matching PG is seen, 1 (after dumping the PGs) when the
+# delays are exhausted or the initial stats flush fails.
+#
+function wait_for_state() {
+    local state=$1
+    local matching
+    local -a backoff=($(get_timeout_delays $2 5))
+    local -i attempt=0
+
+    flush_pg_stats || return 1
+    # Hold off until the cluster reports at least one PG.
+    while [ $(get_num_pgs) == 0 ] ; do
+        sleep 1
+    done
+
+    while true ; do
+        matching=$(get_num_in_state ${state})
+        if test $matching -gt 0 ; then
+            return 0
+        fi
+        if (( attempt >= ${#backoff[@]} )) ; then
+            ceph pg dump pgs
+            return 1
+        fi
+        sleep ${backoff[$attempt]}
+        attempt=$((attempt + 1))
+    done
+}
+
+
+# Wait, with timeout $1, for a PG to enter a state containing recovery_toofull.
+function wait_for_recovery_toofull() {
+    wait_for_state recovery_toofull $1
+}
+
+
+# Create 1 pool with size 1
+# Set full-ratio to 50%
+# Write 600 objects of 5K each (3000K)
+# Inject fake_statfs_for_testing to 3600K (83% full)
+# Increase the pool size to 2
+# The pool shouldn't have room to recover
+#
+# $1 - test directory
+#
+# Fills a size-1 pool, shrinks the space reported by the OSDs through
+# fake_statfs_for_testing, then raises the pool size to 2 and expects
+# exactly one PG in recovery_toofull together with a PG_RECOVERY_FULL
+# health error.
+#
+# Returns 1 on setup failure or if any expectation is not met.
+#
+function TEST_recovery_test_simple() {
+    local dir=$1
+    local pools=1
+    local OSDS=2
+    # Pool that receives the objects.  Named explicitly rather than relying
+    # on the value the loop variable $p happens to keep after the pool
+    # creation loop; this is the last pool created.
+    local datapool="${poolprefix}$pools"
+
+    run_mon $dir a || return 1
+    run_mgr $dir x || return 1
+    export CEPH_ARGS
+
+    for osd in $(seq 0 $(expr $OSDS - 1))
+    do
+      run_osd $dir $osd || return 1
+    done
+
+    ceph osd set-nearfull-ratio .40
+    ceph osd set-backfillfull-ratio .45
+    ceph osd set-full-ratio .50
+
+    for p in $(seq 1 $pools)
+    do
+      create_pool "${poolprefix}$p" 1 1
+      ceph osd pool set "${poolprefix}$p" size 1 --yes-i-really-mean-it
+    done
+
+    wait_for_clean || return 1
+
+    dd if=/dev/urandom of=$dir/datafile bs=1024 count=5
+    for o in $(seq 1 $objects)
+    do
+      rados -p "$datapool" put obj$o $dir/datafile
+    done
+
+    for o in $(seq 0 $(expr $OSDS - 1))
+    do
+      ceph tell osd.$o injectargs '--fake_statfs_for_testing 3686400' || return 1
+    done
+    sleep 5
+
+    ceph pg dump pgs
+
+    for p in $(seq 1 $pools)
+    do
+      ceph osd pool set "${poolprefix}$p" size 2
+    done
+
+    # If this times out, we'll detect errors below
+    wait_for_recovery_toofull 30
+
+    ERRORS=0
+    if [ "$(ceph pg dump pgs | grep +recovery_toofull | wc -l)" != "1" ];
+    then
+      echo "One pool should have been in recovery_toofull"
+      ERRORS="$(expr $ERRORS + 1)"
+    fi
+
+    ceph pg dump pgs
+    ceph status
+    ceph status --format=json-pretty > $dir/stat.json
+
+    # eval strips the double quotes jq prints around string values
+    eval SEV=$(jq '.health.checks.PG_RECOVERY_FULL.severity' $dir/stat.json)
+    if [ "$SEV" != "HEALTH_ERR" ]; then
+      echo "PG_RECOVERY_FULL severity $SEV not HEALTH_ERR"
+      ERRORS="$(expr $ERRORS + 1)"
+    fi
+    eval MSG=$(jq '.health.checks.PG_RECOVERY_FULL.summary.message' $dir/stat.json)
+    if [ "$MSG" != "Full OSDs blocking recovery: 1 pg recovery_toofull" ]; then
+      echo "PG_RECOVERY_FULL message '$MSG' mismatched"
+      ERRORS="$(expr $ERRORS + 1)"
+    fi
+    rm -f $dir/stat.json
+
+    if [ $ERRORS != "0" ];
+    then
+      return 1
+    fi
+
+    for i in $(seq 1 $pools)
+    do
+      delete_pool "${poolprefix}$i"
+    done
+    kill_daemons $dir || return 1
+}
+
+
+# Entry point: pass the test name and the command-line arguments to main
+# (not defined in this file; presumably provided by the sourced ceph-helpers.sh).
+main osd-recovery-space "$@"
+
+# Local Variables:
+# compile-command: "make -j4 && ../qa/run-standalone.sh osd-recovery-space.sh"
+# End:
diff --git a/qa/standalone/osd/osd-recovery-stats.sh b/qa/standalone/osd/osd-recovery-stats.sh
new file mode 100755
index 000000000..ad6f810d7
--- /dev/null
+++ b/qa/standalone/osd/osd-recovery-stats.sh
@@ -0,0 +1,512 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2017 Red Hat <contact@redhat.com>
+#
+# Author: David Zafman <dzafman@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+#
+# Configure the environment shared by the recovery statistics tests, then
+# run either the test functions named in the remaining arguments or every
+# TEST_* function.
+#
+function run() {
+    local dir=$1
+    shift
+
+    # Fix port????
+    export CEPH_MON="127.0.0.1:7115" # git grep '\<7115\>' : there must be only one
+    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none --mon-host=$CEPH_MON "
+    # so we will not force auth_log_shard to be acting_primary
+    CEPH_ARGS+="--osd_force_auth_primary_missing_objects=1000000 "
+    export CEPH_ARGS
+    export margin=10 objects=200 poolname=test
+
+    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+    for func in $funcs ; do
+        if ! setup $dir ; then
+            return 1
+        fi
+        if ! $func $dir ; then
+            return 1
+        fi
+        if ! teardown $dir ; then
+            return 1
+        fi
+    done
+}
+
+# Succeed when $1 lies within [$2 - $margin, $2]; return 1 otherwise.
+function below_margin() {
+    local -i check=$1 target=$2
+
+    if (( $check <= $target && $check >= $target - $margin )) ; then
+        return 0
+    fi
+    return 1
+}
+
+# Succeed when $1 lies within [$2, $2 + $margin]; return 1 otherwise.
+function above_margin() {
+    local -i check=$1 target=$2
+
+    if (( $check >= $target && $check <= $target + $margin )) ; then
+        return 0
+    fi
+    return 1
+}
+
+# Command strings that check() runs through eval. They are single-quoted so
+# that $PG, $log, $which, $UPACT and $addp are expanded only at eval time,
+# from the variables in scope in the function doing the eval.
+#
+# FIND_UPACT: from the last "recovering ... update_calc_stats" line for the PG,
+# extract the bracketed token that follows ") ", up to the first space or "p".
+FIND_UPACT='grep "pg[[]${PG}.*recovering.*update_calc_stats " $log | tail -1 | sed "s/.*[)] \([[][^ p]*\).*$/\1/"'
+# FIND_FIRST: trailing number of the first "update_calc_stats $which" line that
+# contains " ${UPACT}${addp}" and does not contain "est".
+FIND_FIRST='grep "pg[[]${PG}.*recovering.*update_calc_stats $which " $log | grep -F " ${UPACT}${addp}" | grep -v est | head -1 | sed "s/.* \([0-9]*\)$/\1/"'
+# FIND_LAST: trailing number of the last "update_calc_stats $which" line.
+FIND_LAST='grep "pg[[]${PG}.*recovering.*update_calc_stats $which " $log | tail -1 | sed "s/.* \([0-9]*\)$/\1/"'
+
+# Compare the recovery statistics logged by the primary OSD against the
+# expected start and end values.
+#
+# The values are extracted from $dir/osd.<primary>.log by evaluating the
+# FIND_UPACT, FIND_FIRST and FIND_LAST command strings, which read the
+# PG, log, which, UPACT and addp variables set in this function.
+#
+# Arguments:
+#   $1  dir              test directory containing the OSD logs
+#   $2  PG               placement group id to look for in the log
+#   $3  primary          id of the OSD whose log is examined
+#   $4  type             pool type; "erasure" switches addp to "p"
+#   $5  degraded_start   expected first "degraded" value
+#   $6  degraded_end     expected last "degraded" value
+#   $7  misplaced_start  expected first "misplaced" value
+#   $8  misplaced_end    expected last "misplaced" value
+#   $9  primary_start    optional expected first "shard <primary>" value
+#   $10 primary_end      optional expected last "shard <primary>" value
+#
+# Start values are checked with below_margin, end values with above_margin.
+# Returns 1 as soon as one value is outside its margin.
+# Side effect: UPACT, FIRST and LAST are assigned without "local".
+function check() {
+ local dir=$1
+ local PG=$2
+ local primary=$3
+ local type=$4
+ local degraded_start=$5
+ local degraded_end=$6
+ local misplaced_start=$7
+ local misplaced_end=$8
+ local primary_start=${9:-}
+ local primary_end=${10:-}
+
+ local log=$dir/osd.${primary}.log
+
+ # Suffix appended to UPACT when FIND_FIRST filters log lines
+ local addp=" "
+ if [ "$type" = "erasure" ];
+ then
+ addp="p"
+ fi
+
+ UPACT=$(eval $FIND_UPACT)
+
+ # Check 3rd line at start because of false recovery starts
+ # NOTE(review): FIND_FIRST selects the first matching line (head -1), not
+ # the 3rd; confirm whether the comment above is still accurate.
+ local which="degraded"
+ FIRST=$(eval $FIND_FIRST)
+ below_margin $FIRST $degraded_start || return 1
+ LAST=$(eval $FIND_LAST)
+ above_margin $LAST $degraded_end || return 1
+
+ # Check 3rd line at start because of false recovery starts
+ which="misplaced"
+ FIRST=$(eval $FIND_FIRST)
+ below_margin $FIRST $misplaced_start || return 1
+ LAST=$(eval $FIND_LAST)
+ above_margin $LAST $misplaced_end || return 1
+
+ # This is the value set into MISSING_ON_PRIMARY; only checked when the
+ # optional primary_start argument was given
+ if [ -n "$primary_start" ];
+ then
+ which="shard $primary"
+ FIRST=$(eval $FIND_FIRST)
+ below_margin $FIRST $primary_start || return 1
+ LAST=$(eval $FIND_LAST)
+ above_margin $LAST $primary_end || return 1
+ fi
+}
+
+# [1,0,?] -> [1,2,4]
+# degraded 500 -> 0
+# active+recovering+degraded
+
+# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
+# 1.0 500 0 500 0 0 0 500 500 active+recovering+degraded 2017-11-17 19:27:36.493828 28'500 32:603 [1,2,4] 1 [1,2,4] 1 0'0 2017-11-17 19:27:05.915467 0'0 2017-11-17 19:27:05.915467
+#
+# Shared body of the "out1" recovery tests.
+#
+# $1 - test directory
+# $2 - pool type; "erasure" additionally creates and uses the myprofile
+#      erasure code profile
+#
+# Kills and marks out one non-primary OSD of the PG holding obj1, waits for
+# recovery to finish and validates the logged statistics with check.
+#
+function do_recovery_out1() {
+    local dir=$1
+    local type=$2
+    local osd_num
+    local ec_profile=""
+
+    run_mon $dir a || return 1
+    run_mgr $dir x || return 1
+    for osd_num in 0 1 2 3 4 5 ; do
+        run_osd $dir $osd_num || return 1
+    done
+
+    case $type in
+        erasure)
+            ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd
+            ec_profile=myprofile
+            ;;
+    esac
+    # ec_profile is empty, and therefore expands to nothing, unless erasure.
+    create_pool $poolname 1 1 $type $ec_profile
+
+    wait_for_clean || return 1
+
+    for i in $(seq 1 $objects) ; do
+        rados -p $poolname put obj$i /dev/null
+    done
+
+    local primary=$(get_primary $poolname obj1)
+    local PG=$(get_pg $poolname obj1)
+    # Only 2 OSDs so only 1 not primary
+    local otherosd=$(get_not_primary $poolname obj1)
+
+    # Take the non-primary OSD down and out while recovery is blocked.
+    ceph osd set norecover
+    kill $(cat $dir/osd.${otherosd}.pid)
+    ceph osd down osd.${otherosd}
+    ceph osd out osd.${otherosd}
+    ceph osd unset norecover
+    ceph tell osd.$(get_primary $poolname obj1) debug kick_recovery_wq 0
+    sleep 2
+
+    wait_for_clean || return 1
+
+    check $dir $PG $primary $type $objects 0 0 0 || return 1
+
+    delete_pool $poolname
+    kill_daemons $dir || return 1
+}
+
+# Run the out1 recovery scenario against a replicated pool.
+function TEST_recovery_replicated_out1() {
+    if ! do_recovery_out1 $1 replicated ; then
+        return 1
+    fi
+}
+
+# Run the out1 recovery scenario against an erasure coded pool.
+function TEST_recovery_erasure_out1() {
+    if ! do_recovery_out1 $1 erasure ; then
+        return 1
+    fi
+}
+
+# [0, 1] -> [2,3,4,5]
+# degraded 1000 -> 0
+# misplaced 1000 -> 0
+# missing on primary 500 -> 0
+
+# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
+# 1.0 500 500 1000 1000 0 0 500 500 active+recovering+degraded 2017-10-27 09:38:37.453438 22'500 25:394 [2,4,3,5] 2 [2,4,3,5] 2 0'0 2017-10-27 09:37:58.046748 0'0 2017-10-27 09:37:58.046748
+#
+# $1 - test directory
+#
+# Writes objects to a size-2 pool, marks both OSDs of the PG out while
+# raising the pool size to 4, then validates the degraded, misplaced and
+# missing-on-primary statistics logged by the new primary.
+#
+function TEST_recovery_sizeup() {
+    local dir=$1
+
+    run_mon $dir a || return 1
+    run_mgr $dir x || return 1
+    run_osd $dir 0 || return 1
+    run_osd $dir 1 || return 1
+    run_osd $dir 2 || return 1
+    run_osd $dir 3 || return 1
+    run_osd $dir 4 || return 1
+    run_osd $dir 5 || return 1
+
+    create_pool $poolname 1 1
+    ceph osd pool set $poolname size 2
+
+    wait_for_clean || return 1
+
+    for i in $(seq 1 $objects)
+    do
+        rados -p $poolname put obj$i /dev/null
+    done
+
+    local primary=$(get_primary $poolname obj1)
+    local PG=$(get_pg $poolname obj1)
+    # The pool has size 2, so there is only 1 OSD that is not the primary
+    local otherosd=$(get_not_primary $poolname obj1)
+
+    ceph osd set norecover
+    ceph osd out osd.$primary osd.$otherosd
+    # Use $poolname like every other pool command in this test instead of
+    # a hard-coded pool name.
+    ceph osd pool set $poolname size 4
+    ceph osd unset norecover
+    # Get new primary
+    primary=$(get_primary $poolname obj1)
+
+    ceph tell osd.${primary} debug kick_recovery_wq 0
+    sleep 2
+
+    wait_for_clean || return 1
+
+    local degraded=$(expr $objects \* 2)
+    local misplaced=$(expr $objects \* 2)
+    check $dir $PG $primary replicated $degraded 0 $misplaced 0 $objects 0 || return 1
+
+    delete_pool $poolname
+    kill_daemons $dir || return 1
+}
+
+# [0, 1, 2, 4] -> [3, 5]
+# misplaced 1000 -> 0
+# missing on primary 500 -> 0
+# active+recovering+degraded
+
+# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
+# 1.0 500 500 0 1000 0 0 500 500 active+recovering+degraded 2017-10-27 09:34:50.012261 22'500 27:118 [3,5] 3 [3,5] 3 0'0 2017-10-27 09:34:08.617248 0'0 2017-10-27 09:34:08.617248
+#
+# $1 - test directory
+#
+# Writes objects to a size-4 pool, marks every OSD of the PG out while
+# lowering the pool size to 2, then validates the misplaced and
+# missing-on-primary statistics logged by the new primary.
+#
+function TEST_recovery_sizedown() {
+    local dir=$1
+
+    run_mon $dir a || return 1
+    run_mgr $dir x || return 1
+    run_osd $dir 0 || return 1
+    run_osd $dir 1 || return 1
+    run_osd $dir 2 || return 1
+    run_osd $dir 3 || return 1
+    run_osd $dir 4 || return 1
+    run_osd $dir 5 || return 1
+
+    create_pool $poolname 1 1
+    ceph osd pool set $poolname size 4
+
+    wait_for_clean || return 1
+
+    for i in $(seq 1 $objects)
+    do
+        rados -p $poolname put obj$i /dev/null
+    done
+
+    local primary=$(get_primary $poolname obj1)
+    local PG=$(get_pg $poolname obj1)
+    # OSD list reported by get_osds for obj1; each of them is marked out below
+    local allosds=$(get_osds $poolname obj1)
+
+    ceph osd set norecover
+    for osd in $allosds
+    do
+        ceph osd out osd.$osd
+    done
+
+    # Use $poolname like every other pool command in this test instead of
+    # a hard-coded pool name.
+    ceph osd pool set $poolname size 2
+    ceph osd unset norecover
+    ceph tell osd.$(get_primary $poolname obj1) debug kick_recovery_wq 0
+    sleep 2
+
+    wait_for_clean || return 1
+
+    # Get new primary
+    primary=$(get_primary $poolname obj1)
+
+    local misplaced=$(expr $objects \* 2)
+    local log=$dir/osd.${primary}.log
+    check $dir $PG $primary replicated 0 0 $misplaced 0 || return 1
+
+    UPACT=$(grep "pg[[]${PG}.*recovering.*update_calc_stats " $log | tail -1 | sed "s/.*[)] \([[][^ p]*\).*$/\1/")
+
+    # This is the value set into MISSING_ON_PRIMARY
+    FIRST=$(grep "pg[[]${PG}.*recovering.*update_calc_stats shard $primary " $log | grep -F " $UPACT " | head -1 | sed "s/.* \([0-9]*\)$/\1/")
+    below_margin $FIRST $objects || return 1
+    LAST=$(grep "pg[[]${PG}.*recovering.*update_calc_stats shard $primary " $log | tail -1 | sed "s/.* \([0-9]*\)$/\1/")
+    above_margin $LAST 0 || return 1
+
+    delete_pool $poolname
+    kill_daemons $dir || return 1
+}
+
+# [1] -> [1,2]
+# degraded 300 -> 200
+# active+recovering+undersized+degraded
+
+# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
+# 1.0 100 0 300 0 0 0 100 100 active+recovering+undersized+degraded 2017-11-17 17:16:15.302943 13'500 16:643 [1,2] 1 [1,2] 1 0'0 2017-11-17 17:15:34.985563 0'0 2017-11-17 17:15:34.985563
+#
+# $1 - test directory
+#
+# Writes objects to a size-1 pool on a 3 OSD cluster, marks one non-primary
+# OSD out and raises the pool size to 4, so the PG recovers but stays
+# undersized.  Then validates the degraded statistics logged by the primary.
+#
+function TEST_recovery_undersized() {
+    local dir=$1
+
+    local osds=3
+    run_mon $dir a || return 1
+    run_mgr $dir x || return 1
+    for i in $(seq 0 $(expr $osds - 1))
+    do
+      run_osd $dir $i || return 1
+    done
+
+    create_pool $poolname 1 1
+    ceph osd pool set $poolname size 1 --yes-i-really-mean-it
+
+    wait_for_clean || return 1
+
+    for i in $(seq 1 $objects)
+    do
+        rados -p $poolname put obj$i /dev/null
+    done
+
+    local primary=$(get_primary $poolname obj1)
+    local PG=$(get_pg $poolname obj1)
+
+    ceph osd set norecover
+    # Mark any osd not the primary (only 1 replica so also has no replica)
+    for i in $(seq 0 $(expr $osds - 1))
+    do
+      if [ $i = $primary ];
+      then
+        continue
+      fi
+      ceph osd out osd.$i
+      break
+    done
+    # Use $poolname like every other pool command in this test instead of
+    # a hard-coded pool name.
+    ceph osd pool set $poolname size 4
+    ceph osd unset norecover
+    ceph tell osd.$(get_primary $poolname obj1) debug kick_recovery_wq 0
+    # Give extra sleep time because code below doesn't have the sophistication of wait_for_clean()
+    sleep 10
+    flush_pg_stats || return 1
+
+    # Wait for recovery to finish
+    # Can't use wait_for_clean() because state goes from active+recovering+undersized+degraded
+    # to active+undersized+degraded
+    for i in $(seq 1 300)
+    do
+      if ceph pg dump pgs | grep ^$PG | grep -qv recovering
+      then
+        break
+      fi
+      if [ $i = "300" ];
+      then
+        echo "Timeout waiting for recovery to finish"
+        return 1
+      fi
+      sleep 1
+    done
+
+    # Get new primary
+    primary=$(get_primary $poolname obj1)
+
+    local first_degraded=$(expr $objects \* 3)
+    local last_degraded=$(expr $objects \* 2)
+    check $dir $PG $primary replicated $first_degraded $last_degraded 0 0 || return 1
+
+    delete_pool $poolname
+    kill_daemons $dir || return 1
+}
+
+# [1,0,2] -> [1,3,NONE]/[1,3,2]
+# degraded 100 -> 0
+# misplaced 100 -> 100
+# active+recovering+degraded+remapped
+
+# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
+# 1.0 100 0 100 100 0 0 100 100 active+recovering+degraded+remapped 2017-11-27 21:24:20.851243 18'500 23:618 [1,3,NONE] 1 [1,3,2] 1 0'0 2017-11-27 21:23:39.395242 0'0 2017-11-27 21:23:39.395242
+function TEST_recovery_erasure_remapped() {
+    # Recovery statistics for an erasure-coded (k=2, m=1) PG that is both
+    # degraded and remapped.
+    #   $1 - test directory (daemon data, pid files and OSD logs)
+    # Uses $poolname and $objects, which are set outside this function.
+    local dir=$1
+
+    run_mon $dir a || return 1
+    run_mgr $dir x || return 1
+    for i in 0 1 2 3
+    do
+        run_osd $dir $i || return 1
+    done
+
+    ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd
+    create_pool $poolname 1 1 erasure myprofile
+    ceph osd pool set $poolname min_size 2
+
+    wait_for_clean || return 1
+
+    for i in $(seq 1 $objects)
+    do
+        rados -p $poolname put obj$i /dev/null
+    done
+
+    local primary=$(get_primary $poolname obj1)
+    local PG=$(get_pg $poolname obj1)
+    local otherosd=$(get_not_primary $poolname obj1)
+
+    # Hold recovery back while one shard holder is killed, marked down and out.
+    ceph osd set norecover
+    kill $(cat $dir/osd.${otherosd}.pid)
+    ceph osd down osd.${otherosd}
+    ceph osd out osd.${otherosd}
+
+    # Additionally mark out (but leave running) the first osd that is
+    # neither the primary nor the osd taken down above.
+    for i in 0 1 2 3
+    do
+        if [[ "$i" != "$primary" && "$i" != "$otherosd" ]]
+        then
+            ceph osd out osd.$i
+            break
+        fi
+    done
+    ceph osd unset norecover
+    ceph tell osd.$(get_primary $poolname obj1) debug kick_recovery_wq 0
+    sleep 2
+
+    wait_for_clean || return 1
+
+    # NOTE(review): log is not passed to check; presumably check reads it
+    # from this scope -- confirm.
+    local log=$dir/osd.${primary}.log
+    check $dir $PG $primary erasure $objects 0 $objects $objects || return 1
+
+    delete_pool $poolname
+    kill_daemons $dir || return 1
+}
+
+function TEST_recovery_multi() {
+    # Recovery statistics when the objects were written in three batches
+    # while different osds of the acting set were down.
+    #   $1 - test directory (daemon data, pid files and OSD logs)
+    # Uses $poolname and $objects, which are set outside this function.
+    local dir=$1
+
+    local osds=6
+    run_mon $dir a || return 1
+    run_mgr $dir x || return 1
+    for i in $(seq 0 $((osds - 1)))
+    do
+        run_osd $dir $i || return 1
+    done
+
+    create_pool $poolname 1 1
+    ceph osd pool set $poolname size 3
+    ceph osd pool set $poolname min_size 1
+
+    wait_for_clean || return 1
+
+    rados -p $poolname put obj1 /dev/null
+
+    local primary=$(get_primary $poolname obj1)
+    local otherosd=$(get_not_primary $poolname obj1)
+
+    # First batch: written while a non-primary osd is down.
+    ceph osd set noout
+    ceph osd set norecover
+    kill $(cat $dir/osd.${otherosd}.pid)
+    ceph osd down osd.${otherosd}
+
+    local half=$((objects / 2))
+    for i in $(seq 2 $half)
+    do
+        rados -p $poolname put obj$i /dev/null
+    done
+
+    # Second batch: the original primary goes down and the other osd returns.
+    kill $(cat $dir/osd.${primary}.pid)
+    ceph osd down osd.${primary}
+    activate_osd $dir ${otherosd}
+    sleep 3
+
+    for i in $(seq $((half + 1)) $objects)
+    do
+        rados -p $poolname put obj$i /dev/null
+    done
+
+    local PG=$(get_pg $poolname obj1)
+    # Re-pick a non-primary osd under the current mapping (otherosd is
+    # already local, so it is only reassigned here).
+    otherosd=$(get_not_primary $poolname obj$objects)
+
+    ceph osd unset noout
+    ceph osd out osd.$primary osd.$otherosd
+    activate_osd $dir ${primary}
+    sleep 3
+
+    # The pool is addressed through $poolname, like every other command
+    # here, instead of a literal name.
+    ceph osd pool set $poolname size 4
+    ceph osd unset norecover
+    ceph tell osd.$(get_primary $poolname obj1) debug kick_recovery_wq 0
+    sleep 2
+
+    wait_for_clean || return 1
+
+    # Get new primary
+    primary=$(get_primary $poolname obj1)
+
+    # NOTE(review): log is not passed to check; presumably check reads it
+    # from this scope -- confirm.
+    local log=$dir/osd.${primary}.log
+    # NOTE(review): the literal expectations below look tied to a specific
+    # $objects value -- confirm before changing $objects.
+    check $dir $PG $primary replicated 399 0 300 0 99 0 || return 1
+
+    delete_pool $poolname
+    kill_daemons $dir || return 1
+}
+
+main osd-recovery-stats "$@"
+
+# Local Variables:
+# compile-command: "make -j4 && ../qa/run-standalone.sh osd-recovery-stats.sh"
+# End:
diff --git a/qa/standalone/osd/osd-rep-recov-eio.sh b/qa/standalone/osd/osd-rep-recov-eio.sh
new file mode 100755
index 000000000..6fea441b3
--- /dev/null
+++ b/qa/standalone/osd/osd-rep-recov-eio.sh
@@ -0,0 +1,422 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2017 Red Hat <contact@redhat.com>
+#
+#
+# Author: Kefu Chai <kchai@redhat.com>
+# Author: David Zafman <dzafman@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+warnings=10
+
+function run() {
+    # Run the requested TEST_* functions, or every TEST_* function defined
+    # in the shell when none is named. Each one gets a fresh setup, a mon
+    # and mgr, and a pool named foo.
+    #   $1   - test directory
+    #   $2.. - optional names of the test functions to run
+    # Returns 1 on the first step that fails.
+    local dir=$1
+    shift
+
+    export CEPH_MON="127.0.0.1:7140" # git grep '\<7140\>' : there must be only one
+    export CEPH_ARGS
+    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none --mon-host=$CEPH_MON "
+
+    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+    for func in $funcs ; do
+        # The mon is given the warning threshold explicitly in case the
+        # default changes.
+        setup $dir \
+            && run_mon $dir a --mon_osd_warn_num_repaired=$warnings \
+            && run_mgr $dir x \
+            && ceph osd pool create foo 8 \
+            && $func $dir \
+            && teardown $dir \
+            || return 1
+    done
+}
+
+function setup_osds() {
+    # Start osds 0 .. count-1 and wait for the cluster to become clean.
+    #   $1 - number of osds to start
+    #   $2 - optional suffix appended to the run_osd helper name
+    # $dir is not a parameter: it is taken from the caller's scope.
+    local count=$1
+    shift
+    local type=$1
+
+    local last=$((count - 1))
+    for id in $(seq 0 $last) ; do
+        run_osd${type} $dir $id || return 1
+    done
+    wait_for_clean || return 1
+}
+
+function get_state() {
+    # Print the state field of one PG, taken from the JSON pg dump.
+    #   $1 - PG id, e.g. 2.0
+    local pgid=$1
+    ceph --format json pg dump pgs 2>/dev/null |
+        jq -r ".pg_stats | .[] | select(.pgid==\"$pgid\") | .state"
+}
+
+function rados_put() {
+    # Build the reference payload $dir/ORIGINAL (four right-aligned markers,
+    # each padded to 1024 characters) and store it as an object.
+    #   $1 - test directory
+    #   $2 - pool name
+    #   $3 - object name (default: SOMETHING)
+    # Returns 1 when the put fails.
+    local dir=$1
+    local poolname=$2
+    local objname=${3:-SOMETHING}
+
+    {
+        for marker in AAA BBB CCCC DDDD ; do
+            printf "%*s" 1024 $marker
+        done
+    } > $dir/ORIGINAL
+
+    # Only the upload happens here; rados_get does the read-back comparison.
+    rados --pool $poolname put $objname $dir/ORIGINAL || return 1
+}
+
+function rados_get() {
+    # Read an object back into $dir/COPY and check the outcome.
+    #   $1 - test directory
+    #   $2 - pool name
+    #   $3 - object name (default: SOMETHING)
+    #   $4 - expected outcome (default: ok)
+    #        fail - the read must fail
+    #        hang - the read must still be blocked when timeout(1) ends it
+    #               after 5 seconds (exit status 124)
+    #        other - the read must succeed and match $dir/ORIGINAL
+    local dir=$1
+    local poolname=$2
+    local objname=${3:-SOMETHING}
+    local expect=${4:-ok}
+
+    case "$expect" in
+        fail)
+            # Success here means the get itself failed.
+            ! rados --pool $poolname get $objname $dir/COPY
+            return
+            ;;
+        hang)
+            timeout 5 rados --pool $poolname get $objname $dir/COPY
+            test "$?" = "124"
+            return
+            ;;
+    esac
+
+    # Normal case: fetch, compare with the reference, drop the copy.
+    rados --pool $poolname get $objname $dir/COPY || return 1
+    diff $dir/ORIGINAL $dir/COPY || return 1
+    rm $dir/COPY
+}
+
+function rados_get_data() {
+    # Inject read errors into one object of pool-rep in four rounds and
+    # check the per-PG repaired-object count and the cluster-wide
+    # repaired-shard count after each counted round.
+    #   $1 - injection kind; selects the inject_<kind> helper
+    #   $2 - test directory
+    local inject=$1
+    shift
+    local dir=$1
+
+    local poolname=pool-rep
+    local objname=obj-$inject-$$
+    local pgid=$(get_pg $poolname $objname)
+
+    # Round 1: error on shard 0 only; the read must still succeed.
+    rados_put $dir $poolname $objname || return 1
+    inject_$inject rep data $poolname $objname $dir 0 || return 1
+    rados_get $dir $poolname $objname || return 1
+
+    wait_for_clean
+    COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
+    [[ "$COUNT" == "1" ]] || return 1
+    flush_pg_stats
+    COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired")
+    [[ "$COUNT" == "1" ]] || return 1
+
+    # Round 2: errors on shards 0 and 1.
+    local object_osds=($(get_osds $poolname $objname))
+    local primary=${object_osds[0]}
+    local bad_peer=${object_osds[1]}
+    inject_$inject rep data $poolname $objname $dir 0 || return 1
+    inject_$inject rep data $poolname $objname $dir 1 || return 1
+    # Force primary to pull from the bad peer, so we can repair it too!
+    set_config osd $primary osd_debug_feed_pullee $bad_peer || return 1
+    rados_get $dir $poolname $objname || return 1
+
+    # Wait until automatic repair of bad peer is done
+    wait_for_clean || return 1
+
+    # Round 3: errors on shards 0 and 2.
+    inject_$inject rep data $poolname $objname $dir 0 || return 1
+    inject_$inject rep data $poolname $objname $dir 2 || return 1
+    rados_get $dir $poolname $objname || return 1
+
+    wait_for_clean
+    COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
+    [[ "$COUNT" == "3" ]] || return 1
+    flush_pg_stats
+    COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired")
+    [[ "$COUNT" == "4" ]] || return 1
+
+    # Round 4: errors on all three shards; the read is expected to hang.
+    inject_$inject rep data $poolname $objname $dir 0 || return 1
+    inject_$inject rep data $poolname $objname $dir 1 || return 1
+    inject_$inject rep data $poolname $objname $dir 2 || return 1
+    rados_get $dir $poolname $objname hang || return 1
+
+    # The result of this wait is not checked.
+    wait_for_clean
+    # After hang another repair couldn't happen, so count stays the same
+    COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
+    [[ "$COUNT" == "3" ]] || return 1
+    flush_pg_stats
+    COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired")
+    [[ "$COUNT" == "4" ]] || return 1
+}
+
+function TEST_rados_get_with_eio() {
+    # Bring up four osds, create the single-PG pool pool-rep and run the
+    # eio variant of rados_get_data against it.
+    #   $1 - test directory
+    local dir=$1
+    local poolname=pool-rep
+
+    setup_osds 4 || return 1
+
+    create_pool $poolname 1 1 \
+        && wait_for_clean \
+        && rados_get_data eio $dir \
+        || return 1
+
+    delete_pool $poolname
+}
+
+function TEST_rados_repair_warning() {
+    # Repair more objects than the configured warning threshold and check
+    # the "Too many repaired reads" health warning: it must appear, it must
+    # be mutable, and it must reappear once a second osd crosses the
+    # threshold.
+    #   $1 - test directory
+    # Uses the file-level $warnings threshold.
+    local dir=$1
+    local OBJS=$((warnings + 1))
+
+    setup_osds 4 || return 1
+
+    local poolname=pool-rep
+    create_pool $poolname 1 1 || return 1
+    wait_for_clean || return 1
+
+    local objbase=obj-warn
+    local inject=eio
+
+    # Phase 1: one repaired read per object.
+    for i in $(seq 1 $OBJS)
+    do
+        rados_put $dir $poolname ${objbase}-$i || return 1
+        inject_$inject rep data $poolname ${objbase}-$i $dir 0 || return 1
+        rados_get $dir $poolname ${objbase}-$i || return 1
+    done
+    local pgid=$(get_pg $poolname ${objbase}-1)
+
+    local object_osds=($(get_osds $poolname ${objbase}-1))
+    local primary=${object_osds[0]}
+    local bad_peer=${object_osds[1]}
+
+    wait_for_clean
+    COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
+    test "$COUNT" = "$OBJS" || return 1
+    flush_pg_stats
+    COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired")
+    test "$COUNT" = "$OBJS" || return 1
+
+    ceph health | grep -q "Too many repaired reads on 1 OSDs" || return 1
+    ceph health detail | grep -q "osd.$primary had $OBJS reads repaired" || return 1
+
+    ceph health mute OSD_TOO_MANY_REPAIRS
+    # Should mute this: ceph health must succeed and must no longer mention
+    # the warning. Capturing the output first keeps both conditions without
+    # toggling the shell's pipefail option, which previously stayed enabled
+    # when this check failed and returned.
+    local health
+    health=$(ceph health) || return 1
+    if grep -q "Too many repaired reads on 1 OSDs" <<< "$health"
+    then
+        return 1
+    fi
+
+    # Phase 2: damage the primary and its first peer for every object.
+    for i in $(seq 1 $OBJS)
+    do
+        inject_$inject rep data $poolname ${objbase}-$i $dir 0 || return 1
+        inject_$inject rep data $poolname ${objbase}-$i $dir 1 || return 1
+        # Force primary to pull from the bad peer, so we can repair it too!
+        set_config osd $primary osd_debug_feed_pullee $bad_peer || return 1
+        rados_get $dir $poolname ${objbase}-$i || return 1
+    done
+
+    wait_for_clean
+    COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
+    test "$COUNT" = "$((OBJS * 2))" || return 1
+    flush_pg_stats
+    COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired")
+    test "$COUNT" = "$((OBJS * 3))" || return 1
+
+    # Give mon a chance to notice additional OSD and unmute
+    # The default tick time is 5 seconds
+    CHECKTIME=10
+    LOOPS=0
+    while true
+    do
+        sleep 1
+        if ceph health | grep -q "Too many repaired reads on 2 OSDs"
+        then
+            break
+        fi
+        LOOPS=$((LOOPS + 1))
+        if test "$LOOPS" = "$CHECKTIME"
+        then
+            echo "Too many repaired reads not seen after $CHECKTIME seconds"
+            return 1
+        fi
+    done
+    ceph health detail | grep -q "osd.$primary had $((OBJS * 2)) reads repaired" || return 1
+    ceph health detail | grep -q "osd.$bad_peer had $OBJS reads repaired" || return 1
+
+    delete_pool $poolname
+}
+
+# Test backfill with unfound object
+function TEST_rep_backfill_unfound() {
+    # Write objects while one osd of the acting set is down and out, inject
+    # read errors on shards 0 and 1 of one of them, bring the osd back and
+    # expect the PG to report backfill_unfound. Then mark the unfound
+    # object lost and verify every other object.
+    #   $1 - test directory
+    local dir=$1
+    local objname=myobject
+    local lastobj=300
+    # Must be between 1 and $lastobj
+    local testobj=obj250
+
+    # NOTE(review): this appends to the exported CEPH_ARGS and never
+    # restores it, so the small pg-log limits stay in effect for anything
+    # that runs later in the same shell -- confirm that is intended.
+    export CEPH_ARGS
+    CEPH_ARGS+=' --osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10'
+    setup_osds 3 || return 1
+
+    local poolname=test-pool
+    create_pool $poolname 1 1 || return 1
+    wait_for_clean || return 1
+
+    ceph pg dump pgs
+
+    rados_put $dir $poolname $objname || return 1
+
+    # The pool has a single PG; look its id up instead of assuming the pool
+    # number that results from the pools created before this one.
+    local pgid=$(get_pg $poolname $objname)
+    test -n "$pgid" || return 1
+
+    local -a initial_osds=($(get_osds $poolname $objname))
+    local last_osd=${initial_osds[-1]}
+    kill_daemons $dir TERM osd.${last_osd} < /dev/null || return 1
+    ceph osd down ${last_osd} || return 1
+    ceph osd out ${last_osd} || return 1
+
+    ceph pg dump pgs
+
+    dd if=/dev/urandom of=${dir}/ORIGINAL bs=1024 count=4
+    for i in $(seq 1 $lastobj)
+    do
+        rados --pool $poolname put obj${i} $dir/ORIGINAL || return 1
+    done
+
+    inject_eio rep data $poolname $testobj $dir 0 || return 1
+    inject_eio rep data $poolname $testobj $dir 1 || return 1
+
+    activate_osd $dir ${last_osd} || return 1
+    ceph osd in ${last_osd} || return 1
+
+    sleep 15
+
+    # Poll for backfill_unfound. Running out of attempts is not an error by
+    # itself; the list_unfound check below decides.
+    for tmp in $(seq 1 360); do
+        state=$(get_state $pgid)
+        if echo $state | grep backfill_unfound
+        then
+            break
+        fi
+        echo "$state "
+        sleep 1
+    done
+
+    ceph pg dump pgs
+    ceph pg $pgid list_unfound | grep -q $testobj || return 1
+
+    # Command should hang because object is unfound
+    timeout 5 rados -p $poolname get $testobj $dir/CHECK
+    test $? = "124" || return 1
+
+    ceph pg $pgid mark_unfound_lost delete
+
+    wait_for_clean || return 1
+
+    for i in $(seq 1 $lastobj)
+    do
+        if [ obj${i} = "$testobj" ]; then
+            # Doesn't exist anymore
+            ! rados -p $poolname get $testobj $dir/CHECK || return 1
+        else
+            rados --pool $poolname get obj${i} $dir/CHECK || return 1
+            diff -q $dir/ORIGINAL $dir/CHECK || return 1
+        fi
+    done
+
+    rm -f ${dir}/ORIGINAL ${dir}/CHECK
+
+    delete_pool $poolname
+}
+
+# Test recovery with unfound object
+function TEST_rep_recovery_unfound() {
+    # Write objects while one osd of the acting set is down and out, inject
+    # read errors on shards 0 and 1 of one of them, bring the osd back and
+    # wait until the PG no longer reports recovering. The damaged object
+    # must then be listed as unfound; it is marked lost and every other
+    # object is verified.
+    #   $1 - test directory
+    local dir=$1
+    local objname=myobject
+    local lastobj=100
+    # Must be between 1 and $lastobj
+    local testobj=obj75
+
+    setup_osds 3 || return 1
+
+    local poolname=test-pool
+    create_pool $poolname 1 1 || return 1
+    wait_for_clean || return 1
+
+    ceph pg dump pgs
+
+    rados_put $dir $poolname $objname || return 1
+
+    # The pool has a single PG; look its id up instead of assuming the pool
+    # number that results from the pools created before this one.
+    local pgid=$(get_pg $poolname $objname)
+    test -n "$pgid" || return 1
+
+    local -a initial_osds=($(get_osds $poolname $objname))
+    local last_osd=${initial_osds[-1]}
+    kill_daemons $dir TERM osd.${last_osd} < /dev/null || return 1
+    ceph osd down ${last_osd} || return 1
+    ceph osd out ${last_osd} || return 1
+
+    ceph pg dump pgs
+
+    dd if=/dev/urandom of=${dir}/ORIGINAL bs=1024 count=4
+    for i in $(seq 1 $lastobj)
+    do
+        rados --pool $poolname put obj${i} $dir/ORIGINAL || return 1
+    done
+
+    inject_eio rep data $poolname $testobj $dir 0 || return 1
+    inject_eio rep data $poolname $testobj $dir 1 || return 1
+
+    activate_osd $dir ${last_osd} || return 1
+    ceph osd in ${last_osd} || return 1
+
+    sleep 15
+
+    # Poll until the state no longer contains "recovering". Running out of
+    # attempts is not an error by itself; the list_unfound check decides.
+    for tmp in $(seq 1 100); do
+        state=$(get_state $pgid)
+        if echo $state | grep -v recovering
+        then
+            break
+        fi
+        echo "$state "
+        sleep 1
+    done
+
+    ceph pg dump pgs
+    ceph pg $pgid list_unfound | grep -q $testobj || return 1
+
+    # Command should hang because object is unfound
+    timeout 5 rados -p $poolname get $testobj $dir/CHECK
+    test $? = "124" || return 1
+
+    ceph pg $pgid mark_unfound_lost delete
+
+    wait_for_clean || return 1
+
+    for i in $(seq 1 $lastobj)
+    do
+        if [ obj${i} = "$testobj" ]; then
+            # Doesn't exist anymore
+            ! rados -p $poolname get $testobj $dir/CHECK || return 1
+        else
+            rados --pool $poolname get obj${i} $dir/CHECK || return 1
+            diff -q $dir/ORIGINAL $dir/CHECK || return 1
+        fi
+    done
+
+    rm -f ${dir}/ORIGINAL ${dir}/CHECK
+
+    delete_pool $poolname
+}
+
+main osd-rep-recov-eio.sh "$@"
+
+# Local Variables:
+# compile-command: "cd ../../../build ; make -j4 && ../qa/run-standalone.sh osd-rep-recov-eio.sh"
+# End:
diff --git a/qa/standalone/osd/osd-reuse-id.sh b/qa/standalone/osd/osd-reuse-id.sh
new file mode 100755
index 000000000..b24b6f2eb
--- /dev/null
+++ b/qa/standalone/osd/osd-reuse-id.sh
@@ -0,0 +1,53 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2015 Red Hat <contact@redhat.com>
+#
+# Author: Loic Dachary <loic@dachary.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+    # Run the requested TEST_* functions, or every TEST_* function defined
+    # in the shell when none is named, each between setup and teardown.
+    #   $1   - test directory
+    #   $2.. - optional names of the test functions to run
+    # Returns 1 on the first step that fails.
+    local dir=$1
+    shift
+
+    export CEPH_MON="127.0.0.1:7123" # git grep '\<7123\>' : there must be only one
+    export CEPH_ARGS
+    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none --mon-host=$CEPH_MON "
+
+    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+    for func in $funcs ; do
+        setup $dir && $func $dir && teardown $dir || return 1
+    done
+}
+
+function TEST_reuse_id() {
+    # Destroy osd 1 in a two-osd cluster and start a new osd under the
+    # same id.
+    #   $1 - test directory
+    # Returns 1 as soon as any step fails.
+    local dir=$1
+
+    run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true \
+        && run_mgr $dir x \
+        && run_osd $dir 0 \
+        && run_osd $dir 1 \
+        && create_rbd_pool \
+        && wait_for_clean \
+        && destroy_osd $dir 1 \
+        && run_osd $dir 1 \
+        || return 1
+}
+
+main osd-reuse-id "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && test/osd/osd-reuse-id.sh"
+# End:
diff --git a/qa/standalone/osd/pg-split-merge.sh b/qa/standalone/osd/pg-split-merge.sh
new file mode 100755
index 000000000..7f2899b60
--- /dev/null
+++ b/qa/standalone/osd/pg-split-merge.sh
@@ -0,0 +1,203 @@
+#!/usr/bin/env bash
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+    # Run the requested TEST_* functions, or every TEST_* function defined
+    # in the shell when none is named, each between setup and teardown.
+    #   $1   - test directory
+    #   $2.. - optional names of the test functions to run
+    # Returns 1 on the first step that fails.
+    local dir=$1
+    shift
+
+    export CEPH_MON="127.0.0.1:7147" # git grep '\<7147\>' : there must be only one
+    export CEPH_ARGS
+    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+    # Every fragment ends with a separator space, so that anything appended
+    # to CEPH_ARGS later cannot fuse with the last option.
+    CEPH_ARGS+="--mon-host=$CEPH_MON --mon_min_osdmap_epochs=50 --paxos_service_trim_min=10 "
+
+    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+    for func in $funcs ; do
+        setup $dir || return 1
+        $func $dir || return 1
+        teardown $dir || return 1
+    done
+}
+
+function TEST_a_merge_empty() {
+    # Merge PG 1.1 into 1.0 while osd.2 only has an empty instance of 1.1,
+    # then check that the PG ends up active and clean after a scrub.
+    #   $1 - test directory
+    local dir=$1
+    local osd
+    local n
+
+    run_mon $dir a --osd_pool_default_size=3 || return 1
+    run_mgr $dir x || return 1
+    for osd in 0 1 2 ; do
+        run_osd $dir $osd || return 1
+    done
+
+    ceph osd pool create foo 2 || return 1
+    ceph osd pool set foo pgp_num 1 || return 1
+
+    wait_for_clean || return 1
+
+    # note: we need 1.0 to have the same or more objects than 1.1
+    # these names are the ones annotated as belonging to 1.1
+    for n in 1 2 3 4 ; do
+        rados -p foo put foo$n /etc/passwd
+    done
+    # these names are the ones annotated as belonging to 1.0
+    for n in 5 6 8 10 11 12 16 ; do
+        rados -p foo put foo$n /etc/passwd
+    done
+
+    wait_for_clean || return 1
+
+    ceph tell osd.1 config set osd_debug_no_purge_strays true
+    ceph osd pool set foo size 2 || return 1
+    wait_for_clean || return 1
+
+    # Remove osd.2's copy of 1.1 while that osd is stopped.
+    kill_daemons $dir TERM osd.2 || return 1
+    ceph-objectstore-tool --data-path $dir/2 --op remove --pgid 1.1 --force || return 1
+    activate_osd $dir 2 || return 1
+
+    wait_for_clean || return 1
+
+    # osd.2: now 1.0 is there but 1.1 is not
+
+    # instantiate 1.1 on osd.2 with last_update=0'0 ('empty'), which is
+    # the problematic state... then let it merge with 1.0
+    ceph tell osd.2 config set osd_debug_no_acting_change true
+    ceph osd out 0 1
+    ceph osd pool set foo pg_num 1
+    sleep 5
+    ceph tell osd.2 config set osd_debug_no_acting_change false
+
+    # go back to osd.1 being primary, and 3x so the osd.2 copy doesn't get
+    # removed
+    ceph osd in 0 1
+    ceph osd pool set foo size 3
+
+    wait_for_clean || return 1
+
+    # scrub to ensure the osd.2 copy of 1.0 was treated as incomplete (vs
+    # missing half of its objects).
+    ceph pg scrub 1.0
+    sleep 10
+    ceph log last debug
+    ceph pg ls
+    ceph pg ls | grep ' active.clean ' || return 1
+}
+
+function TEST_import_after_merge_and_gap() {
+    # Export both PGs of a two-PG pool, merge them, and exercise
+    # ceph-objectstore-tool imports of the old exports: right after the
+    # merge, and again after a gap in the osdmap history.
+    #   $1 - test directory
+    local dir=$1
+    local pg
+
+    run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
+    run_mgr $dir x || return 1
+    run_osd $dir 0 || return 1
+
+    ceph osd pool create foo 2 || return 1
+    wait_for_clean || return 1
+    rados -p foo bench 3 write -b 1024 --no-cleanup || return 1
+
+    # Export both PGs while the osd is stopped.
+    kill_daemons $dir TERM osd.0 || return 1
+    for pg in 1.1 1.0 ; do
+        ceph-objectstore-tool --data-path $dir/0 --op export --pgid $pg --file $dir/$pg --force || return 1
+    done
+    activate_osd $dir 0 || return 1
+
+    # Merge down to one PG and wait until the osd stops reporting 2 PGs.
+    ceph osd pool set foo pg_num 1
+    sleep 5
+    while ceph daemon osd.0 perf dump | jq '.osd.numpg' | grep 2 ; do sleep 1 ; done
+    wait_for_clean || return 1
+
+    kill_daemons $dir TERM osd.0 || return 1
+    ceph-objectstore-tool --data-path $dir/0 --op remove --pgid 1.0 --force || return 1
+    # this will import both halves of the original pg
+    for pg in 1.1 1.0 ; do
+        ceph-objectstore-tool --data-path $dir/0 --op import --pgid $pg --file $dir/$pg || return 1
+    done
+    activate_osd $dir 0 || return 1
+
+    wait_for_clean || return 1
+
+    # make a map gap
+    for f in $(seq 1 50) ; do
+        ceph osd set nodown
+        ceph osd unset nodown
+    done
+
+    # poke and prod to ensure last_epoch_clean is big, reported to mon, and
+    # the osd is able to trim old maps
+    rados -p foo bench 1 write -b 1024 --no-cleanup || return 1
+    wait_for_clean || return 1
+    ceph tell osd.0 send_beacon
+    sleep 5
+    ceph osd set nodown
+    ceph osd unset nodown
+    sleep 5
+
+    kill_daemons $dir TERM osd.0 || return 1
+
+    # this should fail.. 1.1 still doesn't exist
+    ! ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.1 --file $dir/1.1 || return 1
+
+    ceph-objectstore-tool --data-path $dir/0 --op export-remove --pgid 1.0 --force --file $dir/1.0.later || return 1
+
+    # this should fail too because of the gap
+    for pg in 1.1 1.0 ; do
+        ! ceph-objectstore-tool --data-path $dir/0 --op import --pgid $pg --file $dir/$pg || return 1
+    done
+
+    # we can force it...
+    for pg in 1.1 1.0 ; do
+        ceph-objectstore-tool --data-path $dir/0 --op import --pgid $pg --file $dir/$pg --force || return 1
+    done
+
+    # ...but the osd won't start, so remove it again.
+    for pg in 1.0 1.1 ; do
+        ceph-objectstore-tool --data-path $dir/0 --op remove --pgid $pg --force || return 1
+    done
+
+    # Put back the export taken after the merge.
+    ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.0 --file $dir/1.0.later --force || return 1
+
+    activate_osd $dir 0 || return 1
+
+    wait_for_clean || return 1
+}
+
+function TEST_import_after_split() {
+    # Export PG 1.0, split the pool into two PGs, and check that the old
+    # export is rejected while the split child 1.1 exists and accepted
+    # once the child has been removed.
+    #   $1 - test directory
+    local dir=$1
+    local store=$dir/0
+
+    run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
+    run_mgr $dir x || return 1
+    run_osd $dir 0 || return 1
+
+    ceph osd pool create foo 1 || return 1
+    wait_for_clean || return 1
+    rados -p foo bench 3 write -b 1024 --no-cleanup || return 1
+
+    kill_daemons $dir TERM osd.0 || return 1
+    ceph-objectstore-tool --data-path $store --op export --pgid 1.0 --file $dir/1.0 --force || return 1
+    activate_osd $dir 0 || return 1
+
+    # Split and wait until the osd stops reporting a single PG.
+    ceph osd pool set foo pg_num 2
+    sleep 5
+    while ceph daemon osd.0 perf dump | jq '.osd.numpg' | grep 1 ; do
+        sleep 1
+    done
+    wait_for_clean || return 1
+
+    kill_daemons $dir TERM osd.0 || return 1
+
+    ceph-objectstore-tool --data-path $store --op remove --pgid 1.0 --force || return 1
+
+    # this should fail because 1.1 (split child) is there
+    ! ceph-objectstore-tool --data-path $store --op import --pgid 1.0 --file $dir/1.0 || return 1
+
+    ceph-objectstore-tool --data-path $store --op remove --pgid 1.1 --force || return 1
+    # now it will work (1.1 is gone)
+    ceph-objectstore-tool --data-path $store --op import --pgid 1.0 --file $dir/1.0 || return 1
+
+    activate_osd $dir 0 || return 1
+
+    wait_for_clean || return 1
+}
+
+
+main pg-split-merge "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && test/osd/pg-split-merge.sh"
+# End:
diff --git a/qa/standalone/osd/repeer-on-acting-back.sh b/qa/standalone/osd/repeer-on-acting-back.sh
new file mode 100755
index 000000000..af406ef92
--- /dev/null
+++ b/qa/standalone/osd/repeer-on-acting-back.sh
@@ -0,0 +1,129 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2020 ZTE Corporation <contact@zte.com.cn>
+#
+# Author: xie xingguo <xie.xingguo@zte.com.cn>
+# Author: Yan Jun <yan.jun8@zte.com.cn>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+    # Export the settings shared by the tests in this file, then run the
+    # requested TEST_* functions (or every TEST_* function defined in the
+    # shell when none is named), each between setup and teardown.
+    #   $1   - test directory
+    #   $2.. - optional names of the test functions to run
+    # Returns 1 on the first step that fails.
+    local dir=$1
+    shift
+
+    export poolname=test
+    export testobjects=100
+    export loglen=12
+    export trim=$((loglen / 2))
+    export CEPH_MON="127.0.0.1:7115" # git grep '\<7115\>' : there must be only one
+    export CEPH_ARGS
+    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none --mon-host=$CEPH_MON "
+    # so we will not force auth_log_shard to be acting_primary
+    CEPH_ARGS+="--osd_force_auth_primary_missing_objects=1000000 "
+    # use small pg_log settings, so we always do backfill instead of recovery
+    CEPH_ARGS+="--osd_min_pg_log_entries=$loglen --osd_max_pg_log_entries=$loglen --osd_pg_log_trim_min=$trim "
+
+    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+    for func in $funcs ; do
+        setup $dir && $func $dir && teardown $dir || return 1
+    done
+}
+
+
+function TEST_repeer_on_down_acting_member_coming_back() {
+    # Move the PG's up set away from osd.2 with pg-upmap, take osd.2 down
+    # and bring it back, and check that it rejoins the acting set and that
+    # an osd log records the pg_temp request made on its notify.
+    #   $1 - test directory
+    # Uses $poolname and $testobjects, which run() exports.
+    local dir=$1
+    local dummyfile='/etc/fstab'
+
+    local num_osds=6
+    local osds="$(seq 0 $((num_osds - 1)))"
+    run_mon $dir a || return 1
+    run_mgr $dir x || return 1
+    for i in $osds
+    do
+        run_osd $dir $i || return 1
+    done
+
+    create_pool $poolname 1 1
+    ceph osd pool set $poolname size 3
+    ceph osd pool set $poolname min_size 2
+    local poolid=$(ceph pg dump pools -f json | jq '.pool_stats' | jq '.[].poolid')
+    local pgid=$poolid.0
+
+    # enable required feature-bits for upmap
+    ceph osd set-require-min-compat-client luminous
+    # reset up to [1,2,3]
+    ceph osd pg-upmap $pgid 1 2 3 || return 1
+
+    flush_pg_stats || return 1
+    wait_for_clean || return 1
+
+    echo "writing initial objects"
+    # write a bunch of objects
+    for i in $(seq 1 $testobjects)
+    do
+        rados -p $poolname put existing_$i $dummyfile
+    done
+
+    WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean
+
+    # reset up to [1,4,5]
+    ceph osd pg-upmap $pgid 1 4 5 || return 1
+
+    # wait for peering to complete
+    sleep 2
+
+    # make sure osd.2 belongs to current acting set
+    ceph pg $pgid query | jq '.acting' | grep 2 || return 1
+
+    # kill osd.2
+    kill_daemons $dir KILL osd.2 || return 1
+    ceph osd down osd.2
+
+    # again, wait for peering to complete
+    sleep 2
+
+    # osd.2 should have been moved out from acting set
+    if ceph pg $pgid query | jq '.acting' | grep 2
+    then
+        return 1
+    fi
+
+    # bring up osd.2
+    activate_osd $dir 2 || return 1
+    wait_for_osd up 2
+
+    # again, wait for peering to complete
+    sleep 2
+
+    # primary should be able to re-add osd.2 into acting
+    ceph pg $pgid query | jq '.acting' | grep 2 || return 1
+
+    WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean
+
+    # The re-peering must have been triggered by the notify; look for the
+    # message in any osd log under the test directory.
+    grep -q "Active: got notify from previous acting member.*, requesting pg_temp change" $(find $dir -name '*osd*log') || {
+        echo failure
+        return 1
+    }
+    echo "success"
+
+    delete_pool $poolname
+    kill_daemons $dir || return 1
+}
+
+main repeer-on-acting-back "$@"
+
+# Local Variables:
+# compile-command: "make -j4 && ../qa/run-standalone.sh repeer-on-acting-back.sh"
+# End:
diff --git a/qa/standalone/osd/repro_long_log.sh b/qa/standalone/osd/repro_long_log.sh
new file mode 100755
index 000000000..fa27d7017
--- /dev/null
+++ b/qa/standalone/osd/repro_long_log.sh
@@ -0,0 +1,197 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
+# Copyright (C) 2018 Red Hat <contact@redhat.com>
+#
+# Author: Josh Durgin <jdurgin@redhat.com>
+# Author: David Zafman <dzafman@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+#
+# Takes the test directory as $1 and, optionally, test function names after
+# it. Exports the monitor address and the common CEPH_ARGS, then calls each
+# requested function (by default every TEST_* function known to the shell)
+# wrapped in setup/teardown calls on that directory. Returns 1 at the first
+# step that fails.
+#
+function run() {
+    local dir=$1
+    shift
+
+    export CEPH_MON="127.0.0.1:7100" # git grep '\<7100\>' : there must be only one
+    export CEPH_ARGS
+    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none --mon-host=$CEPH_MON "
+
+    # No names given on the command line: pick up every TEST_* function that
+    # shows up in the output of "set".
+    local tests=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+    for func in $tests ; do
+        if ! setup $dir ; then
+            return 1
+        fi
+        if ! $func $dir ; then
+            return 1
+        fi
+        if ! teardown $dir ; then
+            return 1
+        fi
+    done
+}
+
+PGID=
+
+#
+# Verify the pg log and dup-entry counts reported by "ceph pg <pgid> query".
+#
+# Arguments: $1 - pgid to query
+#            $2 - expected .info.stats.log_size
+#            $3 - expected .info.stats.log_dups_size (defaults to 0)
+# Outputs:   prints both observed values, plus one line per mismatch
+# Returns:   0 when both values equal the expectations exactly, 1 otherwise
+#
+function test_log_size()
+{
+    local PGID=$1
+    local EXPECTED=$2
+    local DUPS_EXPECTED=${3:-0}
+    local stats log_size dups_size
+    local status=0
+
+    ceph tell osd.\* flush_pg_stats
+    # presumably lets the flushed stats reach the query output
+    sleep 3
+
+    # Query once so that both numbers are taken from the same reply.
+    stats=$(ceph pg $PGID query) || return 1
+    log_size=$(echo "$stats" | jq .info.stats.log_size)
+    dups_size=$(echo "$stats" | jq .info.stats.log_dups_size)
+    echo "$log_size"
+    echo "$dups_size"
+
+    # Compare whole values. A substring match would accept 13 or 30 when 3 is
+    # expected, and the default dups expectation of 0 would accept 10 or 20.
+    # Both checks count: a wrong log_size must fail even if the dups match.
+    if [ "$log_size" != "$EXPECTED" ] ; then
+        echo "log_size is $log_size, expected $EXPECTED"
+        status=1
+    fi
+    if [ "$dups_size" != "$DUPS_EXPECTED" ] ; then
+        echo "log_dups_size is $dups_size, expected $DUPS_EXPECTED"
+        status=1
+    fi
+    return $status
+}
+
+#
+# Common fixture for the tests in this script. Starts one mon, one mgr and
+# three OSDs in $dir, creates the pool "test", lowers the pg log limits and
+# fills the log: 20 writes of object foo, one removal, then 20 more removals
+# to generate error entries. Checks the log size after each phase.
+# Sets the globals POOL_ID and PGID. Returns 1 as soon as a checked step fails.
+#
+function setup_log_test() {
+    local dir=$1
+    local which=$2    # accepted but not used in this function
+
+    run_mon $dir a || return 1
+    run_mgr $dir x || return 1
+    for i in 0 1 2 ; do
+        run_osd $dir $i || return 1
+    done
+
+    ceph osd pool create test 1 1 || true
+    POOL_ID=$(ceph osd dump --format json | jq '.pools[] | select(.pool_name == "test") | .pool')
+    PGID="${POOL_ID}.0"
+
+    # With 1 PG setting entries per osd 20 results in a target log of 20
+    local setting
+    for setting in \
+        "--osd_target_pg_log_entries_per_osd 20" \
+        "--osd-min-pg-log-entries 20" \
+        "--osd-max-pg-log-entries 30" \
+        "--osd-pg-log-trim-min 10" \
+        "--osd_pg_log_dups_tracked 20"
+    do
+        # $setting is left unquoted on purpose: option and value are two words
+        ceph tell osd.\* injectargs -- $setting || return 1
+    done
+
+    touch $dir/foo
+    for i in {1..20} ; do
+        rados -p test put foo $dir/foo || return 1
+    done
+
+    if ! test_log_size $PGID 20 ; then
+        return 1
+    fi
+
+    rados -p test rm foo || return 1
+
+    # generate error entries: foo was just removed, so the status of these
+    # removals is deliberately not checked
+    for i in {1..20} ; do
+        rados -p test rm foo
+    done
+
+    # log should have been trimmed down to min_entries with one extra
+    if ! test_log_size $PGID 21 ; then
+        return 1
+    fi
+}
+
+#
+# Runs the common setup, performs one more regular write of object foo and
+# expects the pg log to hold 22 entries afterwards.
+#
+function TEST_repro_long_log1()
+{
+    local dir=$1
+
+    if ! setup_log_test $dir ; then
+        return 1
+    fi
+
+    # regular write should trim the log
+    if ! rados -p test put foo $dir/foo ; then
+        return 1
+    fi
+    if ! test_log_size $PGID 22 ; then
+        return 1
+    fi
+}
+
+#
+# Runs the common setup, stops the primary OSD of the test pg, trims its pg
+# log offline with ceph-objectstore-tool (max 2 log entries, 3 dups tracked),
+# restarts it and, once the cluster is clean again, expects a log size of 21
+# and a dups size of 18.
+#
+# Arguments: $1 - test directory
+# Returns:   1 as soon as a step fails
+#
+function TEST_repro_long_log2()
+{
+    local dir=$1
+
+    setup_log_test $dir || return 1
+
+    # Declare and assign separately: "local v=$(cmd)" reports the status of
+    # "local", which hides a failing command substitution.
+    local PRIMARY
+    PRIMARY=$(ceph pg $PGID query | jq '.info.stats.up_primary') || return 1
+    # A failed query or a missing field leaves PRIMARY empty or "null"; stop
+    # here instead of passing a malformed id to kill_daemons and to the
+    # objectstore tool's data path.
+    if ! [[ "$PRIMARY" =~ ^[0-9]+$ ]] ; then
+        echo "cannot determine the primary of pg $PGID: '$PRIMARY'"
+        return 1
+    fi
+
+    kill_daemons $dir TERM osd.$PRIMARY || return 1
+    CEPH_ARGS="--osd-max-pg-log-entries=2 --osd-pg-log-dups-tracked=3 --no-mon-config" \
+        ceph-objectstore-tool --data-path $dir/$PRIMARY --pgid $PGID \
+        --op trim-pg-log || return 1
+    activate_osd $dir $PRIMARY || return 1
+    wait_for_clean || return 1
+    test_log_size $PGID 21 18 || return 1
+}
+
+#
+# Runs the common setup, then lowers the log targets to 2, limits a single
+# trim to 4 entries and disables dups tracking. Each following "rados rm"
+# adds one log entry; the log size is checked after every one of them.
+#
+# Arguments: $1 - test directory
+# Returns:   1 as soon as a checked step fails
+#
+function TEST_trim_max_entries()
+{
+    local dir=$1
+    local expected
+
+    setup_log_test $dir || return 1
+
+    # Every setting is checked: a silently rejected option would only show up
+    # later as a confusing log size mismatch.
+    ceph tell osd.\* injectargs -- --osd_target_pg_log_entries_per_osd 2 || return 1
+    ceph tell osd.\* injectargs -- --osd-min-pg-log-entries 2 || return 1
+    ceph tell osd.\* injectargs -- --osd-pg-log-trim-min 2 || return 1
+    ceph tell osd.\* injectargs -- --osd-pg-log-trim-max 4 || return 1
+    ceph tell osd.\* injectargs -- --osd_pg_log_dups_tracked 0 || return 1
+
+    # adding log entries, should only trim 4 and add one each time
+    # (the status of rm itself is deliberately not checked, as in the setup)
+    for expected in 18 15 12 9 6 3 ; do
+        rados -p test rm foo
+        test_log_size $PGID $expected || return 1
+    done
+
+    # below trim_min
+    for expected in 4 3 4 3 ; do
+        rados -p test rm foo
+        test_log_size $PGID $expected || return 1
+    done
+}
+
+#
+# Same scenario as TEST_trim_max_entries but with 20 dups tracked: after each
+# "rados rm" both the log size and the dups size are checked.
+#
+# Arguments: $1 - test directory
+# Returns:   1 as soon as a checked step fails
+#
+function TEST_trim_max_entries_with_dups()
+{
+    local dir=$1
+    local sizes
+
+    setup_log_test $dir || return 1
+
+    # Every setting is checked: a silently rejected option would only show up
+    # later as a confusing size mismatch.
+    ceph tell osd.\* injectargs -- --osd_target_pg_log_entries_per_osd 2 || return 1
+    ceph tell osd.\* injectargs -- --osd-min-pg-log-entries 2 || return 1
+    ceph tell osd.\* injectargs -- --osd-pg-log-trim-min 2 || return 1
+    ceph tell osd.\* injectargs -- --osd-pg-log-trim-max 4 || return 1
+    ceph tell osd.\* injectargs -- --osd_pg_log_dups_tracked 20 || return 1
+
+    # adding log entries, should only trim 4 and add one each time
+    # expected dups grow with every trim until they reach the 20 tracked
+    # Each item is "<log size> <dups size>"; $sizes is left unquoted on
+    # purpose so that it splits into the two arguments.
+    # (the status of rm itself is deliberately not checked, as in the setup)
+    for sizes in "18 2" "15 6" "12 10" "9 14" "6 18" "3 20" ; do
+        rados -p test rm foo
+        test_log_size $PGID $sizes || return 1
+    done
+
+    # below trim_min
+    for sizes in "4 20" "3 20" "4 20" "3 20" ; do
+        rados -p test rm foo
+        test_log_size $PGID $sizes || return 1
+    done
+}
+
+main repro-long-log "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && ../qa/run-standalone.sh repro_long_log.sh"
+# End: