author     Daniel Baumann <daniel.baumann@progress-linux.org>    2024-04-21 11:54:28 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>    2024-04-21 11:54:28 +0000
commit     e6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree       64f88b554b444a49f656b6c656111a145cbbaa28 /qa/standalone/osd
parent     Initial commit. (diff)
Adding upstream version 18.2.2.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'qa/standalone/osd')
-rwxr-xr-x  qa/standalone/osd/bad-inc-map.sh              62
-rwxr-xr-x  qa/standalone/osd/divergent-priors.sh        855
-rwxr-xr-x  qa/standalone/osd/ec-error-rollforward.sh     66
-rwxr-xr-x  qa/standalone/osd/osd-bench.sh                97
-rwxr-xr-x  qa/standalone/osd/osd-bluefs-volume-ops.sh   497
-rwxr-xr-x  qa/standalone/osd/osd-config.sh               97
-rwxr-xr-x  qa/standalone/osd/osd-copy-from.sh            68
-rwxr-xr-x  qa/standalone/osd/osd-dup.sh                  30
-rwxr-xr-x  qa/standalone/osd/osd-fast-mark-down.sh      111
-rwxr-xr-x  qa/standalone/osd/osd-force-create-pg.sh      53
-rwxr-xr-x  qa/standalone/osd/osd-markdown.sh            149
-rwxr-xr-x  qa/standalone/osd/osd-reactivate.sh           56
-rwxr-xr-x  qa/standalone/osd/osd-recovery-prio.sh       542
-rwxr-xr-x  qa/standalone/osd/osd-recovery-space.sh      176
-rwxr-xr-x  qa/standalone/osd/osd-recovery-stats.sh      512
-rwxr-xr-x  qa/standalone/osd/osd-rep-recov-eio.sh       422
-rwxr-xr-x  qa/standalone/osd/osd-reuse-id.sh             53
-rwxr-xr-x  qa/standalone/osd/pg-split-merge.sh          203
-rwxr-xr-x  qa/standalone/osd/repeer-on-acting-back.sh   129
-rwxr-xr-x  qa/standalone/osd/repro_long_log.sh          197
20 files changed, 4375 insertions(+), 0 deletions(-)
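Each script below is a self-contained ceph-helpers test: it sources qa/standalone/ceph-helpers.sh, pins a dedicated monitor port, and runs every TEST_* function it defines, or only the functions named on its command line (the `local funcs=${@:-...}` pattern in each run()). As a rough sketch of how these scripts are usually driven, based on the compile-command hints embedded in the files themselves (the build-directory layout below is an assumption, not part of this commit):

    # from the build directory of a compiled Ceph tree (hypothetical layout)
    ../qa/run-standalone.sh bad-inc-map.sh
    # or run a single TEST_ function by passing its name straight to the script
    CEPH_ROOT=.. bash ../qa/standalone/osd/osd-markdown.sh TEST_markdown_boot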
diff --git a/qa/standalone/osd/bad-inc-map.sh b/qa/standalone/osd/bad-inc-map.sh
new file mode 100755
index 000000000..cc3cf27cc
--- /dev/null
+++ b/qa/standalone/osd/bad-inc-map.sh
@@ -0,0 +1,62 @@
+#!/usr/bin/env bash
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+mon_port=$(get_unused_port)
+
+function run() {
+    local dir=$1
+    shift
+
+    export CEPH_MON="127.0.0.1:$mon_port"
+    export CEPH_ARGS
+    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+    CEPH_ARGS+="--mon-host=$CEPH_MON "
+    set -e
+
+    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+    for func in $funcs ; do
+        setup $dir || return 1
+        $func $dir || return 1
+        teardown $dir || return 1
+    done
+}
+
+function TEST_bad_inc_map() {
+    local dir=$1
+
+    run_mon $dir a
+    run_mgr $dir x
+    run_osd $dir 0
+    run_osd $dir 1
+    run_osd $dir 2
+
+    ceph config set osd.2 osd_inject_bad_map_crc_probability 1
+
+    # osd map churn
+    create_pool foo 8
+    ceph osd pool set foo min_size 1
+    ceph osd pool set foo min_size 2
+
+    sleep 5
+
+    # make sure all the OSDs are still up
+    TIMEOUT=10 wait_for_osd up 0
+    TIMEOUT=10 wait_for_osd up 1
+    TIMEOUT=10 wait_for_osd up 2
+
+    # check for the signature in the log
+    grep "injecting map crc failure" $dir/osd.2.log || return 1
+    grep "bailing because last" $dir/osd.2.log || return 1
+
+    echo success
+
+    delete_pool foo
+    kill_daemons $dir || return 1
+}
+
+main bad-inc-map "$@"
+
+# Local Variables:
+# compile-command: "make -j4 && ../qa/run-standalone.sh bad-inc-map.sh"
+# End:
diff --git a/qa/standalone/osd/divergent-priors.sh b/qa/standalone/osd/divergent-priors.sh
new file mode 100755
index 000000000..40d72544d
--- /dev/null
+++ b/qa/standalone/osd/divergent-priors.sh
@@ -0,0 +1,855 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2019 Red Hat <contact@redhat.com>
+#
+# Author: David Zafman <dzafman@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+    local dir=$1
+    shift
+
+    # This should be a multiple of 6
+    export loglen=12
+    export divisor=3
+    export trim=$(expr $loglen / 2)
+    export DIVERGENT_WRITE=$(expr $trim / $divisor)
+    export DIVERGENT_REMOVE=$(expr $trim / $divisor)
+    export DIVERGENT_CREATE=$(expr $trim / $divisor)
+    export poolname=test
+    export testobjects=100
+    # Fix port????
+ export CEPH_MON="127.0.0.1:7115" # git grep '\<7115\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + # so we will not force auth_log_shard to be acting_primary + CEPH_ARGS+="--osd_force_auth_primary_missing_objects=1000000 " + CEPH_ARGS+="--osd_debug_pg_log_writeout=true " + CEPH_ARGS+="--osd_min_pg_log_entries=$loglen --osd_max_pg_log_entries=$loglen --osd_pg_log_trim_min=$trim " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + + +# Special case divergence test +# Test handling of divergent entries with prior_version +# prior to log_tail +# based on qa/tasks/divergent_prior.py +function TEST_divergent() { + local dir=$1 + + # something that is always there + local dummyfile='/etc/fstab' + local dummyfile2='/etc/resolv.conf' + + local num_osds=3 + local osds="$(seq 0 $(expr $num_osds - 1))" + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for i in $osds + do + run_osd $dir $i || return 1 + done + + ceph osd set noout + ceph osd set noin + ceph osd set nodown + create_pool $poolname 1 1 + ceph osd pool set $poolname size 3 + ceph osd pool set $poolname min_size 2 + + flush_pg_stats || return 1 + wait_for_clean || return 1 + + # determine primary + local divergent="$(ceph pg dump pgs --format=json | jq '.pg_stats[0].up_primary')" + echo "primary and soon to be divergent is $divergent" + ceph pg dump pgs + local non_divergent="" + for i in $osds + do + if [ "$i" = "$divergent" ]; then + continue + fi + non_divergent="$non_divergent $i" + done + + echo "writing initial objects" + # write a bunch of objects + for i in $(seq 1 $testobjects) + do + rados -p $poolname put existing_$i $dummyfile + done + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + local pgid=$(get_pg $poolname existing_1) + + # blackhole non_divergent + echo "blackholing osds $non_divergent" + ceph pg dump pgs + for i in $non_divergent + do + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) config set objectstore_blackhole 1 + done + + local case5=$testobjects + local case3=$(expr $testobjects - 1) + # Write some soon to be divergent + echo 'writing divergent object' + rados -p $poolname put existing_$case5 $dummyfile & + echo 'create missing divergent object' + inject_eio rep data $poolname existing_$case3 $dir 0 || return 1 + rados -p $poolname get existing_$case3 $dir/existing & + sleep 10 + killall -9 rados + + # kill all the osds but leave divergent in + echo 'killing all the osds' + ceph pg dump pgs + kill_daemons $dir KILL osd || return 1 + for i in $osds + do + ceph osd down osd.$i + done + for i in $non_divergent + do + ceph osd out osd.$i + done + + # bring up non-divergent + echo "bringing up non_divergent $non_divergent" + ceph pg dump pgs + for i in $non_divergent + do + activate_osd $dir $i || return 1 + done + for i in $non_divergent + do + ceph osd in osd.$i + done + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + # write 1 non-divergent object (ensure that old divergent one is divergent) + objname="existing_$(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)" + echo "writing non-divergent object $objname" + ceph pg dump pgs + rados -p $poolname put $objname $dummyfile2 + + # ensure no recovery of up osds first + echo 'delay recovery' + ceph pg dump pgs + for i in $non_divergent + do + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) set_recovery_delay 100000 
+ done + + # bring in our divergent friend + echo "revive divergent $divergent" + ceph pg dump pgs + ceph osd set noup + activate_osd $dir $divergent + sleep 5 + + echo 'delay recovery divergent' + ceph pg dump pgs + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) set_recovery_delay 100000 + + ceph osd unset noup + + wait_for_osd up 0 + wait_for_osd up 1 + wait_for_osd up 2 + + ceph pg dump pgs + echo 'wait for peering' + ceph pg dump pgs + rados -p $poolname put foo $dummyfile + + echo "killing divergent $divergent" + ceph pg dump pgs + kill_daemons $dir KILL osd.$divergent + #_objectstore_tool_nodown $dir $divergent --op log --pgid $pgid + echo "reviving divergent $divergent" + ceph pg dump pgs + activate_osd $dir $divergent + + sleep 20 + + echo "allowing recovery" + ceph pg dump pgs + # Set osd_recovery_delay_start back to 0 and kick the queue + for i in $osds + do + ceph tell osd.$i debug kick_recovery_wq 0 + done + + echo 'reading divergent objects' + ceph pg dump pgs + for i in $(seq 1 $(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)) + do + rados -p $poolname get existing_$i $dir/existing || return 1 + done + rm -f $dir/existing + + grep _merge_object_divergent_entries $(find $dir -name '*osd*log') + # Check for _merge_object_divergent_entries for case #5 + if ! grep -q "_merge_object_divergent_entries.*cannot roll back, removing and adding to missing" $(find $dir -name '*osd*log') + then + echo failure + return 1 + fi + echo "success" + + delete_pool $poolname + kill_daemons $dir || return 1 +} + +function TEST_divergent_ec() { + local dir=$1 + + # something that is always there + local dummyfile='/etc/fstab' + local dummyfile2='/etc/resolv.conf' + + local num_osds=3 + local osds="$(seq 0 $(expr $num_osds - 1))" + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for i in $osds + do + run_osd $dir $i || return 1 + done + + ceph osd set noout + ceph osd set noin + ceph osd set nodown + create_ec_pool $poolname true k=2 m=1 || return 1 + + flush_pg_stats || return 1 + wait_for_clean || return 1 + + # determine primary + local divergent="$(ceph pg dump pgs --format=json | jq '.pg_stats[0].up_primary')" + echo "primary and soon to be divergent is $divergent" + ceph pg dump pgs + local non_divergent="" + for i in $osds + do + if [ "$i" = "$divergent" ]; then + continue + fi + non_divergent="$non_divergent $i" + done + + echo "writing initial objects" + # write a bunch of objects + for i in $(seq 1 $testobjects) + do + rados -p $poolname put existing_$i $dummyfile + done + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + local pgid=$(get_pg $poolname existing_1) + + # blackhole non_divergent + echo "blackholing osds $non_divergent" + ceph pg dump pgs + for i in $non_divergent + do + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) config set objectstore_blackhole 1 + done + + # Write some soon to be divergent + echo 'writing divergent object' + rados -p $poolname put existing_$testobjects $dummyfile2 & + sleep 1 + rados -p $poolname put existing_$testobjects $dummyfile & + rados -p $poolname mksnap snap1 + rados -p $poolname put existing_$(expr $testobjects - 1) $dummyfile & + sleep 10 + killall -9 rados + + # kill all the osds but leave divergent in + echo 'killing all the osds' + ceph pg dump pgs + kill_daemons $dir KILL osd || return 1 + for i in $osds + do + ceph osd down osd.$i + done + for i in $non_divergent + do + ceph osd out osd.$i + done + + # bring up non-divergent + echo "bringing up non_divergent $non_divergent" + ceph pg dump pgs + for i in 
$non_divergent + do + activate_osd $dir $i || return 1 + done + for i in $non_divergent + do + ceph osd in osd.$i + done + + sleep 5 + #WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + # write 1 non-divergent object (ensure that old divergent one is divergent) + objname="existing_$(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)" + echo "writing non-divergent object $objname" + ceph pg dump pgs + rados -p $poolname put $objname $dummyfile2 + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + # Dump logs + for i in $non_divergent + do + kill_daemons $dir KILL osd.$i || return 1 + _objectstore_tool_nodown $dir $i --op log --pgid $pgid + activate_osd $dir $i || return 1 + done + _objectstore_tool_nodown $dir $divergent --op log --pgid $pgid + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + # ensure no recovery of up osds first + echo 'delay recovery' + ceph pg dump pgs + for i in $non_divergent + do + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) set_recovery_delay 100000 + done + + # bring in our divergent friend + echo "revive divergent $divergent" + ceph pg dump pgs + ceph osd set noup + activate_osd $dir $divergent + sleep 5 + + echo 'delay recovery divergent' + ceph pg dump pgs + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) set_recovery_delay 100000 + + ceph osd unset noup + + wait_for_osd up 0 + wait_for_osd up 1 + wait_for_osd up 2 + + ceph pg dump pgs + echo 'wait for peering' + ceph pg dump pgs + rados -p $poolname put foo $dummyfile + + echo "killing divergent $divergent" + ceph pg dump pgs + kill_daemons $dir KILL osd.$divergent + #_objectstore_tool_nodown $dir $divergent --op log --pgid $pgid + echo "reviving divergent $divergent" + ceph pg dump pgs + activate_osd $dir $divergent + + sleep 20 + + echo "allowing recovery" + ceph pg dump pgs + # Set osd_recovery_delay_start back to 0 and kick the queue + for i in $osds + do + ceph tell osd.$i debug kick_recovery_wq 0 + done + + echo 'reading divergent objects' + ceph pg dump pgs + for i in $(seq 1 $(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)) + do + rados -p $poolname get existing_$i $dir/existing || return 1 + done + rm -f $dir/existing + + grep _merge_object_divergent_entries $(find $dir -name '*osd*log') + # Check for _merge_object_divergent_entries for case #3 + # XXX: Not reproducing this case +# if ! grep -q "_merge_object_divergent_entries.* missing, .* adjusting" $(find $dir -name '*osd*log') +# then +# echo failure +# return 1 +# fi + # Check for _merge_object_divergent_entries for case #4 + if ! 
grep -q "_merge_object_divergent_entries.*rolled back" $(find $dir -name '*osd*log') + then + echo failure + return 1 + fi + echo "success" + + delete_pool $poolname + kill_daemons $dir || return 1 +} + +# Special case divergence test with ceph-objectstore-tool export/remove/import +# Test handling of divergent entries with prior_version +# prior to log_tail and a ceph-objectstore-tool export/import +# based on qa/tasks/divergent_prior2.py +function TEST_divergent_2() { + local dir=$1 + + # something that is always there + local dummyfile='/etc/fstab' + local dummyfile2='/etc/resolv.conf' + + local num_osds=3 + local osds="$(seq 0 $(expr $num_osds - 1))" + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for i in $osds + do + run_osd $dir $i || return 1 + done + + ceph osd set noout + ceph osd set noin + ceph osd set nodown + create_pool $poolname 1 1 + ceph osd pool set $poolname size 3 + ceph osd pool set $poolname min_size 2 + + flush_pg_stats || return 1 + wait_for_clean || return 1 + + # determine primary + local divergent="$(ceph pg dump pgs --format=json | jq '.pg_stats[0].up_primary')" + echo "primary and soon to be divergent is $divergent" + ceph pg dump pgs + local non_divergent="" + for i in $osds + do + if [ "$i" = "$divergent" ]; then + continue + fi + non_divergent="$non_divergent $i" + done + + echo "writing initial objects" + # write a bunch of objects + for i in $(seq 1 $testobjects) + do + rados -p $poolname put existing_$i $dummyfile + done + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + local pgid=$(get_pg $poolname existing_1) + + # blackhole non_divergent + echo "blackholing osds $non_divergent" + ceph pg dump pgs + for i in $non_divergent + do + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) config set objectstore_blackhole 1 + done + + # Do some creates to hit case 2 + echo 'create new divergent objects' + for i in $(seq 1 $DIVERGENT_CREATE) + do + rados -p $poolname create newobject_$i & + done + # Write some soon to be divergent + echo 'writing divergent objects' + for i in $(seq 1 $DIVERGENT_WRITE) + do + rados -p $poolname put existing_$i $dummyfile2 & + done + # Remove some soon to be divergent + echo 'remove divergent objects' + for i in $(seq 1 $DIVERGENT_REMOVE) + do + rmi=$(expr $i + $DIVERGENT_WRITE) + rados -p $poolname rm existing_$rmi & + done + sleep 10 + killall -9 rados + + # kill all the osds but leave divergent in + echo 'killing all the osds' + ceph pg dump pgs + kill_daemons $dir KILL osd || return 1 + for i in $osds + do + ceph osd down osd.$i + done + for i in $non_divergent + do + ceph osd out osd.$i + done + + # bring up non-divergent + echo "bringing up non_divergent $non_divergent" + ceph pg dump pgs + for i in $non_divergent + do + activate_osd $dir $i || return 1 + done + for i in $non_divergent + do + ceph osd in osd.$i + done + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + # write 1 non-divergent object (ensure that old divergent one is divergent) + objname="existing_$(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)" + echo "writing non-divergent object $objname" + ceph pg dump pgs + rados -p $poolname put $objname $dummyfile2 + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + # ensure no recovery of up osds first + echo 'delay recovery' + ceph pg dump pgs + for i in $non_divergent + do + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) set_recovery_delay 100000 + done + + # bring in our divergent friend + echo "revive divergent $divergent" + ceph pg dump pgs + ceph osd set noup + activate_osd $dir $divergent + 
sleep 5 + + echo 'delay recovery divergent' + ceph pg dump pgs + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) set_recovery_delay 100000 + + ceph osd unset noup + + wait_for_osd up 0 + wait_for_osd up 1 + wait_for_osd up 2 + + ceph pg dump pgs + echo 'wait for peering' + ceph pg dump pgs + rados -p $poolname put foo $dummyfile + + # At this point the divergent_priors should have been detected + + echo "killing divergent $divergent" + ceph pg dump pgs + kill_daemons $dir KILL osd.$divergent + + # export a pg + expfile=$dir/exp.$$.out + _objectstore_tool_nodown $dir $divergent --op export-remove --pgid $pgid --file $expfile + _objectstore_tool_nodown $dir $divergent --op import --file $expfile + + echo "reviving divergent $divergent" + ceph pg dump pgs + activate_osd $dir $divergent + wait_for_osd up $divergent + + sleep 20 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) dump_ops_in_flight + + echo "allowing recovery" + ceph pg dump pgs + # Set osd_recovery_delay_start back to 0 and kick the queue + for i in $osds + do + ceph tell osd.$i debug kick_recovery_wq 0 + done + + echo 'reading divergent objects' + ceph pg dump pgs + for i in $(seq 1 $(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)) + do + rados -p $poolname get existing_$i $dir/existing || return 1 + done + for i in $(seq 1 $DIVERGENT_CREATE) + do + rados -p $poolname get newobject_$i $dir/existing + done + rm -f $dir/existing + + grep _merge_object_divergent_entries $(find $dir -name '*osd*log') + # Check for _merge_object_divergent_entries for case #1 + if ! grep -q "_merge_object_divergent_entries: more recent entry found:" $(find $dir -name '*osd*log') + then + echo failure + return 1 + fi + # Check for _merge_object_divergent_entries for case #2 + if ! 
grep -q "_merge_object_divergent_entries.*prior_version or op type indicates creation" $(find $dir -name '*osd*log') + then + echo failure + return 1 + fi + echo "success" + + rm $dir/$expfile + + delete_pool $poolname + kill_daemons $dir || return 1 +} + +# this is the same as case _2 above, except we enable pg autoscaling in order +# to reproduce https://tracker.ceph.com/issues/41816 +function TEST_divergent_3() { + local dir=$1 + + # something that is always there + local dummyfile='/etc/fstab' + local dummyfile2='/etc/resolv.conf' + + local num_osds=3 + local osds="$(seq 0 $(expr $num_osds - 1))" + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for i in $osds + do + run_osd $dir $i || return 1 + done + + ceph osd set noout + ceph osd set noin + ceph osd set nodown + create_pool $poolname 1 1 + ceph osd pool set $poolname size 3 + ceph osd pool set $poolname min_size 2 + + # reproduce https://tracker.ceph.com/issues/41816 + ceph osd pool set $poolname pg_autoscale_mode on + + divergent=-1 + start_time=$(date +%s) + max_duration=300 + + while [ "$divergent" -le -1 ] + do + flush_pg_stats || return 1 + wait_for_clean || return 1 + + # determine primary + divergent="$(ceph pg dump pgs --format=json | jq '.pg_stats[0].up_primary')" + echo "primary and soon to be divergent is $divergent" + ceph pg dump pgs + + current_time=$(date +%s) + elapsed_time=$(expr $current_time - $start_time) + if [ "$elapsed_time" -gt "$max_duration" ]; then + echo "timed out waiting for divergent" + return 1 + fi + done + + local non_divergent="" + for i in $osds + do + if [ "$i" = "$divergent" ]; then + continue + fi + non_divergent="$non_divergent $i" + done + + echo "writing initial objects" + # write a bunch of objects + for i in $(seq 1 $testobjects) + do + rados -p $poolname put existing_$i $dummyfile + done + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + local pgid=$(get_pg $poolname existing_1) + + # blackhole non_divergent + echo "blackholing osds $non_divergent" + ceph pg dump pgs + for i in $non_divergent + do + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) config set objectstore_blackhole 1 + done + + # Do some creates to hit case 2 + echo 'create new divergent objects' + for i in $(seq 1 $DIVERGENT_CREATE) + do + rados -p $poolname create newobject_$i & + done + # Write some soon to be divergent + echo 'writing divergent objects' + for i in $(seq 1 $DIVERGENT_WRITE) + do + rados -p $poolname put existing_$i $dummyfile2 & + done + # Remove some soon to be divergent + echo 'remove divergent objects' + for i in $(seq 1 $DIVERGENT_REMOVE) + do + rmi=$(expr $i + $DIVERGENT_WRITE) + rados -p $poolname rm existing_$rmi & + done + sleep 10 + killall -9 rados + + # kill all the osds but leave divergent in + echo 'killing all the osds' + ceph pg dump pgs + kill_daemons $dir KILL osd || return 1 + for i in $osds + do + ceph osd down osd.$i + done + for i in $non_divergent + do + ceph osd out osd.$i + done + + # bring up non-divergent + echo "bringing up non_divergent $non_divergent" + ceph pg dump pgs + for i in $non_divergent + do + activate_osd $dir $i || return 1 + done + for i in $non_divergent + do + ceph osd in osd.$i + done + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + # write 1 non-divergent object (ensure that old divergent one is divergent) + objname="existing_$(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)" + echo "writing non-divergent object $objname" + ceph pg dump pgs + rados -p $poolname put $objname $dummyfile2 + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + # ensure no 
recovery of up osds first + echo 'delay recovery' + ceph pg dump pgs + for i in $non_divergent + do + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) set_recovery_delay 100000 + done + + # bring in our divergent friend + echo "revive divergent $divergent" + ceph pg dump pgs + ceph osd set noup + activate_osd $dir $divergent + sleep 5 + + echo 'delay recovery divergent' + ceph pg dump pgs + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) set_recovery_delay 100000 + + ceph osd unset noup + + wait_for_osd up 0 + wait_for_osd up 1 + wait_for_osd up 2 + + ceph pg dump pgs + echo 'wait for peering' + ceph pg dump pgs + rados -p $poolname put foo $dummyfile + + # At this point the divergent_priors should have been detected + + echo "killing divergent $divergent" + ceph pg dump pgs + kill_daemons $dir KILL osd.$divergent + + # export a pg + expfile=$dir/exp.$$.out + _objectstore_tool_nodown $dir $divergent --op export-remove --pgid $pgid --file $expfile + _objectstore_tool_nodown $dir $divergent --op import --file $expfile + + echo "reviving divergent $divergent" + ceph pg dump pgs + activate_osd $dir $divergent + wait_for_osd up $divergent + + sleep 20 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) dump_ops_in_flight + + echo "allowing recovery" + ceph pg dump pgs + # Set osd_recovery_delay_start back to 0 and kick the queue + for i in $osds + do + ceph tell osd.$i debug kick_recovery_wq 0 + done + + echo 'reading divergent objects' + ceph pg dump pgs + for i in $(seq 1 $(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)) + do + rados -p $poolname get existing_$i $dir/existing || return 1 + done + for i in $(seq 1 $DIVERGENT_CREATE) + do + rados -p $poolname get newobject_$i $dir/existing + done + rm -f $dir/existing + + grep _merge_object_divergent_entries $(find $dir -name '*osd*log') + # Check for _merge_object_divergent_entries for case #1 + if ! grep -q "_merge_object_divergent_entries: more recent entry found:" $(find $dir -name '*osd*log') + then + echo failure + return 1 + fi + # Check for _merge_object_divergent_entries for case #2 + if ! grep -q "_merge_object_divergent_entries.*prior_version or op type indicates creation" $(find $dir -name '*osd*log') + then + echo failure + return 1 + fi + echo "success" + + rm $dir/$expfile + + delete_pool $poolname + kill_daemons $dir || return 1 +} + + +main divergent-priors "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh divergent-priors.sh" +# End: diff --git a/qa/standalone/osd/ec-error-rollforward.sh b/qa/standalone/osd/ec-error-rollforward.sh new file mode 100755 index 000000000..621e6b13f --- /dev/null +++ b/qa/standalone/osd/ec-error-rollforward.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + # Fix port???? 
+ export CEPH_MON="127.0.0.1:7132" # git grep '\<7132\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + export margin=10 + export objects=200 + export poolname=test + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_ec_error_rollforward() { + local dir=$1 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + + ceph osd erasure-code-profile set ec-profile m=2 k=2 crush-failure-domain=osd + ceph osd pool create ec 1 1 erasure ec-profile + + rados -p ec put foo /etc/passwd + + kill -STOP $(cat $dir/osd.2.pid) + + rados -p ec rm foo & + pids="$!" + sleep 1 + rados -p ec rm a & + pids+=" $!" + rados -p ec rm b & + pids+=" $!" + rados -p ec rm c & + pids+=" $!" + sleep 1 + # Use SIGKILL so stopped osd.2 will terminate + # and kill_daemons waits for daemons to die + kill_daemons $dir KILL osd + kill $pids + wait + + activate_osd $dir 0 || return 1 + activate_osd $dir 1 || return 1 + activate_osd $dir 2 || return 1 + activate_osd $dir 3 || return 1 + + wait_for_clean || return 1 +} + +main ec-error-rollforward "$@" diff --git a/qa/standalone/osd/osd-bench.sh b/qa/standalone/osd/osd-bench.sh new file mode 100755 index 000000000..eb1a6a440 --- /dev/null +++ b/qa/standalone/osd/osd-bench.sh @@ -0,0 +1,97 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com> +# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
+# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7106" # git grep '\<7106\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--debug-bluestore 20 " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_bench() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + + local osd_bench_small_size_max_iops=$(CEPH_ARGS='' ceph-conf \ + --show-config-value osd_bench_small_size_max_iops) + local osd_bench_large_size_max_throughput=$(CEPH_ARGS='' ceph-conf \ + --show-config-value osd_bench_large_size_max_throughput) + local osd_bench_max_block_size=$(CEPH_ARGS='' ceph-conf \ + --show-config-value osd_bench_max_block_size) + local osd_bench_duration=$(CEPH_ARGS='' ceph-conf \ + --show-config-value osd_bench_duration) + + # + # block size too high + # + expect_failure $dir osd_bench_max_block_size \ + ceph tell osd.0 bench 1024 $((osd_bench_max_block_size + 1)) || return 1 + + # + # count too high for small (< 1MB) block sizes + # + local bsize=1024 + local max_count=$(($bsize * $osd_bench_duration * $osd_bench_small_size_max_iops)) + expect_failure $dir bench_small_size_max_iops \ + ceph tell osd.0 bench $(($max_count + 1)) $bsize || return 1 + + # + # count too high for large (>= 1MB) block sizes + # + local bsize=$((1024 * 1024 + 1)) + local max_count=$(($osd_bench_large_size_max_throughput * $osd_bench_duration)) + expect_failure $dir osd_bench_large_size_max_throughput \ + ceph tell osd.0 bench $(($max_count + 1)) $bsize || return 1 + + # + # default values should work + # + ceph tell osd.0 bench || return 1 + + # + # test object_size < block_size + ceph tell osd.0 bench 10 14456 4444 3 + # + + # + # test object_size < block_size & object_size = 0(default value) + # + ceph tell osd.0 bench 1 14456 +} + +main osd-bench "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/osd/osd-bench.sh" +# End: diff --git a/qa/standalone/osd/osd-bluefs-volume-ops.sh b/qa/standalone/osd/osd-bluefs-volume-ops.sh new file mode 100755 index 000000000..aedfbc9b5 --- /dev/null +++ b/qa/standalone/osd/osd-bluefs-volume-ops.sh @@ -0,0 +1,497 @@ +#!/usr/bin/env bash + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +[ `uname` = FreeBSD ] && exit 0 + +function run() { + local dir=$1 + shift + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_bluestore() { + local dir=$1 + + local flimit=$(ulimit -n) + if [ $flimit -lt 1536 ]; then + echo "Low open file limit ($flimit), test may fail. Increase to 1536 or higher and retry if that happens." 
+ fi + export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--bluestore_block_size=2147483648 " + CEPH_ARGS+="--bluestore_block_db_create=true " + CEPH_ARGS+="--bluestore_block_db_size=1073741824 " + CEPH_ARGS+="--bluestore_block_wal_size=536870912 " + CEPH_ARGS+="--bluestore_block_wal_create=true " + CEPH_ARGS+="--bluestore_fsck_on_mount=true " + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + osd_pid0=$(cat $dir/osd.0.pid) + run_osd $dir 1 || return 1 + osd_pid1=$(cat $dir/osd.1.pid) + run_osd $dir 2 || return 1 + osd_pid2=$(cat $dir/osd.2.pid) + run_osd $dir 3 || return 1 + osd_pid3=$(cat $dir/osd.3.pid) + + sleep 5 + + create_pool foo 16 + + # write some objects + timeout 60 rados bench -p foo 30 write -b 4096 --no-cleanup #|| return 1 + + echo "after bench" + + # kill + while kill $osd_pid0; do sleep 1 ; done + ceph osd down 0 + while kill $osd_pid1; do sleep 1 ; done + ceph osd down 1 + while kill $osd_pid2; do sleep 1 ; done + ceph osd down 2 + while kill $osd_pid3; do sleep 1 ; done + ceph osd down 3 + + # expand slow devices + ceph-bluestore-tool --path $dir/0 fsck || return 1 + ceph-bluestore-tool --path $dir/1 fsck || return 1 + ceph-bluestore-tool --path $dir/2 fsck || return 1 + ceph-bluestore-tool --path $dir/3 fsck || return 1 + + truncate $dir/0/block -s 4294967296 # 4GB + ceph-bluestore-tool --path $dir/0 bluefs-bdev-expand || return 1 + truncate $dir/1/block -s 4311744512 # 4GB + 16MB + ceph-bluestore-tool --path $dir/1 bluefs-bdev-expand || return 1 + truncate $dir/2/block -s 4295099392 # 4GB + 129KB + ceph-bluestore-tool --path $dir/2 bluefs-bdev-expand || return 1 + truncate $dir/3/block -s 4293918720 # 4GB - 1MB + ceph-bluestore-tool --path $dir/3 bluefs-bdev-expand || return 1 + + # slow, DB, WAL -> slow, DB + ceph-bluestore-tool --path $dir/0 fsck || return 1 + ceph-bluestore-tool --path $dir/1 fsck || return 1 + ceph-bluestore-tool --path $dir/2 fsck || return 1 + ceph-bluestore-tool --path $dir/3 fsck || return 1 + + ceph-bluestore-tool --path $dir/0 bluefs-bdev-sizes + + ceph-bluestore-tool --path $dir/0 \ + --devs-source $dir/0/block.wal \ + --dev-target $dir/0/block.db \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/0 fsck || return 1 + + # slow, DB, WAL -> slow, WAL + ceph-bluestore-tool --path $dir/1 \ + --devs-source $dir/1/block.db \ + --dev-target $dir/1/block \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/1 fsck || return 1 + + # slow, DB, WAL -> slow + ceph-bluestore-tool --path $dir/2 \ + --devs-source $dir/2/block.wal \ + --devs-source $dir/2/block.db \ + --dev-target $dir/2/block \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/2 fsck || return 1 + + # slow, DB, WAL -> slow, WAL (negative case) + ceph-bluestore-tool --path $dir/3 \ + --devs-source $dir/3/block.db \ + --dev-target $dir/3/block.wal \ + --command bluefs-bdev-migrate + + # Migration to WAL is unsupported + if [ $? 
-eq 0 ]; then + return 1 + fi + ceph-bluestore-tool --path $dir/3 fsck || return 1 + + # slow, DB, WAL -> slow, DB (WAL to slow then slow to DB) + ceph-bluestore-tool --path $dir/3 \ + --devs-source $dir/3/block.wal \ + --dev-target $dir/3/block \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/3 fsck || return 1 + + ceph-bluestore-tool --path $dir/3 \ + --devs-source $dir/3/block \ + --dev-target $dir/3/block.db \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/3 fsck || return 1 + + activate_osd $dir 0 || return 1 + osd_pid0=$(cat $dir/osd.0.pid) + activate_osd $dir 1 || return 1 + osd_pid1=$(cat $dir/osd.1.pid) + activate_osd $dir 2 || return 1 + osd_pid2=$(cat $dir/osd.2.pid) + activate_osd $dir 3 || return 1 + osd_pid3=$(cat $dir/osd.3.pid) + + wait_for_clean || return 1 + + # write some objects + timeout 60 rados bench -p foo 30 write -b 4096 --no-cleanup #|| return 1 + + # kill + while kill $osd_pid0; do sleep 1 ; done + ceph osd down 0 + while kill $osd_pid1; do sleep 1 ; done + ceph osd down 1 + while kill $osd_pid2; do sleep 1 ; done + ceph osd down 2 + while kill $osd_pid3; do sleep 1 ; done + ceph osd down 3 + + # slow, DB -> slow, DB, WAL + ceph-bluestore-tool --path $dir/0 fsck || return 1 + + dd if=/dev/zero of=$dir/0/wal count=512 bs=1M + ceph-bluestore-tool --path $dir/0 \ + --dev-target $dir/0/wal \ + --command bluefs-bdev-new-wal || return 1 + + ceph-bluestore-tool --path $dir/0 fsck || return 1 + + # slow, WAL -> slow, DB, WAL + ceph-bluestore-tool --path $dir/1 fsck || return 1 + + dd if=/dev/zero of=$dir/1/db count=1024 bs=1M + ceph-bluestore-tool --path $dir/1 \ + --dev-target $dir/1/db \ + --command bluefs-bdev-new-db || return 1 + + ceph-bluestore-tool --path $dir/1 \ + --devs-source $dir/1/block \ + --dev-target $dir/1/block.db \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/1 fsck || return 1 + + # slow -> slow, DB, WAL + ceph-bluestore-tool --path $dir/2 fsck || return 1 + + ceph-bluestore-tool --path $dir/2 \ + --command bluefs-bdev-new-db || return 1 + + ceph-bluestore-tool --path $dir/2 \ + --command bluefs-bdev-new-wal || return 1 + + ceph-bluestore-tool --path $dir/2 \ + --devs-source $dir/2/block \ + --dev-target $dir/2/block.db \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/2 fsck || return 1 + + # slow, DB -> slow, WAL + ceph-bluestore-tool --path $dir/3 fsck || return 1 + + ceph-bluestore-tool --path $dir/3 \ + --command bluefs-bdev-new-wal || return 1 + + ceph-bluestore-tool --path $dir/3 \ + --devs-source $dir/3/block.db \ + --dev-target $dir/3/block \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/3 fsck || return 1 + + activate_osd $dir 0 || return 1 + osd_pid0=$(cat $dir/osd.0.pid) + activate_osd $dir 1 || return 1 + osd_pid1=$(cat $dir/osd.1.pid) + activate_osd $dir 2 || return 1 + osd_pid2=$(cat $dir/osd.2.pid) + activate_osd $dir 3 || return 1 + osd_pid3=$(cat $dir/osd.3.pid) + + # write some objects + timeout 60 rados bench -p foo 30 write -b 4096 --no-cleanup #|| return 1 + + # kill + while kill $osd_pid0; do sleep 1 ; done + ceph osd down 0 + while kill $osd_pid1; do sleep 1 ; done + ceph osd down 1 + while kill $osd_pid2; do sleep 1 ; done + ceph osd down 2 + while kill $osd_pid3; do sleep 1 ; done + ceph osd down 3 + + # slow, DB1, WAL -> slow, DB2, WAL + ceph-bluestore-tool --path $dir/0 fsck || return 1 + + dd if=/dev/zero of=$dir/0/db2 count=1024 bs=1M + ceph-bluestore-tool 
--path $dir/0 \ + --devs-source $dir/0/block.db \ + --dev-target $dir/0/db2 \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/0 fsck || return 1 + + # slow, DB, WAL1 -> slow, DB, WAL2 + + dd if=/dev/zero of=$dir/0/wal2 count=512 bs=1M + ceph-bluestore-tool --path $dir/0 \ + --devs-source $dir/0/block.wal \ + --dev-target $dir/0/wal2 \ + --command bluefs-bdev-migrate || return 1 + rm -rf $dir/0/wal + + ceph-bluestore-tool --path $dir/0 fsck || return 1 + + # slow, DB + WAL -> slow, DB2 -> slow + ceph-bluestore-tool --path $dir/1 fsck || return 1 + + dd if=/dev/zero of=$dir/1/db2 count=1024 bs=1M + ceph-bluestore-tool --path $dir/1 \ + --devs-source $dir/1/block.db \ + --devs-source $dir/1/block.wal \ + --dev-target $dir/1/db2 \ + --command bluefs-bdev-migrate || return 1 + + rm -rf $dir/1/db + + ceph-bluestore-tool --path $dir/1 fsck || return 1 + + ceph-bluestore-tool --path $dir/1 \ + --devs-source $dir/1/block.db \ + --dev-target $dir/1/block \ + --command bluefs-bdev-migrate || return 1 + + rm -rf $dir/1/db2 + + ceph-bluestore-tool --path $dir/1 fsck || return 1 + + # slow -> slow, DB (negative case) + ceph-objectstore-tool --type bluestore --data-path $dir/2 \ + --op fsck --no-mon-config || return 1 + + dd if=/dev/zero of=$dir/2/db2 count=1024 bs=1M + ceph-bluestore-tool --path $dir/2 \ + --devs-source $dir/2/block \ + --dev-target $dir/2/db2 \ + --command bluefs-bdev-migrate + + # Migration from slow-only to new device is unsupported + if [ $? -eq 0 ]; then + return 1 + fi + ceph-bluestore-tool --path $dir/2 fsck || return 1 + + # slow + DB + WAL -> slow, DB2 + dd if=/dev/zero of=$dir/2/db2 count=1024 bs=1M + + ceph-bluestore-tool --path $dir/2 \ + --devs-source $dir/2/block \ + --devs-source $dir/2/block.db \ + --devs-source $dir/2/block.wal \ + --dev-target $dir/2/db2 \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/2 fsck || return 1 + + # slow + WAL -> slow2, WAL2 + dd if=/dev/zero of=$dir/3/wal2 count=1024 bs=1M + + ceph-bluestore-tool --path $dir/3 \ + --devs-source $dir/3/block \ + --devs-source $dir/3/block.wal \ + --dev-target $dir/3/wal2 \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/3 fsck || return 1 + + activate_osd $dir 0 || return 1 + osd_pid0=$(cat $dir/osd.0.pid) + activate_osd $dir 1 || return 1 + osd_pid1=$(cat $dir/osd.1.pid) + activate_osd $dir 2 || return 1 + osd_pid2=$(cat $dir/osd.2.pid) + activate_osd $dir 3 || return 1 + osd_pid3=$(cat $dir/osd.3.pid) + + # write some objects + timeout 60 rados bench -p foo 30 write -b 4096 --no-cleanup #|| return 1 + + wait_for_clean || return 1 +} + +function TEST_bluestore2() { + local dir=$1 + + local flimit=$(ulimit -n) + if [ $flimit -lt 1536 ]; then + echo "Low open file limit ($flimit), test may fail. Increase to 1536 or higher and retry if that happens." 
+    fi
+    export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one
+    export CEPH_ARGS
+    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+    CEPH_ARGS+="--mon-host=$CEPH_MON "
+    CEPH_ARGS+="--bluestore_block_size=4294967296 "
+    CEPH_ARGS+="--bluestore_block_db_create=true "
+    CEPH_ARGS+="--bluestore_block_db_size=1073741824 "
+    CEPH_ARGS+="--bluestore_block_wal_create=false "
+    CEPH_ARGS+="--bluestore_fsck_on_mount=true "
+    CEPH_ARGS+="--osd_pool_default_size=1 "
+    CEPH_ARGS+="--osd_pool_default_min_size=1 "
+    CEPH_ARGS+="--bluestore_debug_enforce_settings=ssd "
+
+    run_mon $dir a || return 1
+    run_mgr $dir x || return 1
+    run_osd $dir 0 || return 1
+    osd_pid0=$(cat $dir/osd.0.pid)
+
+    sleep 5
+    create_pool foo 16
+
+    retry=0
+    while [[ $retry -le 5 ]]; do
+        # write some objects
+        timeout 60 rados bench -p foo 10 write --write-omap --no-cleanup #|| return 1
+
+        # give RocksDB some time to cool down and put files to slow level(s)
+        sleep 10
+
+        db_used=$( ceph tell osd.0 perf dump bluefs | jq ".bluefs.db_used_bytes" )
+        spilled_over=$( ceph tell osd.0 perf dump bluefs | jq ".bluefs.slow_used_bytes" )
+        ((retry+=1))
+        test $spilled_over -eq 0 || break
+    done
+    test $spilled_over -gt 0 || return 1
+
+    while kill $osd_pid0; do sleep 1 ; done
+    ceph osd down 0
+
+    ceph-bluestore-tool --path $dir/0 \
+        --devs-source $dir/0/block.db \
+        --dev-target $dir/0/block \
+        --command bluefs-bdev-migrate || return 1
+
+    ceph-bluestore-tool --path $dir/0 \
+        --command bluefs-bdev-sizes || return 1
+
+    ceph-bluestore-tool --path $dir/0 \
+        --command fsck || return 1
+
+    activate_osd $dir 0 || return 1
+    osd_pid0=$(cat $dir/osd.0.pid)
+
+    wait_for_clean || return 1
+}
+
+function TEST_bluestore_expand() {
+    local dir=$1
+
+    local flimit=$(ulimit -n)
+    if [ $flimit -lt 1536 ]; then
+        echo "Low open file limit ($flimit), test may fail. Increase to 1536 or higher and retry if that happens."
+    fi
+    export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one
+    export CEPH_ARGS
+    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+    CEPH_ARGS+="--mon-host=$CEPH_MON "
+    CEPH_ARGS+="--bluestore_block_size=4294967296 "
+    CEPH_ARGS+="--bluestore_block_db_create=true "
+    CEPH_ARGS+="--bluestore_block_db_size=1073741824 "
+    CEPH_ARGS+="--bluestore_block_wal_create=false "
+    CEPH_ARGS+="--bluestore_fsck_on_mount=true "
+    CEPH_ARGS+="--osd_pool_default_size=1 "
+    CEPH_ARGS+="--osd_pool_default_min_size=1 "
+    CEPH_ARGS+="--bluestore_debug_enforce_settings=ssd "
+
+    run_mon $dir a || return 1
+    run_mgr $dir x || return 1
+    run_osd $dir 0 || return 1
+    osd_pid0=$(cat $dir/osd.0.pid)
+
+    sleep 5
+    create_pool foo 16
+
+    # write some objects
+    timeout 60 rados bench -p foo 30 write -b 4096 --no-cleanup #|| return 1
+    sleep 5
+
+    total_space_before=$( ceph tell osd.0 perf dump bluefs | jq ".bluefs.slow_total_bytes" )
+    free_space_before=`ceph tell osd.0 bluestore bluefs device info | grep "BDEV_SLOW" -A 2 | grep free | cut -d':' -f 2 | cut -d"," -f 1 | cut -d' ' -f 2`
+
+    # kill
+    while kill $osd_pid0; do sleep 1 ; done
+    ceph osd down 0
+
+    # destage allocation to file before expand (in case fast-shutdown skipped that step)
+    ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 allocmap || return 1
+
+    # expand slow devices
+    ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 fsck || return 1
+
+    requested_space=4294967296 # 4GB
+    truncate $dir/0/block -s $requested_space
+    ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 bluefs-bdev-expand || return 1
+
+    # slow, DB, WAL -> slow, DB
+    ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 fsck || return 1
+
+    # compare allocation-file with RocksDB state
+    ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 qfsck || return 1
+
+    ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 bluefs-bdev-sizes
+
+    activate_osd $dir 0 || return 1
+    osd_pid0=$(cat $dir/osd.0.pid)
+
+    wait_for_clean || return 1
+
+    total_space_after=$( ceph tell osd.0 perf dump bluefs | jq ".bluefs.slow_total_bytes" )
+    free_space_after=`ceph tell osd.0 bluestore bluefs device info | grep "BDEV_SLOW" -A 2 | grep free | cut -d':' -f 2 | cut -d"," -f 1 | cut -d' ' -f 2`
+
+    if [ $total_space_after != $requested_space ]; then
+        echo "total_space_after = $total_space_after"
+        echo "requested_space = $requested_space"
+        return 1;
+    fi
+
+    total_space_added=$((total_space_after - total_space_before))
+    free_space_added=$((free_space_after - free_space_before))
+
+    let new_used_space=($total_space_added - $free_space_added)
+    echo $new_used_space
+    # allow up to 128KB to be consumed
+    if [ $new_used_space -gt 131072 ]; then
+        echo "total_space_added = $total_space_added"
+        echo "free_space_added = $free_space_added"
+        return 1;
+    fi
+
+    # kill
+    while kill $osd_pid0; do sleep 1 ; done
+    ceph osd down 0
+
+    ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 qfsck || return 1
+}
+
+main osd-bluefs-volume-ops "$@"
+
+# Local Variables:
+# compile-command: "cd ../..
; make -j4 && test/osd/osd-bluefs-volume-ops.sh" +# End: diff --git a/qa/standalone/osd/osd-config.sh b/qa/standalone/osd/osd-config.sh new file mode 100755 index 000000000..126c2f7de --- /dev/null +++ b/qa/standalone/osd/osd-config.sh @@ -0,0 +1,97 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com> +# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7100" # git grep '\<7100\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_config_init() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + local stale=1000 + local cache=500 + run_osd $dir 0 \ + --osd-map-cache-size=$cache \ + --osd-pg-epoch-persisted-max-stale=$stale \ + || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log flush || return 1 + grep 'is not > osd_pg_epoch_persisted_max_stale' $dir/osd.0.log || return 1 +} + +function TEST_config_track() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + + local osd_map_cache_size=$(CEPH_ARGS='' ceph-conf \ + --show-config-value osd_map_cache_size) + local osd_pg_epoch_persisted_max_stale=$(CEPH_ARGS='' ceph-conf \ + --show-config-value osd_pg_epoch_persisted_max_stale) + + # + # increase the osd_pg_epoch_persisted_max_stale above the default cache_size + # + ! 
grep 'is not > osd_pg_epoch_persisted_max_stale' $dir/osd.0.log || return 1 + local stale=$(($osd_map_cache_size * 2)) + ceph tell osd.0 injectargs "--osd-pg-epoch-persisted-max-stale $stale" || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log flush || return 1 + grep 'is not > osd_pg_epoch_persisted_max_stale' $dir/osd.0.log || return 1 + rm $dir/osd.0.log + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log reopen || return 1 +} + +function TEST_default_adjustment() { + a=$(ceph-osd --no-mon-config --show-config-value rgw_torrent_origin) + b=$(ceph-osd --no-mon-config --show-config-value rgw_torrent_origin --default-rgw-torrent-origin default) + c=$(ceph-osd --no-mon-config --show-config-value rgw_torrent_origin --default-rgw-torrent-origin arg) + [ "$a" != "default" ] || return 1 + [ "$b" = "default" ] || return 1 + [ "$c" = "arg" ] || return 1 + + a=$(ceph-osd --no-mon-config --show-config-value log_to_file) + b=$(ceph-osd --no-mon-config --show-config-value log_to_file --default-log-to-file=false) + c=$(ceph-osd --no-mon-config --show-config-value log_to_file --default-log-to-file=false --log-to-file) + [ "$a" = "true" ] || return 1 + [ "$b" = "false" ] || return 1 + [ "$c" = "true" ] || return 1 +} + +main osd-config "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/osd/osd-config.sh" +# End: diff --git a/qa/standalone/osd/osd-copy-from.sh b/qa/standalone/osd/osd-copy-from.sh new file mode 100755 index 000000000..8ac0ab541 --- /dev/null +++ b/qa/standalone/osd/osd-copy-from.sh @@ -0,0 +1,68 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com> +# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com> +# +# Author: Loic Dachary <loic@dachary.org> +# Author: Sage Weil <sage@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7111" # git grep '\<7111\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_copy_from() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + create_rbd_pool || return 1 + + # success + rados -p rbd put foo $(which rados) + rados -p rbd cp foo foo2 + rados -p rbd stat foo2 + + # failure + ceph tell osd.\* injectargs -- --osd-debug-inject-copyfrom-error + ! rados -p rbd cp foo foo3 + ! rados -p rbd stat foo3 + + # success again + ceph tell osd.\* injectargs -- --no-osd-debug-inject-copyfrom-error + ! rados -p rbd cp foo foo3 + rados -p rbd stat foo3 +} + +main osd-copy-from "$@" + +# Local Variables: +# compile-command: "cd ../.. 
; make -j4 && test/osd/osd-copy-from.sh"
# End:
diff --git a/qa/standalone/osd/osd-dup.sh b/qa/standalone/osd/osd-dup.sh
new file mode 100755
index 000000000..ab442c538
--- /dev/null
+++ b/qa/standalone/osd/osd-dup.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+[ `uname` = FreeBSD ] && exit 0
+
+function run() {
+    local dir=$1
+    shift
+
+    export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one
+    export CEPH_ARGS
+    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+    CEPH_ARGS+="--mon-host=$CEPH_MON "
+    # avoid running out of fds in rados bench
+    CEPH_ARGS+="--filestore_wbthrottle_xfs_ios_hard_limit=900 "
+    CEPH_ARGS+="--filestore_wbthrottle_btrfs_ios_hard_limit=900 "
+    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+    for func in $funcs ; do
+        setup $dir || return 1
+        $func $dir || return 1
+        teardown $dir || return 1
+    done
+}
+
+main osd-dup "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && test/osd/osd-dup.sh"
+# End:
diff --git a/qa/standalone/osd/osd-fast-mark-down.sh b/qa/standalone/osd/osd-fast-mark-down.sh
new file mode 100755
index 000000000..0ef9d8ce4
--- /dev/null
+++ b/qa/standalone/osd/osd-fast-mark-down.sh
@@ -0,0 +1,111 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2016 Piotr Dałek <git@predictor.org.pl>
+# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com>
+#
+# Author: Piotr Dałek <git@predictor.org.pl>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+MAX_PROPAGATION_TIME=30
+
+function run() {
+    local dir=$1
+    shift
+    rm -f $dir/*.pid
+    export CEPH_MON="127.0.0.1:7126" # git grep '\<7126\>' : there must be only one
+    export CEPH_ARGS
+    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+
+    OLD_ARGS=$CEPH_ARGS
+    CEPH_ARGS+="--osd-fast-fail-on-connection-refused=false "
+    echo "Ensuring old behavior is there..."
+    test_fast_kill $dir && (echo "OSDs died too early! Old behavior doesn't work." ; return 1)
+
+    CEPH_ARGS=$OLD_ARGS"--osd-fast-fail-on-connection-refused=true "
+    OLD_ARGS=$CEPH_ARGS
+
+    CEPH_ARGS=$OLD_ARGS"--ms_type=async --mon-host=$CEPH_MON"
+    echo "Testing async msgr..."
+    test_fast_kill $dir || return 1
+
+    return 0
+
+}
+
+function test_fast_kill() {
+    # create cluster with 3 osds
+    setup $dir || return 1
+    run_mon $dir a --osd_pool_default_size=3 || return 1
+    run_mgr $dir x || return 1
+    for oi in {0..2}; do
+        run_osd $dir $oi || return 1
+        pids[$oi]=$(cat $dir/osd.$oi.pid)
+    done
+
+    create_rbd_pool || return 1
+
+    # write some objects to ensure connectivity between the osds
+    timeout 20 rados -p rbd bench 10 write -b 4096 --max-objects 128 --no-cleanup || return 1
+    sleep 1
+
+    killid=0
+    previd=0
+
+    # kill a random osd and see whether, after at most MAX_PROPAGATION_TIME, the up osd count decreased.
+ for i in {1..2}; do + while [ $killid -eq $previd ]; do + killid=${pids[$RANDOM%${#pids[@]}]} + done + previd=$killid + + kill -9 $killid + time_left=$MAX_PROPAGATION_TIME + down_osds=0 + + while [ $time_left -gt 0 ]; do + sleep 1 + time_left=$[$time_left - 1]; + + grep -m 1 -c -F "ms_handle_refused" $dir/osd.*.log > /dev/null + if [ $? -ne 0 ]; then + continue + fi + + down_osds=$(ceph osd tree | grep -c down) + if [ $down_osds -lt $i ]; then + # osds not marked down yet, try again in a second + continue + elif [ $down_osds -gt $i ]; then + echo Too many \($down_osds\) osds died! + return 1 + else + break + fi + done + + if [ $down_osds -lt $i ]; then + echo Killed the OSD, yet it is not marked down + ceph osd tree + return 1 + fi + done + pkill -SIGTERM rados + teardown $dir || return 1 +} + +main osd-fast-mark-down "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/osd/osd-fast-mark-down.sh" +# End: diff --git a/qa/standalone/osd/osd-force-create-pg.sh b/qa/standalone/osd/osd-force-create-pg.sh new file mode 100755 index 000000000..ca4b0239e --- /dev/null +++ b/qa/standalone/osd/osd-force-create-pg.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7145" # git grep '\<7145\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_reuse_id() { + local dir=$1 + + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + ceph osd pool create foo 50 || return 1 + wait_for_clean || return 1 + + kill_daemons $dir TERM osd.0 + kill_daemons $dir TERM osd.1 + kill_daemons $dir TERM osd.2 + ceph-objectstore-tool --data-path $dir/0 --op remove --pgid 1.0 --force + ceph-objectstore-tool --data-path $dir/1 --op remove --pgid 1.0 --force + ceph-objectstore-tool --data-path $dir/2 --op remove --pgid 1.0 --force + activate_osd $dir 0 || return 1 + activate_osd $dir 1 || return 1 + activate_osd $dir 2 || return 1 + sleep 10 + ceph pg ls | grep 1.0 | grep stale || return 1 + + ceph osd force-create-pg 1.0 --yes-i-really-mean-it || return 1 + wait_for_clean || return 1 +} + +main osd-force-create-pg "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/osd/osd-force-create-pg.sh" +# End: diff --git a/qa/standalone/osd/osd-markdown.sh b/qa/standalone/osd/osd-markdown.sh new file mode 100755 index 000000000..5c4a78440 --- /dev/null +++ b/qa/standalone/osd/osd-markdown.sh @@ -0,0 +1,149 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2015 Intel <contact@intel.com.com> +# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com> +# +# Author: Xiaoxi Chen <xiaoxi.chen@intel.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7108" # git grep '\<7108\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function markdown_N_impl() { + markdown_times=$1 + total_time=$2 + sleeptime=$3 + for i in `seq 1 $markdown_times` + do + # check the OSD is UP + ceph tell osd.0 get_latest_osdmap || return 1 + ceph osd tree + ceph osd tree | grep osd.0 |grep up || return 1 + # mark the OSD down. + # override any dup setting in the environment to ensure we do this + # exactly once (modulo messenger failures, at least; we can't *actually* + # provide exactly-once semantics for mon commands). + ( unset CEPH_CLI_TEST_DUP_COMMAND ; ceph osd down 0 ) + sleep $sleeptime + done +} + + +function TEST_markdown_exceed_maxdown_count() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + create_rbd_pool || return 1 + + # 3+1 times within 300s, osd should stay dead on the 4th time + local count=3 + local sleeptime=10 + local period=300 + ceph tell osd.0 injectargs '--osd_max_markdown_count '$count'' || return 1 + ceph tell osd.0 injectargs '--osd_max_markdown_period '$period'' || return 1 + + markdown_N_impl $(($count+1)) $period $sleeptime + # down N+1 times ,the osd.0 should die + ceph osd tree | grep down | grep osd.0 || return 1 +} + +function TEST_markdown_boot() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + create_rbd_pool || return 1 + + # 3 times within 120s, should stay up + local count=3 + local sleeptime=10 + local period=120 + ceph tell osd.0 injectargs '--osd_max_markdown_count '$count'' || return 1 + ceph tell osd.0 injectargs '--osd_max_markdown_period '$period'' || return 1 + + markdown_N_impl $count $period $sleeptime + #down N times, osd.0 should be up + sleep 15 # give osd plenty of time to notice and come back up + ceph tell osd.0 get_latest_osdmap || return 1 + ceph osd tree | grep up | grep osd.0 || return 1 +} + +function TEST_markdown_boot_exceed_time() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + create_rbd_pool || return 1 + + # 3+1 times, but over 40s, > 20s, so should stay up + local count=3 + local period=20 + local sleeptime=10 + ceph tell osd.0 injectargs '--osd_max_markdown_count '$count'' || return 1 + ceph tell osd.0 injectargs '--osd_max_markdown_period '$period'' || return 1 + + markdown_N_impl $(($count+1)) $period $sleeptime + sleep 15 # give osd plenty of time to notice and come back up + ceph tell osd.0 get_latest_osdmap || return 1 + ceph osd tree | grep up | grep osd.0 || return 1 +} + +function TEST_osd_stop() { + + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + osd_0_pid=$(cat $dir/osd.0.pid) + ps -p $osd_0_pid || return 1 + + ceph osd tree | grep osd.0 | grep up || return 1 + 
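+ # 'ceph osd stop' marks the osd down in the osdmap and also tells the + # daemon to exit, unlike the plain 'ceph osd down' used by + # markdown_N_impl above, after which the daemon notices and marks + # itself back up; the ps check below confirms the process is gone.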
ceph osd stop osd.0 + sleep 15 # give osd plenty of time to notice and exit + ceph osd tree | grep down | grep osd.0 || return 1 + ! ps -p $osd_0_pid || return 1 +} + +main osd-markdown "$@" diff --git a/qa/standalone/osd/osd-reactivate.sh b/qa/standalone/osd/osd-reactivate.sh new file mode 100755 index 000000000..6d6438629 --- /dev/null +++ b/qa/standalone/osd/osd-reactivate.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +# +# Author: Vicente Cheng <freeze.bilsted@gmail.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7122" # git grep '\<7122\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_reactivate() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + + kill_daemons $dir TERM osd || return 1 + + ready_path=$dir"/0/ready" + activate_path=$dir"/0/active" + # trigger mkfs again + rm -rf $ready_path $activate_path + activate_osd $dir 0 || return 1 + +} + +main osd-reactivate "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/osd/osd-reactivate.sh" +# End: diff --git a/qa/standalone/osd/osd-recovery-prio.sh b/qa/standalone/osd/osd-recovery-prio.sh new file mode 100755 index 000000000..02b65f67a --- /dev/null +++ b/qa/standalone/osd/osd-recovery-prio.sh @@ -0,0 +1,542 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2019 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + # Fix port???? + export CEPH_MON="127.0.0.1:7114" # git grep '\<7114\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON --osd_max_backfills=1 --debug_reserver=20 " + # Set osd op queue = wpq for the tests. Recovery priority is not + # considered by mclock_scheduler leading to unexpected results. 
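+ # (One way to confirm which scheduler a running osd is actually using is + # e.g. 'ceph daemon osd.0 config get osd_op_queue', which should report + # wpq here.)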
+ CEPH_ARGS+="--osd-op-queue=wpq " + export objects=200 + export poolprefix=test + export FORCE_PRIO="255" # See OSD_RECOVERY_PRIORITY_FORCED + export NORMAL_PRIO="190" # See OSD_RECOVERY_PRIORITY_BASE + 10 + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + + +function TEST_recovery_priority() { + local dir=$1 + local pools=10 + local OSDS=5 + local max_tries=10 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + for p in $(seq 1 $pools) + do + create_pool "${poolprefix}$p" 1 1 + ceph osd pool set "${poolprefix}$p" size 2 + done + sleep 5 + + wait_for_clean || return 1 + + ceph pg dump pgs + + # Find 3 pools with a pg with the same primaries but second + # replica on another osd. + local PG1 + local POOLNUM1 + local pool1 + local chk_osd1_1 + local chk_osd1_2 + + local PG2 + local POOLNUM2 + local pool2 + local chk_osd2 + + local PG3 + local POOLNUM3 + local pool3 + + for p in $(seq 1 $pools) + do + ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting + local test_osd1=$(head -1 $dir/acting) + local test_osd2=$(tail -1 $dir/acting) + if [ -z "$PG1" ]; + then + PG1="${p}.0" + POOLNUM1=$p + pool1="${poolprefix}$p" + chk_osd1_1=$test_osd1 + chk_osd1_2=$test_osd2 + elif [ -z "$PG2" -a $chk_osd1_1 = $test_osd1 -a $chk_osd1_2 != $test_osd2 ]; + then + PG2="${p}.0" + POOLNUM2=$p + pool2="${poolprefix}$p" + chk_osd2=$test_osd2 + elif [ -n "$PG2" -a $chk_osd1_1 = $test_osd1 -a $chk_osd1_2 != $test_osd2 -a "$chk_osd2" != $test_osd2 ]; + then + PG3="${p}.0" + POOLNUM3=$p + pool3="${poolprefix}$p" + break + fi + done + rm -f $dir/acting + + if [ "$pool2" = "" -o "pool3" = "" ]; + then + echo "Failure to find appropirate PGs" + return 1 + fi + + for p in $(seq 1 $pools) + do + if [ $p != $POOLNUM1 -a $p != $POOLNUM2 -a $p != $POOLNUM3 ]; + then + delete_pool ${poolprefix}$p + fi + done + + ceph osd pool set $pool2 size 1 --yes-i-really-mean-it + ceph osd pool set $pool3 size 1 --yes-i-really-mean-it + wait_for_clean || return 1 + + dd if=/dev/urandom of=$dir/data bs=1M count=10 + p=1 + for pname in $pool1 $pool2 $pool3 + do + for i in $(seq 1 $objects) + do + rados -p ${pname} put obj${i}-p${p} $dir/data + done + p=$(expr $p + 1) + done + + local otherosd=$(get_not_primary $pool1 obj1-p1) + + ceph pg dump pgs + ERRORS=0 + + ceph osd set norecover + ceph osd set noout + + # Get a pg to want to recover and quickly force it + # to be preempted. + ceph osd pool set $pool3 size 2 + sleep 2 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1 + + # 3. Item is in progress, adjust priority with no higher priority waiting + for i in $(seq 1 $max_tries) + do + if ! 
ceph pg force-recovery $PG3 2>&1 | grep -q "doesn't require recovery"; then + break + fi + if [ "$i" = "$max_tries" ]; then + echo "ERROR: Didn't appear to be able to force-recovery" + ERRORS=$(expr $ERRORS + 1) + fi + sleep 2 + done + flush_pg_stats || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1 + + ceph osd out osd.$chk_osd1_2 + sleep 2 + flush_pg_stats || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1 + ceph pg dump pgs + + ceph osd pool set $pool2 size 2 + sleep 2 + flush_pg_stats || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1 + cat $dir/out + ceph pg dump pgs + + PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG1}\")).prio") + if [ "$PRIO" != "$NORMAL_PRIO" ]; + then + echo "The normal PG ${PG1} doesn't have prio $NORMAL_PRIO queued waiting" + ERRORS=$(expr $ERRORS + 1) + fi + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG3} ]; + then + echo "The first force-recovery PG $PG3 didn't become the in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $FORCE_PRIO ]; + then + echo "The first force-recovery PG ${PG3} doesn't have prio $FORCE_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + # 1. Item is queued, re-queue with new priority + for i in $(seq 1 $max_tries) + do + if ! ceph pg force-recovery $PG2 2>&1 | grep -q "doesn't require recovery"; then + break + fi + if [ "$i" = "$max_tries" ]; then + echo "ERROR: Didn't appear to be able to force-recovery" + ERRORS=$(expr $ERRORS + 1) + fi + sleep 2 + done + sleep 2 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1 + cat $dir/out + PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio") + if [ "$PRIO" != "$FORCE_PRIO" ]; + then + echo "The second force-recovery PG ${PG2} doesn't have prio $FORCE_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + flush_pg_stats || return 1 + + # 4. Item is in progress; if higher priority items are waiting, preempt the item + #ceph osd unset norecover + ceph pg cancel-force-recovery $PG3 || return 1 + sleep 2 + #ceph osd set norecover + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1 + cat $dir/out + PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG3}\")).prio") + if [ "$PRIO" != "$NORMAL_PRIO" ]; + then + echo "After cancel-force-recovery PG ${PG3} doesn't have prio $NORMAL_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + + eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG2} ]; + then + echo "The force-recovery PG $PG2 didn't become the in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $FORCE_PRIO ]; + then + echo "The force-recovery PG ${PG2} doesn't have prio $FORCE_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + ceph pg cancel-force-recovery $PG2 || return 1 + sleep 5 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1 + + # 2.
Item is queued; re-queue and preempt because the new priority is higher than the in progress item + flush_pg_stats || return 1 + ceph pg force-recovery $PG3 || return 1 + sleep 2 + + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1 + cat $dir/out + PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio") + if [ "$PRIO" != "$NORMAL_PRIO" ]; + then + echo "After cancel-force-recovery PG ${PG2} doesn't have prio $NORMAL_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + + eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG3} ]; + then + echo "The force-recovery PG $PG3 didn't get promoted to an in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $FORCE_PRIO ]; + then + echo "The force-recovery PG ${PG3} doesn't have prio $FORCE_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + ceph osd unset noout + ceph osd unset norecover + + wait_for_clean "CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations" || return 1 + + ceph pg dump pgs + + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_pgstate_history + + if [ $ERRORS != "0" ]; + then + echo "$ERRORS error(s) found" + else + echo TEST PASSED + fi + + delete_pool $pool1 + delete_pool $pool2 + delete_pool $pool3 + kill_daemons $dir || return 1 + return $ERRORS +} + +# +# Show that pool recovery_priority is added to recovery priority +# +# Create 2 pools with 2 OSDs with different primaries +# pool 1 with recovery_priority 1 +# pool 2 with recovery_priority 2 +# +# Start recovery by changing the pool sizes from 1 to 2 +# Use dump_recovery_reservations to verify priorities +function TEST_recovery_pool_priority() { + local dir=$1 + local pools=3 # Don't assume the first 2 pools are exactly what we want + local OSDS=2 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + for p in $(seq 1 $pools) + do + create_pool "${poolprefix}$p" 1 1 + ceph osd pool set "${poolprefix}$p" size 2 + done + sleep 5 + + wait_for_clean || return 1 + + ceph pg dump pgs + + # Find 2 pools with different primaries which + # means the replica must be on another osd.
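+ # With only 2 OSDs and size 2, every pg spans both osds, so a different + # primary for the second pool guarantees that the local (primary) and + # remote (replica) reservations of the two pgs land on opposite osds.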
+ local PG1 + local POOLNUM1 + local pool1 + local chk_osd1_1 + local chk_osd1_2 + + local PG2 + local POOLNUM2 + local pool2 + local chk_osd2_1 + local chk_osd2_2 + + for p in $(seq 1 $pools) + do + ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting + local test_osd1=$(head -1 $dir/acting) + local test_osd2=$(tail -1 $dir/acting) + if [ -z "$PG1" ]; + then + PG1="${p}.0" + POOLNUM1=$p + pool1="${poolprefix}$p" + chk_osd1_1=$test_osd1 + chk_osd1_2=$test_osd2 + elif [ $chk_osd1_1 != $test_osd1 ]; + then + PG2="${p}.0" + POOLNUM2=$p + pool2="${poolprefix}$p" + chk_osd2_1=$test_osd1 + chk_osd2_2=$test_osd2 + break + fi + done + rm -f $dir/acting + + if [ "$pool2" = "" ]; + then + echo "Failed to find appropriate PGs" + return 1 + fi + + for p in $(seq 1 $pools) + do + if [ $p != $POOLNUM1 -a $p != $POOLNUM2 ]; + then + delete_pool ${poolprefix}$p + fi + done + + pool1_extra_prio=1 + pool2_extra_prio=2 + pool1_prio=$(expr $NORMAL_PRIO + $pool1_extra_prio) + pool2_prio=$(expr $NORMAL_PRIO + $pool2_extra_prio) + + ceph osd pool set $pool1 size 1 --yes-i-really-mean-it + ceph osd pool set $pool1 recovery_priority $pool1_extra_prio + ceph osd pool set $pool2 size 1 --yes-i-really-mean-it + ceph osd pool set $pool2 recovery_priority $pool2_extra_prio + wait_for_clean || return 1 + + dd if=/dev/urandom of=$dir/data bs=1M count=10 + p=1 + for pname in $pool1 $pool2 + do + for i in $(seq 1 $objects) + do + rados -p ${pname} put obj${i}-p${p} $dir/data + done + p=$(expr $p + 1) + done + + local otherosd=$(get_not_primary $pool1 obj1-p1) + + ceph pg dump pgs + ERRORS=0 + + ceph osd pool set $pool1 size 2 + ceph osd pool set $pool2 size 2 + + # Both PGs should now need recovery; dump state for reference + ceph pg dump pgs + + # Wait for recovery to start + set -o pipefail + count=0 + while(true) + do + if test $(ceph --format json pg dump pgs | + jq '.pg_stats | .[] | .state | contains("recovering")' | grep -c true) == "2" + then + break + fi + sleep 2 + if test "$count" -eq "10" + then + echo "Recovery never started on both PGs" + return 1 + fi + count=$(expr $count + 1) + done + set +o pipefail + ceph pg dump pgs + + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/dump.${chk_osd1_1}.out + echo osd.${chk_osd1_1} + cat $dir/dump.${chk_osd1_1}.out + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_2}) dump_recovery_reservations > $dir/dump.${chk_osd1_2}.out + echo osd.${chk_osd1_2} + cat $dir/dump.${chk_osd1_2}.out + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/dump.${chk_osd1_1}.out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG1} ]; + then + echo "The primary PG for $pool1 didn't become the in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/dump.${chk_osd1_1}.out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $pool1_prio ]; + then + echo "The primary PG ${PG1} doesn't have prio $pool1_prio" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/dump.${chk_osd1_2}.out | jq '.remote_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG1} ]; + then + echo "The primary PG for $pool1 didn't become the in progress item on remote" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/dump.${chk_osd1_2}.out | jq '.remote_reservations.in_progress[0].prio') + if [ "$PRIO" != $pool1_prio ]; + then + echo "The primary PG ${PG1} doesn't have prio $pool1_prio on remote" + ERRORS=$(expr $ERRORS + 1)
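+ # (remote_reservations are the replica-side mirror of the primary's + # local_reservations; both sides are checked so a priority bug on + # either end of the reservation protocol is caught)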
+ fi + fi + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/dump.${chk_osd2_1}.out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG2} ]; + then + echo "The primary PG for $pool2 didn't become the in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/dump.${chk_osd2_1}.out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $pool2_prio ]; + then + echo "The primary PG ${PG2} doesn't have prio $pool2_prio" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/dump.${chk_osd2_2}.out | jq '.remote_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG2} ]; + then + echo "The primary PG $PG2 didn't become the in progress item on remote" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/dump.${chk_osd2_2}.out | jq '.remote_reservations.in_progress[0].prio') + if [ "$PRIO" != $pool2_prio ]; + then + echo "The primary PG ${PG2} doesn't have prio $pool2_prio on remote" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + wait_for_clean || return 1 + + if [ $ERRORS != "0" ]; + then + echo "$ERRORS error(s) found" + else + echo TEST PASSED + fi + + delete_pool $pool1 + delete_pool $pool2 + kill_daemons $dir || return 1 + return $ERRORS +} + +main osd-recovery-prio "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh osd-recovery-prio.sh" +# End: diff --git a/qa/standalone/osd/osd-recovery-space.sh b/qa/standalone/osd/osd-recovery-space.sh new file mode 100755 index 000000000..3bafc5138 --- /dev/null +++ b/qa/standalone/osd/osd-recovery-space.sh @@ -0,0 +1,176 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2018 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
+# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7221" # git grep '\<7221\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--osd_max_backfills=10 " + CEPH_ARGS+="--osd_mclock_override_recovery_settings=true " + export objects=600 + export poolprefix=test + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + + +function get_num_in_state() { + local state=$1 + local expression + expression+="select(contains(\"${state}\"))" + ceph --format json pg dump pgs 2>/dev/null | \ + jq ".pg_stats | [.[] | .state | $expression] | length" +} + + +function wait_for_state() { + local state=$1 + local cur_in_state + local -a delays=($(get_timeout_delays $2 5)) + local -i loop=0 + + flush_pg_stats || return 1 + while test $(get_num_pgs) == 0 ; do + sleep 1 + done + + while true ; do + cur_in_state=$(get_num_in_state ${state}) + test $cur_in_state -gt 0 && break + if (( $loop >= ${#delays[*]} )) ; then + ceph pg dump pgs + return 1 + fi + sleep ${delays[$loop]} + loop+=1 + done + return 0 +} + + +function wait_for_recovery_toofull() { + local timeout=$1 + wait_for_state recovery_toofull $timeout +} + + +# Create 1 pool with size 1 +# Set full-ratio to 50% +# Write 600 objects of 5K each (3000K) +# Inject fake_statfs_for_testing of 3600K (83% full) +# Increase the pool size to 2 +# The pool shouldn't have room to recover +function TEST_recovery_test_simple() { + local dir=$1 + local pools=1 + local OSDS=2 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + ceph osd set-nearfull-ratio .40 + ceph osd set-backfillfull-ratio .45 + ceph osd set-full-ratio .50 + + for p in $(seq 1 $pools) + do + create_pool "${poolprefix}$p" 1 1 + ceph osd pool set "${poolprefix}$p" size 1 --yes-i-really-mean-it + done + + wait_for_clean || return 1 + + dd if=/dev/urandom of=$dir/datafile bs=1024 count=5 + for o in $(seq 1 $objects) + do + rados -p "${poolprefix}$p" put obj$o $dir/datafile + done + + for o in $(seq 0 $(expr $OSDS - 1)) + do + ceph tell osd.$o injectargs '--fake_statfs_for_testing 3686400' || return 1 + done + sleep 5 + + ceph pg dump pgs + + for p in $(seq 1 $pools) + do + ceph osd pool set "${poolprefix}$p" size 2 + done + + # If this times out, we'll detect the errors below + wait_for_recovery_toofull 30 + + ERRORS=0 + if [ "$(ceph pg dump pgs | grep +recovery_toofull | wc -l)" != "1" ]; + then + echo "One pool should have been in recovery_toofull" + ERRORS="$(expr $ERRORS + 1)" + fi + + ceph pg dump pgs + ceph status + ceph status --format=json-pretty > $dir/stat.json + + eval SEV=$(jq '.health.checks.PG_RECOVERY_FULL.severity' $dir/stat.json) + if [ "$SEV" != "HEALTH_ERR" ]; then + echo "PG_RECOVERY_FULL severity $SEV not HEALTH_ERR" + ERRORS="$(expr $ERRORS + 1)" + fi + eval MSG=$(jq '.health.checks.PG_RECOVERY_FULL.summary.message' $dir/stat.json) + if [ "$MSG" != "Full OSDs blocking recovery: 1 pg recovery_toofull" ]; then + echo "PG_RECOVERY_FULL message '$MSG' mismatched" + ERRORS="$(expr $ERRORS + 1)" + fi + rm -f $dir/stat.json + + if [ $ERRORS != "0" ]; + then + return 1 + fi + + for i in $(seq 1 $pools) + do + delete_pool "${poolprefix}$i" + done + kill_daemons $dir || return 1 +} + +
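+# A sketch of running a single case from this file (assuming the usual +# standalone-test environment, i.e. CEPH_ROOT set and binaries built): pass +# the TEST_ function name as an argument and run() above will execute just +# that one instead of discovering every TEST_* function, e.g. +# +#   qa/standalone/osd/osd-recovery-space.sh TEST_recovery_test_simple +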
+main osd-recovery-space "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh osd-recovery-space.sh" +# End: diff --git a/qa/standalone/osd/osd-recovery-stats.sh b/qa/standalone/osd/osd-recovery-stats.sh new file mode 100755 index 000000000..ad6f810d7 --- /dev/null +++ b/qa/standalone/osd/osd-recovery-stats.sh @@ -0,0 +1,512 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2017 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + # Fix port???? + export CEPH_MON="127.0.0.1:7115" # git grep '\<7115\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + # so we will not force auth_log_shard to be acting_primary + CEPH_ARGS+="--osd_force_auth_primary_missing_objects=1000000 " + export margin=10 + export objects=200 + export poolname=test + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function below_margin() { + local -i check=$1 + shift + local -i target=$1 + + return $(( $check <= $target && $check >= $target - $margin ? 0 : 1 )) +} + +function above_margin() { + local -i check=$1 + shift + local -i target=$1 + + return $(( $check >= $target && $check <= $target + $margin ? 0 : 1 )) +} + +FIND_UPACT='grep "pg[[]${PG}.*recovering.*update_calc_stats " $log | tail -1 | sed "s/.*[)] \([[][^ p]*\).*$/\1/"' +FIND_FIRST='grep "pg[[]${PG}.*recovering.*update_calc_stats $which " $log | grep -F " ${UPACT}${addp}" | grep -v est | head -1 | sed "s/.* \([0-9]*\)$/\1/"' +FIND_LAST='grep "pg[[]${PG}.*recovering.*update_calc_stats $which " $log | tail -1 | sed "s/.* \([0-9]*\)$/\1/"' + +function check() { + local dir=$1 + local PG=$2 + local primary=$3 + local type=$4 + local degraded_start=$5 + local degraded_end=$6 + local misplaced_start=$7 + local misplaced_end=$8 + local primary_start=${9:-} + local primary_end=${10:-} + + local log=$dir/osd.${primary}.log + + local addp=" " + if [ "$type" = "erasure" ]; + then + addp="p" + fi + + UPACT=$(eval $FIND_UPACT) + + # Check 3rd line at start because of false recovery starts + local which="degraded" + FIRST=$(eval $FIND_FIRST) + below_margin $FIRST $degraded_start || return 1 + LAST=$(eval $FIND_LAST) + above_margin $LAST $degraded_end || return 1 + + # Check 3rd line at start because of false recovery starts + which="misplaced" + FIRST=$(eval $FIND_FIRST) + below_margin $FIRST $misplaced_start || return 1 + LAST=$(eval $FIND_LAST) + above_margin $LAST $misplaced_end || return 1 + + # This is the value of set into MISSING_ON_PRIMARY + if [ -n "$primary_start" ]; + then + which="shard $primary" + FIRST=$(eval $FIND_FIRST) + below_margin $FIRST $primary_start || return 1 + LAST=$(eval $FIND_LAST) + above_margin $LAST $primary_end || return 1 + fi +} + +# [1,0,?] 
-> [1,2,4] +# degraded 500 -> 0 +# active+recovering+degraded + +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 500 0 0 0 500 500 active+recovering+degraded 2017-11-17 19:27:36.493828 28'500 32:603 [1,2,4] 1 [1,2,4] 1 0'0 2017-11-17 19:27:05.915467 0'0 2017-11-17 19:27:05.915467 +function do_recovery_out1() { + local dir=$1 + shift + local type=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + run_osd $dir 5 || return 1 + + if [ $type = "erasure" ]; + then + ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd + create_pool $poolname 1 1 $type myprofile + else + create_pool $poolname 1 1 $type + fi + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local primary=$(get_primary $poolname obj1) + local PG=$(get_pg $poolname obj1) + # Pick one osd that is not the primary + local otherosd=$(get_not_primary $poolname obj1) + + ceph osd set norecover + kill $(cat $dir/osd.${otherosd}.pid) + ceph osd down osd.${otherosd} + ceph osd out osd.${otherosd} + ceph osd unset norecover + ceph tell osd.$(get_primary $poolname obj1) debug kick_recovery_wq 0 + sleep 2 + + wait_for_clean || return 1 + + check $dir $PG $primary $type $objects 0 0 0 || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + +function TEST_recovery_replicated_out1() { + local dir=$1 + + do_recovery_out1 $dir replicated || return 1 +} + +function TEST_recovery_erasure_out1() { + local dir=$1 + + do_recovery_out1 $dir erasure || return 1 +} + +# [0, 1] -> [2,3,4,5] +# degraded 1000 -> 0 +# misplaced 1000 -> 0 +# missing on primary 500 -> 0 + +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 500 1000 1000 0 0 500 500 active+recovering+degraded 2017-10-27 09:38:37.453438 22'500 25:394 [2,4,3,5] 2 [2,4,3,5] 2 0'0 2017-10-27 09:37:58.046748 0'0 2017-10-27 09:37:58.046748 +function TEST_recovery_sizeup() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + run_osd $dir 5 || return 1 + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 2 + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local primary=$(get_primary $poolname obj1) + local PG=$(get_pg $poolname obj1) + # Pick one osd that is not the primary + local otherosd=$(get_not_primary $poolname obj1) + + ceph osd set norecover + ceph osd out osd.$primary osd.$otherosd + ceph osd pool set $poolname size 4 + ceph osd unset norecover + # Get new primary + primary=$(get_primary $poolname obj1) + + ceph tell osd.${primary} debug kick_recovery_wq 0 + sleep 2 + + wait_for_clean || return 1 + + local degraded=$(expr $objects \* 2) + local misplaced=$(expr $objects \* 2) + local log=$dir/osd.${primary}.log + check $dir $PG $primary replicated $degraded 0 $misplaced 0 $objects 0 || return 1 + + delete_pool
$poolname + kill_daemons $dir || return 1 +} + +# [0, 1, 2, 4] -> [3, 5] +# misplaced 1000 -> 0 +# missing on primary 500 -> 0 +# active+recovering+degraded + +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 500 0 1000 0 0 500 500 active+recovering+degraded 2017-10-27 09:34:50.012261 22'500 27:118 [3,5] 3 [3,5] 3 0'0 2017-10-27 09:34:08.617248 0'0 2017-10-27 09:34:08.617248 +function TEST_recovery_sizedown() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + run_osd $dir 5 || return 1 + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 4 + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local primary=$(get_primary $poolname obj1) + local PG=$(get_pg $poolname obj1) + # Get all osds in the acting set + local allosds=$(get_osds $poolname obj1) + + ceph osd set norecover + for osd in $allosds + do + ceph osd out osd.$osd + done + + ceph osd pool set $poolname size 2 + ceph osd unset norecover + ceph tell osd.$(get_primary $poolname obj1) debug kick_recovery_wq 0 + sleep 2 + + wait_for_clean || return 1 + + # Get new primary + primary=$(get_primary $poolname obj1) + + local misplaced=$(expr $objects \* 2) + local log=$dir/osd.${primary}.log + check $dir $PG $primary replicated 0 0 $misplaced 0 || return 1 + + UPACT=$(grep "pg[[]${PG}.*recovering.*update_calc_stats " $log | tail -1 | sed "s/.*[)] \([[][^ p]*\).*$/\1/") + + # This is the value set into MISSING_ON_PRIMARY + FIRST=$(grep "pg[[]${PG}.*recovering.*update_calc_stats shard $primary " $log | grep -F " $UPACT " | head -1 | sed "s/.* \([0-9]*\)$/\1/") + below_margin $FIRST $objects || return 1 + LAST=$(grep "pg[[]${PG}.*recovering.*update_calc_stats shard $primary " $log | tail -1 | sed "s/.* \([0-9]*\)$/\1/") + above_margin $LAST 0 || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + +# [1] -> [1,2] +# degraded 300 -> 200 +# active+recovering+undersized+degraded + +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 100 0 300 0 0 0 100 100 active+recovering+undersized+degraded 2017-11-17 17:16:15.302943 13'500 16:643 [1,2] 1 [1,2] 1 0'0 2017-11-17 17:15:34.985563 0'0 2017-11-17 17:15:34.985563 +function TEST_recovery_undersized() { + local dir=$1 + + local osds=3 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for i in $(seq 0 $(expr $osds - 1)) + do + run_osd $dir $i || return 1 + done + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 1 --yes-i-really-mean-it + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local primary=$(get_primary $poolname obj1) + local PG=$(get_pg $poolname obj1) + + ceph osd set norecover + # Mark out one osd that is not the primary (size is 1, so it holds no replica either) + for i in $(seq 0 $(expr $osds - 1)) + do + if [ $i = $primary ]; + then + continue + fi + ceph osd out osd.$i + break + done + ceph osd pool set $poolname size 4 + ceph osd unset norecover + ceph tell osd.$(get_primary $poolname obj1) debug kick_recovery_wq 0
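+ # ('debug kick_recovery_wq 0' sets osd_recovery_sleep to 0 and pokes the + # recovery work queue, so recovery starts immediately instead of waiting + # out the configured sleep between recovery ops)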
+ # Give extra sleep time because code below doesn't have the sophistication of wait_for_clean() + sleep 10 + flush_pg_stats || return 1 + + # Wait for recovery to finish + # Can't use wait_for_clean() because state goes from active+recovering+undersized+degraded + # to active+undersized+degraded + for i in $(seq 1 300) + do + if ceph pg dump pgs | grep ^$PG | grep -qv recovering + then + break + fi + if [ $i = "300" ]; + then + echo "Timeout waiting for recovery to finish" + return 1 + fi + sleep 1 + done + + # Get new primary + primary=$(get_primary $poolname obj1) + local log=$dir/osd.${primary}.log + + local first_degraded=$(expr $objects \* 3) + local last_degraded=$(expr $objects \* 2) + check $dir $PG $primary replicated $first_degraded $last_degraded 0 0 || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + +# [1,0,2] -> [1,3,NONE]/[1,3,2] +# degraded 100 -> 0 +# misplaced 100 -> 100 +# active+recovering+degraded+remapped + +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 100 0 100 100 0 0 100 100 active+recovering+degraded+remapped 2017-11-27 21:24:20.851243 18'500 23:618 [1,3,NONE] 1 [1,3,2] 1 0'0 2017-11-27 21:23:39.395242 0'0 2017-11-27 21:23:39.395242 +function TEST_recovery_erasure_remapped() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + + ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd + create_pool $poolname 1 1 erasure myprofile + ceph osd pool set $poolname min_size 2 + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local primary=$(get_primary $poolname obj1) + local PG=$(get_pg $poolname obj1) + local otherosd=$(get_not_primary $poolname obj1) + + ceph osd set norecover + kill $(cat $dir/osd.${otherosd}.pid) + ceph osd down osd.${otherosd} + ceph osd out osd.${otherosd} + + # Mark osd not the primary and not down/out osd as just out + for i in 0 1 2 3 + do + if [ $i = $primary ]; + then + continue + fi + if [ $i = $otherosd ]; + then + continue + fi + ceph osd out osd.$i + break + done + ceph osd unset norecover + ceph tell osd.$(get_primary $poolname obj1) debug kick_recovery_wq 0 + sleep 2 + + wait_for_clean || return 1 + + local log=$dir/osd.${primary}.log + check $dir $PG $primary erasure $objects 0 $objects $objects || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + +function TEST_recovery_multi() { + local dir=$1 + + local osds=6 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for i in $(seq 0 $(expr $osds - 1)) + do + run_osd $dir $i || return 1 + done + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 3 + ceph osd pool set $poolname min_size 1 + + wait_for_clean || return 1 + + rados -p $poolname put obj1 /dev/null + + local primary=$(get_primary $poolname obj1) + local otherosd=$(get_not_primary $poolname obj1) + + ceph osd set noout + ceph osd set norecover + kill $(cat $dir/osd.${otherosd}.pid) + ceph osd down osd.${otherosd} + + local half=$(expr $objects / 2) + for i in $(seq 2 $half) + do + rados -p $poolname put obj$i /dev/null + done + + kill $(cat $dir/osd.${primary}.pid) + ceph osd down osd.${primary} + activate_osd $dir ${otherosd} + 
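+ # Writing the first half of the objects while the replica was down and + # the second half (below) while the old primary is down leaves the two + # osds with divergent missing sets, which the final stats check relies on.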
sleep 3 + + for i in $(seq $(expr $half + 1) $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local PG=$(get_pg $poolname obj1) + local otherosd=$(get_not_primary $poolname obj$objects) + + ceph osd unset noout + ceph osd out osd.$primary osd.$otherosd + activate_osd $dir ${primary} + sleep 3 + + ceph osd pool set test size 4 + ceph osd unset norecover + ceph tell osd.$(get_primary $poolname obj1) debug kick_recovery_wq 0 + sleep 2 + + wait_for_clean || return 1 + + # Get new primary + primary=$(get_primary $poolname obj1) + + local log=$dir/osd.${primary}.log + check $dir $PG $primary replicated 399 0 300 0 99 0 || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + +main osd-recovery-stats "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh osd-recovery-stats.sh" +# End: diff --git a/qa/standalone/osd/osd-rep-recov-eio.sh b/qa/standalone/osd/osd-rep-recov-eio.sh new file mode 100755 index 000000000..6fea441b3 --- /dev/null +++ b/qa/standalone/osd/osd-rep-recov-eio.sh @@ -0,0 +1,422 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2017 Red Hat <contact@redhat.com> +# +# +# Author: Kefu Chai <kchai@redhat.com> +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +warnings=10 + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7140" # git grep '\<7140\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + # set warning amount in case default changes + run_mon $dir a --mon_osd_warn_num_repaired=$warnings || return 1 + run_mgr $dir x || return 1 + ceph osd pool create foo 8 || return 1 + + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function setup_osds() { + local count=$1 + shift + local type=$1 + + for id in $(seq 0 $(expr $count - 1)) ; do + run_osd${type} $dir $id || return 1 + done + wait_for_clean || return 1 +} + +function get_state() { + local pgid=$1 + local sname=state + ceph --format json pg dump pgs 2>/dev/null | \ + jq -r ".pg_stats | .[] | select(.pgid==\"$pgid\") | .$sname" +} + +function rados_put() { + local dir=$1 + local poolname=$2 + local objname=${3:-SOMETHING} + + for marker in AAA BBB CCCC DDDD ; do + printf "%*s" 1024 $marker + done > $dir/ORIGINAL + # + # get and put an object, compare they are equal + # + rados --pool $poolname put $objname $dir/ORIGINAL || return 1 +} + +function rados_get() { + local dir=$1 + local poolname=$2 + local objname=${3:-SOMETHING} + local expect=${4:-ok} + + # + # Expect a failure to get object + # + if [ $expect = "fail" ]; + then + ! rados --pool $poolname get $objname $dir/COPY + return + fi + # + # Expect hang trying to get object + # + if [ $expect = "hang" ]; + then + timeout 5 rados --pool $poolname get $objname $dir/COPY + test "$?" 
= "124" + return + fi + # + # get an object, compare with $dir/ORIGINAL + # + rados --pool $poolname get $objname $dir/COPY || return 1 + diff $dir/ORIGINAL $dir/COPY || return 1 + rm $dir/COPY +} + +function rados_get_data() { + local inject=$1 + shift + local dir=$1 + + local poolname=pool-rep + local objname=obj-$inject-$$ + local pgid=$(get_pg $poolname $objname) + + rados_put $dir $poolname $objname || return 1 + inject_$inject rep data $poolname $objname $dir 0 || return 1 + rados_get $dir $poolname $objname || return 1 + + wait_for_clean + COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired') + test "$COUNT" = "1" || return 1 + flush_pg_stats + COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired") + test "$COUNT" = "1" || return 1 + + local object_osds=($(get_osds $poolname $objname)) + local primary=${object_osds[0]} + local bad_peer=${object_osds[1]} + inject_$inject rep data $poolname $objname $dir 0 || return 1 + inject_$inject rep data $poolname $objname $dir 1 || return 1 + # Force primary to pull from the bad peer, so we can repair it too! + set_config osd $primary osd_debug_feed_pullee $bad_peer || return 1 + rados_get $dir $poolname $objname || return 1 + + # Wait until automatic repair of bad peer is done + wait_for_clean || return 1 + + inject_$inject rep data $poolname $objname $dir 0 || return 1 + inject_$inject rep data $poolname $objname $dir 2 || return 1 + rados_get $dir $poolname $objname || return 1 + + wait_for_clean + COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired') + test "$COUNT" = "3" || return 1 + flush_pg_stats + COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired") + test "$COUNT" = "4" || return 1 + + inject_$inject rep data $poolname $objname $dir 0 || return 1 + inject_$inject rep data $poolname $objname $dir 1 || return 1 + inject_$inject rep data $poolname $objname $dir 2 || return 1 + rados_get $dir $poolname $objname hang || return 1 + + wait_for_clean + # After hang another repair couldn't happen, so count stays the same + COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired') + test "$COUNT" = "3" || return 1 + flush_pg_stats + COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired") + test "$COUNT" = "4" || return 1 +} + +function TEST_rados_get_with_eio() { + local dir=$1 + + setup_osds 4 || return 1 + + local poolname=pool-rep + create_pool $poolname 1 1 || return 1 + wait_for_clean || return 1 + rados_get_data eio $dir || return 1 + + delete_pool $poolname +} + +function TEST_rados_repair_warning() { + local dir=$1 + local OBJS=$(expr $warnings + 1) + + setup_osds 4 || return 1 + + local poolname=pool-rep + create_pool $poolname 1 1 || return 1 + wait_for_clean || return 1 + + local poolname=pool-rep + local objbase=obj-warn + local inject=eio + + for i in $(seq 1 $OBJS) + do + rados_put $dir $poolname ${objbase}-$i || return 1 + inject_$inject rep data $poolname ${objbase}-$i $dir 0 || return 1 + rados_get $dir $poolname ${objbase}-$i || return 1 + done + local pgid=$(get_pg $poolname ${objbase}-1) + + local object_osds=($(get_osds $poolname ${objbase}-1)) + local primary=${object_osds[0]} + local bad_peer=${object_osds[1]} + + wait_for_clean + COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired') + test "$COUNT" = "$OBJS" || return 1 + flush_pg_stats + COUNT=$(ceph pg dump --format=json-pretty | jq 
".pg_map.osd_stats_sum.num_shards_repaired") + test "$COUNT" = "$OBJS" || return 1 + + ceph health | grep -q "Too many repaired reads on 1 OSDs" || return 1 + ceph health detail | grep -q "osd.$primary had $OBJS reads repaired" || return 1 + + ceph health mute OSD_TOO_MANY_REPAIRS + set -o pipefail + # Should mute this + ceph health | $(! grep -q "Too many repaired reads on 1 OSDs") || return 1 + set +o pipefail + + for i in $(seq 1 $OBJS) + do + inject_$inject rep data $poolname ${objbase}-$i $dir 0 || return 1 + inject_$inject rep data $poolname ${objbase}-$i $dir 1 || return 1 + # Force primary to pull from the bad peer, so we can repair it too! + set_config osd $primary osd_debug_feed_pullee $bad_peer || return 1 + rados_get $dir $poolname ${objbase}-$i || return 1 + done + + wait_for_clean + COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired') + test "$COUNT" = "$(expr $OBJS \* 2)" || return 1 + flush_pg_stats + COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired") + test "$COUNT" = "$(expr $OBJS \* 3)" || return 1 + + # Give mon a chance to notice additional OSD and unmute + # The default tick time is 5 seconds + CHECKTIME=10 + LOOPS=0 + while(true) + do + sleep 1 + if ceph health | grep -q "Too many repaired reads on 2 OSDs" + then + break + fi + LOOPS=$(expr $LOOPS + 1) + if test "$LOOPS" = "$CHECKTIME" + then + echo "Too many repaired reads not seen after $CHECKTIME seconds" + return 1 + fi + done + ceph health detail | grep -q "osd.$primary had $(expr $OBJS \* 2) reads repaired" || return 1 + ceph health detail | grep -q "osd.$bad_peer had $OBJS reads repaired" || return 1 + + delete_pool $poolname +} + +# Test backfill with unfound object +function TEST_rep_backfill_unfound() { + local dir=$1 + local objname=myobject + local lastobj=300 + # Must be between 1 and $lastobj + local testobj=obj250 + + export CEPH_ARGS + CEPH_ARGS+=' --osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10' + setup_osds 3 || return 1 + + local poolname=test-pool + create_pool $poolname 1 1 || return 1 + wait_for_clean || return 1 + + ceph pg dump pgs + + rados_put $dir $poolname $objname || return 1 + + local -a initial_osds=($(get_osds $poolname $objname)) + local last_osd=${initial_osds[-1]} + kill_daemons $dir TERM osd.${last_osd} 2>&2 < /dev/null || return 1 + ceph osd down ${last_osd} || return 1 + ceph osd out ${last_osd} || return 1 + + ceph pg dump pgs + + dd if=/dev/urandom of=${dir}/ORIGINAL bs=1024 count=4 + for i in $(seq 1 $lastobj) + do + rados --pool $poolname put obj${i} $dir/ORIGINAL || return 1 + done + + inject_eio rep data $poolname $testobj $dir 0 || return 1 + inject_eio rep data $poolname $testobj $dir 1 || return 1 + + activate_osd $dir ${last_osd} || return 1 + ceph osd in ${last_osd} || return 1 + + sleep 15 + + for tmp in $(seq 1 360); do + state=$(get_state 2.0) + echo $state | grep backfill_unfound + if [ "$?" = "0" ]; then + break + fi + echo "$state " + sleep 1 + done + + ceph pg dump pgs + ceph pg 2.0 list_unfound | grep -q $testobj || return 1 + + # Command should hang because object is unfound + timeout 5 rados -p $poolname get $testobj $dir/CHECK + test $? = "124" || return 1 + + ceph pg 2.0 mark_unfound_lost delete + + wait_for_clean || return 1 + + for i in $(seq 1 $lastobj) + do + if [ obj${i} = "$testobj" ]; then + # Doesn't exist anymore + ! 
rados -p $poolname get $testobj $dir/CHECK || return 1 + else + rados --pool $poolname get obj${i} $dir/CHECK || return 1 + diff -q $dir/ORIGINAL $dir/CHECK || return 1 + fi + done + + rm -f ${dir}/ORIGINAL ${dir}/CHECK + + delete_pool $poolname +} + +# Test recovery with unfound object +function TEST_rep_recovery_unfound() { + local dir=$1 + local objname=myobject + local lastobj=100 + # Must be between 1 and $lastobj + local testobj=obj75 + + setup_osds 3 || return 1 + + local poolname=test-pool + create_pool $poolname 1 1 || return 1 + wait_for_clean || return 1 + + ceph pg dump pgs + + rados_put $dir $poolname $objname || return 1 + + local -a initial_osds=($(get_osds $poolname $objname)) + local last_osd=${initial_osds[-1]} + kill_daemons $dir TERM osd.${last_osd} 2>&2 < /dev/null || return 1 + ceph osd down ${last_osd} || return 1 + ceph osd out ${last_osd} || return 1 + + ceph pg dump pgs + + dd if=/dev/urandom of=${dir}/ORIGINAL bs=1024 count=4 + for i in $(seq 1 $lastobj) + do + rados --pool $poolname put obj${i} $dir/ORIGINAL || return 1 + done + + inject_eio rep data $poolname $testobj $dir 0 || return 1 + inject_eio rep data $poolname $testobj $dir 1 || return 1 + + activate_osd $dir ${last_osd} || return 1 + ceph osd in ${last_osd} || return 1 + + sleep 15 + + for tmp in $(seq 1 100); do + state=$(get_state 2.0) + echo $state | grep -v recovering + if [ "$?" = "0" ]; then + break + fi + echo "$state " + sleep 1 + done + + ceph pg dump pgs + ceph pg 2.0 list_unfound | grep -q $testobj || return 1 + + # Command should hang because object is unfound + timeout 5 rados -p $poolname get $testobj $dir/CHECK + test $? = "124" || return 1 + + ceph pg 2.0 mark_unfound_lost delete + + wait_for_clean || return 1 + + for i in $(seq 1 $lastobj) + do + if [ obj${i} = "$testobj" ]; then + # Doesn't exist anymore + ! rados -p $poolname get $testobj $dir/CHECK || return 1 + else + rados --pool $poolname get obj${i} $dir/CHECK || return 1 + diff -q $dir/ORIGINAL $dir/CHECK || return 1 + fi + done + + rm -f ${dir}/ORIGINAL ${dir}/CHECK + + delete_pool $poolname +} + +main osd-rep-recov-eio.sh "$@" + +# Local Variables: +# compile-command: "cd ../../../build ; make -j4 && ../qa/run-standalone.sh osd-rep-recov-eio.sh" +# End: diff --git a/qa/standalone/osd/osd-reuse-id.sh b/qa/standalone/osd/osd-reuse-id.sh new file mode 100755 index 000000000..b24b6f2eb --- /dev/null +++ b/qa/standalone/osd/osd-reuse-id.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2015 Red Hat <contact@redhat.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
+# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7123" # git grep '\<7123\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_reuse_id() { + local dir=$1 + + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + destroy_osd $dir 1 || return 1 + run_osd $dir 1 || return 1 +} + +main osd-reuse-id "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/osd/osd-reuse-id.sh" +# End: diff --git a/qa/standalone/osd/pg-split-merge.sh b/qa/standalone/osd/pg-split-merge.sh new file mode 100755 index 000000000..7f2899b60 --- /dev/null +++ b/qa/standalone/osd/pg-split-merge.sh @@ -0,0 +1,203 @@ +#!/usr/bin/env bash +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7147" # git grep '\<7147\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON --mon_min_osdmap_epochs=50 --paxos_service_trim_min=10" + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_a_merge_empty() { + local dir=$1 + + run_mon $dir a --osd_pool_default_size=3 || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + ceph osd pool create foo 2 || return 1 + ceph osd pool set foo pgp_num 1 || return 1 + + wait_for_clean || return 1 + + # note: we need 1.0 to have at least as many objects as 1.1 + # 1.1 + rados -p foo put foo1 /etc/passwd + rados -p foo put foo2 /etc/passwd + rados -p foo put foo3 /etc/passwd + rados -p foo put foo4 /etc/passwd + # 1.0 + rados -p foo put foo5 /etc/passwd + rados -p foo put foo6 /etc/passwd + rados -p foo put foo8 /etc/passwd + rados -p foo put foo10 /etc/passwd + rados -p foo put foo11 /etc/passwd + rados -p foo put foo12 /etc/passwd + rados -p foo put foo16 /etc/passwd + + wait_for_clean || return 1 + + ceph tell osd.1 config set osd_debug_no_purge_strays true + ceph osd pool set foo size 2 || return 1 + wait_for_clean || return 1 + + kill_daemons $dir TERM osd.2 || return 1 + ceph-objectstore-tool --data-path $dir/2 --op remove --pgid 1.1 --force || return 1 + activate_osd $dir 2 || return 1 + + wait_for_clean || return 1 + + # osd.2: now 1.0 is there but 1.1 is not + + # instantiate 1.1 on osd.2 with last_update=0'0 ('empty'), which is + # the problematic state... then let it merge with 1.0 + ceph tell osd.2 config set osd_debug_no_acting_change true + ceph osd out 0 1 + ceph osd pool set foo pg_num 1 + sleep 5 + ceph tell osd.2 config set osd_debug_no_acting_change false + + # go back to osd.1 being primary, and 3x so the osd.2 copy doesn't get + # removed + ceph osd in 0 1 + ceph osd pool set foo size 3 + + wait_for_clean || return 1 + + # scrub to ensure the osd.2 copy of 1.0 came through the merge complete + # rather than missing half of its objects.
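+ # (scrub compares the object listings of the replicas, so a merge that + # silently dropped the 1.1 objects on osd.2 would surface as a pg that + # never returns to the active.clean state we grep for below)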
+ ceph pg scrub 1.0 + sleep 10 + ceph log last debug + ceph pg ls + ceph pg ls | grep ' active.clean ' || return 1 +} + +function TEST_import_after_merge_and_gap() { + local dir=$1 + + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + + ceph osd pool create foo 2 || return 1 + wait_for_clean || return 1 + rados -p foo bench 3 write -b 1024 --no-cleanup || return 1 + + kill_daemons $dir TERM osd.0 || return 1 + ceph-objectstore-tool --data-path $dir/0 --op export --pgid 1.1 --file $dir/1.1 --force || return 1 + ceph-objectstore-tool --data-path $dir/0 --op export --pgid 1.0 --file $dir/1.0 --force || return 1 + activate_osd $dir 0 || return 1 + + ceph osd pool set foo pg_num 1 + sleep 5 + while ceph daemon osd.0 perf dump | jq '.osd.numpg' | grep 2 ; do sleep 1 ; done + wait_for_clean || return 1 + + # + kill_daemons $dir TERM osd.0 || return 1 + ceph-objectstore-tool --data-path $dir/0 --op remove --pgid 1.0 --force || return 1 + # this will import both halves of the original pg + ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.1 --file $dir/1.1 || return 1 + ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.0 --file $dir/1.0 || return 1 + activate_osd $dir 0 || return 1 + + wait_for_clean || return 1 + + # make a map gap + for f in `seq 1 50` ; do + ceph osd set nodown + ceph osd unset nodown + done + + # poke and prod to ensure last_epoch_clean is big, reported to mon, and + # the osd is able to trim old maps + rados -p foo bench 1 write -b 1024 --no-cleanup || return 1 + wait_for_clean || return 1 + ceph tell osd.0 send_beacon + sleep 5 + ceph osd set nodown + ceph osd unset nodown + sleep 5 + + kill_daemons $dir TERM osd.0 || return 1 + + # this should fail... 1.1 still doesn't exist + ! ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.1 --file $dir/1.1 || return 1 + + ceph-objectstore-tool --data-path $dir/0 --op export-remove --pgid 1.0 --force --file $dir/1.0.later || return 1 + + # this should fail too because of the gap + ! ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.1 --file $dir/1.1 || return 1 + ! ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.0 --file $dir/1.0 || return 1 + + # we can force it... + ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.1 --file $dir/1.1 --force || return 1 + ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.0 --file $dir/1.0 --force || return 1 + + # ...but the osd won't start, so remove it again.
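+ # (presumably because the force-imported pgs reference osdmap epochs + # inside the gap created above, which the osd can no longer fetch at + # boot; hence the export/remove/re-import dance via 1.0.later)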
+ ceph-objectstore-tool --data-path $dir/0 --op remove --pgid 1.0 --force || return 1
+ ceph-objectstore-tool --data-path $dir/0 --op remove --pgid 1.1 --force || return 1
+
+ ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.0 --file $dir/1.0.later --force || return 1
+
+
+ activate_osd $dir 0 || return 1
+
+ wait_for_clean || return 1
+}
+
+function TEST_import_after_split() {
+ local dir=$1
+
+ run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+
+ ceph osd pool create foo 1 || return 1
+ wait_for_clean || return 1
+ rados -p foo bench 3 write -b 1024 --no-cleanup || return 1
+
+ kill_daemons $dir TERM osd.0 || return 1
+ ceph-objectstore-tool --data-path $dir/0 --op export --pgid 1.0 --file $dir/1.0 --force || return 1
+ activate_osd $dir 0 || return 1
+
+ ceph osd pool set foo pg_num 2
+ sleep 5
+ while ceph daemon osd.0 perf dump | jq '.osd.numpg' | grep 1 ; do sleep 1 ; done
+ wait_for_clean || return 1
+
+ kill_daemons $dir TERM osd.0 || return 1
+
+ ceph-objectstore-tool --data-path $dir/0 --op remove --pgid 1.0 --force || return 1
+
+ # this should fail because 1.1 (split child) is there
+ ! ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.0 --file $dir/1.0 || return 1
+
+ ceph-objectstore-tool --data-path $dir/0 --op remove --pgid 1.1 --force || return 1
+ # now it will work (1.1 is gone)
+ ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.0 --file $dir/1.0 || return 1
+
+ activate_osd $dir 0 || return 1
+
+ wait_for_clean || return 1
+}
+
+
+main pg-split-merge "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && test/osd/pg-split-merge.sh"
+# End:
diff --git a/qa/standalone/osd/repeer-on-acting-back.sh b/qa/standalone/osd/repeer-on-acting-back.sh
new file mode 100755
index 000000000..af406ef92
--- /dev/null
+++ b/qa/standalone/osd/repeer-on-acting-back.sh
@@ -0,0 +1,129 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2020 ZTE Corporation <contact@zte.com.cn>
+#
+# Author: xie xingguo <xie.xingguo@zte.com.cn>
+# Author: Yan Jun <yan.jun8@zte.com.cn>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export poolname=test + export testobjects=100 + export loglen=12 + export trim=$(expr $loglen / 2) + export CEPH_MON="127.0.0.1:7115" # git grep '\<7115\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + # so we will not force auth_log_shard to be acting_primary + CEPH_ARGS+="--osd_force_auth_primary_missing_objects=1000000 " + # use small pg_log settings, so we always do backfill instead of recovery + CEPH_ARGS+="--osd_min_pg_log_entries=$loglen --osd_max_pg_log_entries=$loglen --osd_pg_log_trim_min=$trim " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + + +function TEST_repeer_on_down_acting_member_coming_back() { + local dir=$1 + local dummyfile='/etc/fstab' + + local num_osds=6 + local osds="$(seq 0 $(expr $num_osds - 1))" + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for i in $osds + do + run_osd $dir $i || return 1 + done + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 3 + ceph osd pool set $poolname min_size 2 + local poolid=$(ceph pg dump pools -f json | jq '.pool_stats' | jq '.[].poolid') + local pgid=$poolid.0 + + # enable required feature-bits for upmap + ceph osd set-require-min-compat-client luminous + # reset up to [1,2,3] + ceph osd pg-upmap $pgid 1 2 3 || return 1 + + flush_pg_stats || return 1 + wait_for_clean || return 1 + + echo "writing initial objects" + # write a bunch of objects + for i in $(seq 1 $testobjects) + do + rados -p $poolname put existing_$i $dummyfile + done + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + # reset up to [1,4,5] + ceph osd pg-upmap $pgid 1 4 5 || return 1 + + # wait for peering to complete + sleep 2 + + # make sure osd.2 belongs to current acting set + ceph pg $pgid query | jq '.acting' | grep 2 || return 1 + + # kill osd.2 + kill_daemons $dir KILL osd.2 || return 1 + ceph osd down osd.2 + + # again, wait for peering to complete + sleep 2 + + # osd.2 should have been moved out from acting set + ceph pg $pgid query | jq '.acting' | grep 2 && return 1 + + # bring up osd.2 + activate_osd $dir 2 || return 1 + wait_for_osd up 2 + + # again, wait for peering to complete + sleep 2 + + # primary should be able to re-add osd.2 into acting + ceph pg $pgid query | jq '.acting' | grep 2 || return 1 + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + if ! 
grep -q "Active: got notify from previous acting member.*, requesting pg_temp change" $(find $dir -name '*osd*log')
+ then
+ echo failure
+ return 1
+ fi
+ echo "success"
+
+ delete_pool $poolname
+ kill_daemons $dir || return 1
+}
+
+main repeer-on-acting-back "$@"
+
+# Local Variables:
+# compile-command: "make -j4 && ../qa/run-standalone.sh repeer-on-acting-back.sh"
+# End:
diff --git a/qa/standalone/osd/repro_long_log.sh b/qa/standalone/osd/repro_long_log.sh
new file mode 100755
index 000000000..fa27d7017
--- /dev/null
+++ b/qa/standalone/osd/repro_long_log.sh
@@ -0,0 +1,197 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
+# Copyright (C) 2018 Red Hat <contact@redhat.com>
+#
+# Author: Josh Durgin <jdurgin@redhat.com>
+# Author: David Zafman <dzafman@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7100" # git grep '\<7100\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+PGID=
+
+function test_log_size()
+{
+ local PGID=$1
+ local EXPECTED=$2
+ local DUPS_EXPECTED=${3:-0}
+ ceph tell osd.\* flush_pg_stats
+ sleep 3
+ ceph pg $PGID query | jq .info.stats.log_size
+ ceph pg $PGID query | jq .info.stats.log_size | grep "${EXPECTED}"
+ ceph pg $PGID query | jq .info.stats.log_dups_size
+ ceph pg $PGID query | jq .info.stats.log_dups_size | grep "${DUPS_EXPECTED}"
+}
+
+function setup_log_test() {
+ local dir=$1
+ local which=$2
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+
+ ceph osd pool create test 1 1 || true
+ POOL_ID=$(ceph osd dump --format json | jq '.pools[] | select(.pool_name == "test") | .pool')
+ PGID="${POOL_ID}.0"
+
+ # With 1 PG, setting entries per OSD to 20 results in a target log of 20
+ ceph tell osd.\* injectargs -- --osd_target_pg_log_entries_per_osd 20 || return 1
+ ceph tell osd.\* injectargs -- --osd-min-pg-log-entries 20 || return 1
+ ceph tell osd.\* injectargs -- --osd-max-pg-log-entries 30 || return 1
+ ceph tell osd.\* injectargs -- --osd-pg-log-trim-min 10 || return 1
+ ceph tell osd.\* injectargs -- --osd_pg_log_dups_tracked 20 || return 1
+
+ touch $dir/foo
+ for i in $(seq 1 20)
+ do
+ rados -p test put foo $dir/foo || return 1
+ done
+
+ test_log_size $PGID 20 || return 1
+
+ rados -p test rm foo || return 1
+
+ # generate error entries
+ for i in $(seq 1 20)
+ do
+ rados -p test rm foo
+ done
+
+ # log should have been trimmed down to min_entries with one extra
+ test_log_size $PGID 21 || return 1
+}
+
+function TEST_repro_long_log1()
+{
+ local dir=$1
+
+ setup_log_test $dir || return 1
+ # regular write should trim the log
+ rados -p test put foo $dir/foo || return 1
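+ # (clarifying aside, not part of the upstream script: setup_log_test
+ # leaves the log at min_entries + 1 = 21 entries, and the put above
+ # appends one more, giving the expected 22 below; test_log_size's third
+ # argument defaults to 0, so log_dups_size is checked against 0 here)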
+ test_log_size $PGID 22 || return 1
+}
+
+function TEST_repro_long_log2()
+{
+ local dir=$1
+
+ setup_log_test $dir || return 1
+ local PRIMARY=$(ceph pg $PGID query | jq '.info.stats.up_primary')
+ kill_daemons $dir TERM osd.$PRIMARY || return 1
+ CEPH_ARGS="--osd-max-pg-log-entries=2 --osd-pg-log-dups-tracked=3 --no-mon-config" ceph-objectstore-tool --data-path $dir/$PRIMARY --pgid $PGID --op trim-pg-log || return 1
+ activate_osd $dir $PRIMARY || return 1
+ wait_for_clean || return 1
+ test_log_size $PGID 21 18 || return 1
+}
+
+function TEST_trim_max_entries()
+{
+ local dir=$1
+
+ setup_log_test $dir || return 1
+
+ ceph tell osd.\* injectargs -- --osd_target_pg_log_entries_per_osd 2 || return 1
+ ceph tell osd.\* injectargs -- --osd-min-pg-log-entries 2
+ ceph tell osd.\* injectargs -- --osd-pg-log-trim-min 2
+ ceph tell osd.\* injectargs -- --osd-pg-log-trim-max 4
+ ceph tell osd.\* injectargs -- --osd_pg_log_dups_tracked 0
+
+ # adding log entries should only trim 4 and add one each time
+ rados -p test rm foo
+ test_log_size $PGID 18 || return 1
+ rados -p test rm foo
+ test_log_size $PGID 15 || return 1
+ rados -p test rm foo
+ test_log_size $PGID 12 || return 1
+ rados -p test rm foo
+ test_log_size $PGID 9 || return 1
+ rados -p test rm foo
+ test_log_size $PGID 6 || return 1
+ rados -p test rm foo
+ test_log_size $PGID 3 || return 1
+
+ # below trim_min
+ rados -p test rm foo
+ test_log_size $PGID 4 || return 1
+ rados -p test rm foo
+ test_log_size $PGID 3 || return 1
+ rados -p test rm foo
+ test_log_size $PGID 4 || return 1
+ rados -p test rm foo
+ test_log_size $PGID 3 || return 1
+}
+
+function TEST_trim_max_entries_with_dups()
+{
+ local dir=$1
+
+ setup_log_test $dir || return 1
+
+ ceph tell osd.\* injectargs -- --osd_target_pg_log_entries_per_osd 2 || return 1
+ ceph tell osd.\* injectargs -- --osd-min-pg-log-entries 2
+ ceph tell osd.\* injectargs -- --osd-pg-log-trim-min 2
+ ceph tell osd.\* injectargs -- --osd-pg-log-trim-max 4
+ ceph tell osd.\* injectargs -- --osd_pg_log_dups_tracked 20 || return 1
+
+ # adding log entries should only trim 4 and add one each time;
+ # trimmed entries accumulate in the dups list, capped at
+ # osd_pg_log_dups_tracked (20)
+ rados -p test rm foo
+ test_log_size $PGID 18 2 || return 1
+ rados -p test rm foo
+ test_log_size $PGID 15 6 || return 1
+ rados -p test rm foo
+ test_log_size $PGID 12 10 || return 1
+ rados -p test rm foo
+ test_log_size $PGID 9 14 || return 1
+ rados -p test rm foo
+ test_log_size $PGID 6 18 || return 1
+ rados -p test rm foo
+ test_log_size $PGID 3 20 || return 1
+
+ # below trim_min
+ rados -p test rm foo
+ test_log_size $PGID 4 20 || return 1
+ rados -p test rm foo
+ test_log_size $PGID 3 20 || return 1
+ rados -p test rm foo
+ test_log_size $PGID 4 20 || return 1
+ rados -p test rm foo
+ test_log_size $PGID 3 20 || return 1
+}
+
+main repro-long-log "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && ../qa/run-standalone.sh repro_long_log.sh"
+# End:
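
A minimal usage sketch, assuming a built ceph source tree: each script's run() takes optional TEST_ function names (the funcs=${@:-...} default above), so a single case can be selected when driving these scripts through the run-standalone.sh wrapper named in the compile-command hints; the quoted single-test form is an assumption about the wrapper's argument handling.

    # from the build directory, run every TEST_ function in one script
    ../qa/run-standalone.sh repro_long_log.sh
    # or select a single test case by name
    ../qa/run-standalone.sh "repro_long_log.sh TEST_trim_max_entries"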