diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:45:59 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:45:59 +0000 |
commit | 19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch) | |
tree | 42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /qa/standalone/scrub/osd-scrub-test.sh | |
parent | Initial commit. (diff) | |
download | ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.tar.xz ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.zip |
Adding upstream version 16.2.11+ds.upstream/16.2.11+dsupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'qa/standalone/scrub/osd-scrub-test.sh')
-rwxr-xr-x | qa/standalone/scrub/osd-scrub-test.sh | 457 |
1 files changed, 457 insertions, 0 deletions
diff --git a/qa/standalone/scrub/osd-scrub-test.sh b/qa/standalone/scrub/osd-scrub-test.sh new file mode 100755 index 000000000..5dd029c35 --- /dev/null +++ b/qa/standalone/scrub/osd-scrub-test.sh @@ -0,0 +1,457 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2018 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7138" # git grep '\<7138\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + export -n CEPH_CLI_TEST_DUP_COMMAND + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + $func $dir || return 1 + done +} + +function TEST_scrub_test() { + local dir=$1 + local poolname=test + local OSDS=3 + local objects=15 + + TESTDATA="testdata.$$" + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=3 || return 1 + run_mgr $dir x || return 1 + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + # Create a pool with a single pg + create_pool $poolname 1 1 + wait_for_clean || return 1 + poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }') + + dd if=/dev/urandom of=$TESTDATA bs=1032 count=1 + for i in `seq 1 $objects` + do + rados -p $poolname put obj${i} $TESTDATA + done + rm -f $TESTDATA + + local primary=$(get_primary $poolname obj1) + local otherosd=$(get_not_primary $poolname obj1) + if [ "$otherosd" = "2" ]; + then + local anotherosd="0" + else + local anotherosd="2" + fi + + objectstore_tool $dir $anotherosd obj1 set-bytes /etc/fstab + + local pgid="${poolid}.0" + pg_deep_scrub "$pgid" || return 1 + + ceph pg dump pgs | grep ^${pgid} | grep -q -- +inconsistent || return 1 + test "$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_scrub_errors')" = "2" || return 1 + + ceph osd out $primary + wait_for_clean || return 1 + + pg_deep_scrub "$pgid" || return 1 + + test "$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_scrub_errors')" = "2" || return 1 + test "$(ceph pg $pgid query | jq '.peer_info[0].stats.stat_sum.num_scrub_errors')" = "2" || return 1 + ceph pg dump pgs | grep ^${pgid} | grep -q -- +inconsistent || return 1 + + ceph osd in $primary + wait_for_clean || return 1 + + repair "$pgid" || return 1 + wait_for_clean || return 1 + + # This sets up the test after we've repaired with previous primary has old value + test "$(ceph pg $pgid query | jq '.peer_info[0].stats.stat_sum.num_scrub_errors')" = "2" || return 1 + ceph pg dump pgs | grep ^${pgid} | grep -vq -- +inconsistent || return 1 + + ceph osd out $primary + wait_for_clean || return 1 + + test "$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_scrub_errors')" = "0" || return 1 + test "$(ceph pg $pgid query | jq '.peer_info[0].stats.stat_sum.num_scrub_errors')" = "0" || return 1 + test "$(ceph pg $pgid query | jq '.peer_info[1].stats.stat_sum.num_scrub_errors')" = "0" || return 1 + ceph pg dump pgs | grep ^${pgid} | grep -vq -- +inconsistent || return 1 + + teardown $dir || return 1 +} + +# Grab year-month-day +DATESED="s/\([0-9]*-[0-9]*-[0-9]*\).*/\1/" +DATEFORMAT="%Y-%m-%d" + +function check_dump_scrubs() { + local primary=$1 + local sched_time_check="$2" + local deadline_check="$3" + + DS="$(CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${primary}) dump_scrubs)" + # use eval to drop double-quotes + eval SCHED_TIME=$(echo $DS | jq '.[0].sched_time') + test $(echo $SCHED_TIME | sed $DATESED) = $(date +${DATEFORMAT} -d "now + $sched_time_check") || return 1 + # use eval to drop double-quotes + eval DEADLINE=$(echo $DS | jq '.[0].deadline') + test $(echo $DEADLINE | sed $DATESED) = $(date +${DATEFORMAT} -d "now + $deadline_check") || return 1 +} + +function TEST_interval_changes() { + local poolname=test + local OSDS=2 + local objects=10 + # Don't assume how internal defaults are set + local day="$(expr 24 \* 60 \* 60)" + local week="$(expr $day \* 7)" + local min_interval=$day + local max_interval=$week + local WAIT_FOR_UPDATE=15 + + TESTDATA="testdata.$$" + + setup $dir || return 1 + # This min scrub interval results in 30 seconds backoff time + run_mon $dir a --osd_pool_default_size=$OSDS || return 1 + run_mgr $dir x || return 1 + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd --osd_scrub_min_interval=$min_interval --osd_scrub_max_interval=$max_interval --osd_scrub_interval_randomize_ratio=0 || return 1 + done + + # Create a pool with a single pg + create_pool $poolname 1 1 + wait_for_clean || return 1 + local poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }') + + dd if=/dev/urandom of=$TESTDATA bs=1032 count=1 + for i in `seq 1 $objects` + do + rados -p $poolname put obj${i} $TESTDATA + done + rm -f $TESTDATA + + local primary=$(get_primary $poolname obj1) + + # Check initial settings from above (min 1 day, min 1 week) + check_dump_scrubs $primary "1 day" "1 week" || return 1 + + # Change global osd_scrub_min_interval to 2 days + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${primary}) config set osd_scrub_min_interval $(expr $day \* 2) + sleep $WAIT_FOR_UPDATE + check_dump_scrubs $primary "2 days" "1 week" || return 1 + + # Change global osd_scrub_max_interval to 2 weeks + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${primary}) config set osd_scrub_max_interval $(expr $week \* 2) + sleep $WAIT_FOR_UPDATE + check_dump_scrubs $primary "2 days" "2 week" || return 1 + + # Change pool osd_scrub_min_interval to 3 days + ceph osd pool set $poolname scrub_min_interval $(expr $day \* 3) + sleep $WAIT_FOR_UPDATE + check_dump_scrubs $primary "3 days" "2 week" || return 1 + + # Change pool osd_scrub_max_interval to 3 weeks + ceph osd pool set $poolname scrub_max_interval $(expr $week \* 3) + sleep $WAIT_FOR_UPDATE + check_dump_scrubs $primary "3 days" "3 week" || return 1 + + teardown $dir || return 1 +} + +function TEST_scrub_extended_sleep() { + local dir=$1 + local poolname=test + local OSDS=3 + local objects=15 + + TESTDATA="testdata.$$" + + DAY=$(date +%w) + # Handle wrap + if [ "$DAY" -ge "4" ]; + then + DAY="0" + fi + # Start after 2 days in case we are near midnight + DAY_START=$(expr $DAY + 2) + DAY_END=$(expr $DAY + 3) + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=3 || return 1 + run_mgr $dir x || return 1 + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd --osd_scrub_sleep=0 \ + --osd_scrub_extended_sleep=20 \ + --bluestore_cache_autotune=false \ + --osd_deep_scrub_randomize_ratio=0.0 \ + --osd_scrub_interval_randomize_ratio=0 \ + --osd_scrub_begin_week_day=$DAY_START \ + --osd_scrub_end_week_day=$DAY_END \ + || return 1 + done + + # Create a pool with a single pg + create_pool $poolname 1 1 + wait_for_clean || return 1 + + # Trigger a scrub on a PG + local pgid=$(get_pg $poolname SOMETHING) + local primary=$(get_primary $poolname SOMETHING) + local last_scrub=$(get_last_scrub_stamp $pgid) + ceph tell $pgid scrub || return 1 + + # Allow scrub to start extended sleep + PASSED="false" + for ((i=0; i < 15; i++)); do + if grep -q "scrub state.*, sleeping" $dir/osd.${primary}.log + then + PASSED="true" + break + fi + sleep 1 + done + + # Check that extended sleep was triggered + if [ $PASSED = "false" ]; + then + return 1 + fi + + # release scrub to run after extended sleep finishes + ceph tell osd.$primary config set osd_scrub_begin_week_day 0 + ceph tell osd.$primary config set osd_scrub_end_week_day 0 + + # Due to extended sleep, the scrub should not be done within 20 seconds + # but test up to 10 seconds and make sure it happens by 25 seconds. + count=0 + PASSED="false" + for ((i=0; i < 25; i++)); do + count=$(expr $count + 1) + if test "$(get_last_scrub_stamp $pgid)" '>' "$last_scrub" ; then + # Did scrub run too soon? + if [ $count -lt "10" ]; + then + return 1 + fi + PASSED="true" + break + fi + sleep 1 + done + + # Make sure scrub eventually ran + if [ $PASSED = "false" ]; + then + return 1 + fi + + teardown $dir || return 1 +} + +function _scrub_abort() { + local dir=$1 + local poolname=test + local OSDS=3 + local objects=1000 + local type=$2 + + TESTDATA="testdata.$$" + if test $type = "scrub"; + then + stopscrub="noscrub" + check="noscrub" + else + stopscrub="nodeep-scrub" + check="nodeep_scrub" + fi + + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=3 || return 1 + run_mgr $dir x || return 1 + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd --osd_pool_default_pg_autoscale_mode=off \ + --osd_deep_scrub_randomize_ratio=0.0 \ + --osd_scrub_sleep=5.0 \ + --osd_scrub_interval_randomize_ratio=0 || return 1 + done + + # Create a pool with a single pg + create_pool $poolname 1 1 + wait_for_clean || return 1 + poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }') + + dd if=/dev/urandom of=$TESTDATA bs=1032 count=1 + for i in `seq 1 $objects` + do + rados -p $poolname put obj${i} $TESTDATA + done + rm -f $TESTDATA + + local primary=$(get_primary $poolname obj1) + local pgid="${poolid}.0" + + ceph tell $pgid $type || return 1 + # deep-scrub won't start without scrub noticing + if [ "$type" = "deep_scrub" ]; + then + ceph tell $pgid scrub || return 1 + fi + + # Wait for scrubbing to start + set -o pipefail + found="no" + for i in $(seq 0 200) + do + flush_pg_stats + if ceph pg dump pgs | grep ^$pgid| grep -q "scrubbing" + then + found="yes" + #ceph pg dump pgs + break + fi + done + set +o pipefail + + if test $found = "no"; + then + echo "Scrubbing never started" + return 1 + fi + + ceph osd set $stopscrub + if [ "$type" = "deep_scrub" ]; + then + ceph osd set noscrub + fi + + # Wait for scrubbing to end + set -o pipefail + for i in $(seq 0 200) + do + flush_pg_stats + if ceph pg dump pgs | grep ^$pgid | grep -q "scrubbing" + then + continue + fi + #ceph pg dump pgs + break + done + set +o pipefail + + sleep 5 + + if ! grep "$check set, aborting" $dir/osd.${primary}.log + then + echo "Abort not seen in log" + return 1 + fi + + local last_scrub=$(get_last_scrub_stamp $pgid) + ceph config set osd "osd_scrub_sleep" "0.1" + + ceph osd unset $stopscrub + if [ "$type" = "deep_scrub" ]; + then + ceph osd unset noscrub + fi + TIMEOUT=$(($objects / 2)) + wait_for_scrub $pgid "$last_scrub" || return 1 + + teardown $dir || return 1 +} + +function TEST_scrub_abort() { + local dir=$1 + _scrub_abort $dir scrub +} + +function TEST_deep_scrub_abort() { + local dir=$1 + _scrub_abort $dir deep_scrub +} + +function TEST_scrub_permit_time() { + local dir=$1 + local poolname=test + local OSDS=3 + local objects=15 + + TESTDATA="testdata.$$" + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=3 || return 1 + run_mgr $dir x || return 1 + local scrub_begin_hour=$(date -d '2 hour ago' +"%H" | sed 's/^0//') + local scrub_end_hour=$(date -d '1 hour ago' +"%H" | sed 's/^0//') + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd --bluestore_cache_autotune=false \ + --osd_deep_scrub_randomize_ratio=0.0 \ + --osd_scrub_interval_randomize_ratio=0 \ + --osd_scrub_begin_hour=$scrub_begin_hour \ + --osd_scrub_end_hour=$scrub_end_hour || return 1 + done + + # Create a pool with a single pg + create_pool $poolname 1 1 + wait_for_clean || return 1 + + # Trigger a scrub on a PG + local pgid=$(get_pg $poolname SOMETHING) + local primary=$(get_primary $poolname SOMETHING) + local last_scrub=$(get_last_scrub_stamp $pgid) + # If we don't specify an amount of time to subtract from + # current time to set last_scrub_stamp, it sets the deadline + # back by osd_max_interval which would cause the time permit checking + # to be skipped. Set back 1 day, the default scrub_min_interval. + ceph tell $pgid scrub $(( 24 * 60 * 60 )) || return 1 + + # Scrub should not run + for ((i=0; i < 30; i++)); do + if test "$(get_last_scrub_stamp $pgid)" '>' "$last_scrub" ; then + return 1 + fi + sleep 1 + done + + teardown $dir || return 1 +} + +main osd-scrub-test "$@" + +# Local Variables: +# compile-command: "cd build ; make -j4 && \ +# ../qa/run-standalone.sh osd-scrub-test.sh" +# End: |