summary refs log tree commit diff stats
path: root/qa/standalone/scrub/osd-scrub-test.sh
diff options
context:
space:
mode:
Diffstat (limited to 'qa/standalone/scrub/osd-scrub-test.sh')
-rwxr-xr-x qa/standalone/scrub/osd-scrub-test.sh 457
1 files changed, 457 insertions, 0 deletions
diff --git a/qa/standalone/scrub/osd-scrub-test.sh b/qa/standalone/scrub/osd-scrub-test.sh
new file mode 100755
index 000000000..5dd029c35
--- /dev/null
+++ b/qa/standalone/scrub/osd-scrub-test.sh
@@ -0,0 +1,457 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2018 Red Hat <contact@redhat.com>
+#
+# Author: David Zafman <dzafman@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+# Run the requested test functions against a test directory.
+# Arguments: $1   - test directory, handed to every test function
+#            $2.. - optional names of the functions to run; when none are
+#                   given, every shell function named TEST_* is run
+# Globals:   CEPH_MON, CEPH_ARGS (exported), CEPH_CLI_TEST_DUP_COMMAND
+#            (export attribute removed)
+# Returns:   1 as soon as one test function fails
+function run() {
+    local dir=$1
+    shift
+
+    # git grep '\<7138\>' : there must be only one
+    export CEPH_MON="127.0.0.1:7138"
+    export CEPH_ARGS
+    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none --mon-host=$CEPH_MON "
+
+    export -n CEPH_CLI_TEST_DUP_COMMAND
+
+    # Fall back to the list of TEST_* functions currently defined
+    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+    for func in $funcs ; do
+        if ! $func $dir ; then
+            return 1
+        fi
+    done
+}
+
+# Check that num_scrub_errors and the inconsistent PG state are reported as
+# expected by the primary and its peers after a deep-scrub, with the original
+# primary marked out and back in, and after a repair.
+# Arguments: $1 - test directory
+# Returns:   0 on success, 1 as soon as a step or a check fails
+function TEST_scrub_test() {
+    local dir=$1
+    local poolname=test
+    local OSDS=3
+    local objects=15
+    # Declared local so that they do not leak into the other tests
+    local TESTDATA="testdata.$$"
+    local poolid
+
+    setup $dir || return 1
+    run_mon $dir a --osd_pool_default_size=3 || return 1
+    run_mgr $dir x || return 1
+    for osd in $(seq 0 $(expr $OSDS - 1))
+    do
+        run_osd $dir $osd || return 1
+    done
+
+    # Create a pool with a single pg
+    create_pool $poolname 1 1 || return 1
+    wait_for_clean || return 1
+    poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }')
+    # Without a pool id every pgid built below would be wrong
+    test -n "$poolid" || return 1
+
+    dd if=/dev/urandom of=$TESTDATA bs=1032 count=1 || return 1
+    for i in `seq 1 $objects`
+    do
+        if ! rados -p $poolname put obj${i} $TESTDATA
+        then
+            rm -f $TESTDATA
+            return 1
+        fi
+    done
+    rm -f $TESTDATA
+
+    local primary=$(get_primary $poolname obj1)
+    local otherosd=$(get_not_primary $poolname obj1)
+    # Use OSD 2 unless otherosd is already 2, in which case use OSD 0
+    local anotherosd="2"
+    if [ "$otherosd" = "2" ];
+    then
+        anotherosd="0"
+    fi
+
+    # Overwrite the bytes of obj1 on that single OSD; the checks below expect
+    # the deep-scrub to flag the pg as inconsistent
+    objectstore_tool $dir $anotherosd obj1 set-bytes /etc/fstab || return 1
+
+    local pgid="${poolid}.0"
+    pg_deep_scrub "$pgid" || return 1
+
+    ceph pg dump pgs | grep ^${pgid} | grep -q -- +inconsistent || return 1
+    test "$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_scrub_errors')" = "2" || return 1
+
+    ceph osd out $primary || return 1
+    wait_for_clean || return 1
+
+    pg_deep_scrub "$pgid" || return 1
+
+    test "$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_scrub_errors')" = "2" || return 1
+    test "$(ceph pg $pgid query | jq '.peer_info[0].stats.stat_sum.num_scrub_errors')" = "2" || return 1
+    ceph pg dump pgs | grep ^${pgid} | grep -q -- +inconsistent || return 1
+
+    ceph osd in $primary || return 1
+    wait_for_clean || return 1
+
+    repair "$pgid" || return 1
+    wait_for_clean || return 1
+
+    # This sets up the rest of the test: after the repair, the previous
+    # primary still has the old value
+    test "$(ceph pg $pgid query | jq '.peer_info[0].stats.stat_sum.num_scrub_errors')" = "2" || return 1
+    ceph pg dump pgs | grep ^${pgid} | grep -vq -- +inconsistent || return 1
+
+    ceph osd out $primary || return 1
+    wait_for_clean || return 1
+
+    # With the previous primary out, nobody should report scrub errors
+    test "$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_scrub_errors')" = "0" || return 1
+    test "$(ceph pg $pgid query | jq '.peer_info[0].stats.stat_sum.num_scrub_errors')" = "0" || return 1
+    test "$(ceph pg $pgid query | jq '.peer_info[1].stats.stat_sum.num_scrub_errors')" = "0" || return 1
+    ceph pg dump pgs | grep ^${pgid} | grep -vq -- +inconsistent || return 1
+
+    teardown $dir || return 1
+}
+
+# sed expression that reduces a timestamp to its leading year-month-day part
+DATESED='s/\([0-9]*-[0-9]*-[0-9]*\).*/\1/'
+# date(1) output format that yields the same year-month-day form
+DATEFORMAT='%Y-%m-%d'
+
+# Compare the first entry of an OSD's dump_scrubs output with expected dates.
+# Only the year-month-day part of each timestamp is compared.
+# Globals:   DATESED, DATEFORMAT (read)
+# Arguments: $1 - id of the OSD whose admin socket is queried
+#            $2 - offset from now, in "date -d" syntax (e.g. "1 day"), at
+#                 which sched_time is expected
+#            $3 - same, for the expected deadline
+# Returns:   0 when both dates match, 1 otherwise
+function check_dump_scrubs() {
+    local primary=$1
+    local sched_time_check="$2"
+    local deadline_check="$3"
+    local dump sched_time deadline expected
+
+    dump="$(CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${primary}) dump_scrubs)" || return 1
+
+    # jq -r prints the raw string, so no eval is needed to drop the
+    # double-quotes
+    sched_time=$(echo "$dump" | jq -r '.[0].sched_time')
+    expected=$(date +${DATEFORMAT} -d "now + $sched_time_check")
+    test "$(echo "$sched_time" | sed "$DATESED")" = "$expected" || return 1
+
+    deadline=$(echo "$dump" | jq -r '.[0].deadline')
+    expected=$(date +${DATEFORMAT} -d "now + $deadline_check")
+    test "$(echo "$deadline" | sed "$DATESED")" = "$expected" || return 1
+}
+
+# Check that the sched_time and deadline reported by dump_scrubs follow
+# changes of the scrub min/max intervals, set first through the primary OSD's
+# admin socket and then on the pool.
+# Arguments: $1 - test directory
+# Returns:   0 on success, 1 as soon as a step or a check fails
+function TEST_interval_changes() {
+    # Take the directory from $1 like the other tests, rather than relying
+    # on a variable named dir happening to be visible from the caller
+    local dir=$1
+    local poolname=test
+    local OSDS=2
+    local objects=10
+    # Don't assume how internal defaults are set
+    local day=$((24 * 60 * 60))
+    local week=$((day * 7))
+    local min_interval=$day
+    local max_interval=$week
+    local WAIT_FOR_UPDATE=15
+    # Declared local so that it does not leak into the other tests
+    local TESTDATA="testdata.$$"
+
+    setup $dir || return 1
+    # This min scrub interval results in 30 seconds backoff time
+    run_mon $dir a --osd_pool_default_size=$OSDS || return 1
+    run_mgr $dir x || return 1
+    for osd in $(seq 0 $(expr $OSDS - 1))
+    do
+        run_osd $dir $osd --osd_scrub_min_interval=$min_interval --osd_scrub_max_interval=$max_interval --osd_scrub_interval_randomize_ratio=0 || return 1
+    done
+
+    # Create a pool with a single pg
+    create_pool $poolname 1 1 || return 1
+    wait_for_clean || return 1
+    local poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }')
+
+    dd if=/dev/urandom of=$TESTDATA bs=1032 count=1 || return 1
+    for i in `seq 1 $objects`
+    do
+        if ! rados -p $poolname put obj${i} $TESTDATA
+        then
+            rm -f $TESTDATA
+            return 1
+        fi
+    done
+    rm -f $TESTDATA
+
+    local primary=$(get_primary $poolname obj1)
+
+    # Check initial settings from above (min 1 day, max 1 week)
+    check_dump_scrubs $primary "1 day" "1 week" || return 1
+
+    # Change global osd_scrub_min_interval to 2 days
+    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${primary}) config set osd_scrub_min_interval $((day * 2)) || return 1
+    sleep $WAIT_FOR_UPDATE
+    check_dump_scrubs $primary "2 days" "1 week" || return 1
+
+    # Change global osd_scrub_max_interval to 2 weeks
+    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${primary}) config set osd_scrub_max_interval $((week * 2)) || return 1
+    sleep $WAIT_FOR_UPDATE
+    check_dump_scrubs $primary "2 days" "2 week" || return 1
+
+    # Change pool scrub_min_interval to 3 days
+    ceph osd pool set $poolname scrub_min_interval $((day * 3)) || return 1
+    sleep $WAIT_FOR_UPDATE
+    check_dump_scrubs $primary "3 days" "2 week" || return 1
+
+    # Change pool scrub_max_interval to 3 weeks
+    ceph osd pool set $poolname scrub_max_interval $((week * 3)) || return 1
+    sleep $WAIT_FOR_UPDATE
+    check_dump_scrubs $primary "3 days" "3 week" || return 1
+
+    teardown $dir || return 1
+}
+
+# Request a scrub while the configured scrub week-day window lies in the
+# future, expect the primary to log that the scrub is sleeping, then open the
+# window and check that the scrub stamp advances neither before 10 polls nor
+# after 25.
+# Arguments: $1 - test directory
+# Returns:   0 on success, 1 as soon as a step or a check fails
+function TEST_scrub_extended_sleep() {
+    local dir=$1
+    local poolname=test
+    local OSDS=3
+    local objects=15
+
+    TESTDATA="testdata.$$"
+
+    DAY=$(date +%w)
+    # Handle wrap
+    if (( DAY >= 4 )); then
+        DAY="0"
+    fi
+    # Start after 2 days in case we are near midnight
+    DAY_START=$((DAY + 2))
+    DAY_END=$((DAY + 3))
+
+    # Options shared by every OSD of this test
+    local -a osd_opts=(
+        --osd_scrub_sleep=0
+        --osd_scrub_extended_sleep=20
+        --bluestore_cache_autotune=false
+        --osd_deep_scrub_randomize_ratio=0.0
+        --osd_scrub_interval_randomize_ratio=0
+        --osd_scrub_begin_week_day=$DAY_START
+        --osd_scrub_end_week_day=$DAY_END
+    )
+
+    setup $dir || return 1
+    run_mon $dir a --osd_pool_default_size=3 || return 1
+    run_mgr $dir x || return 1
+    for osd in $(seq 0 $((OSDS - 1))); do
+        run_osd $dir $osd "${osd_opts[@]}" || return 1
+    done
+
+    # Create a pool with a single pg
+    create_pool $poolname 1 1
+    wait_for_clean || return 1
+
+    # Trigger a scrub on a PG
+    local pgid=$(get_pg $poolname SOMETHING)
+    local primary=$(get_primary $poolname SOMETHING)
+    local last_scrub=$(get_last_scrub_stamp $pgid)
+    ceph tell $pgid scrub || return 1
+
+    # Allow scrub to start extended sleep: poll the primary's log once per
+    # second, 15 times at most
+    PASSED="false"
+    i=0
+    while (( i < 15 )); do
+        if grep -q "scrub state.*, sleeping" $dir/osd.${primary}.log; then
+            PASSED="true"
+            break
+        fi
+        sleep 1
+        i=$((i + 1))
+    done
+
+    # Check that extended sleep was triggered
+    [ "$PASSED" = "true" ] || return 1
+
+    # release scrub to run after extended sleep finishes
+    ceph tell osd.$primary config set osd_scrub_begin_week_day 0
+    ceph tell osd.$primary config set osd_scrub_end_week_day 0
+
+    # Due to extended sleep, the scrub should not be done within 20 seconds
+    # but test up to 10 seconds and make sure it happens by 25 seconds.
+    count=0
+    PASSED="false"
+    i=0
+    while (( i < 25 )); do
+        count=$((count + 1))
+        if [ "$(get_last_scrub_stamp $pgid)" \> "$last_scrub" ]; then
+            # Did scrub run too soon?
+            if (( count < 10 )); then
+                return 1
+            fi
+            PASSED="true"
+            break
+        fi
+        sleep 1
+        i=$((i + 1))
+    done
+
+    # Make sure scrub eventually ran
+    [ "$PASSED" = "true" ] || return 1
+
+    teardown $dir || return 1
+}
+
+# Common body of the scrub abort tests: request a scrub of the given type,
+# wait until the pg reports "scrubbing", set the flag that forbids that kind
+# of scrub, check that the primary logged the abort, then clear the flag and
+# call wait_for_scrub.
+# Arguments: $1 - test directory
+#            $2 - "scrub" or "deep_scrub"
+# Returns:   0 on success, 1 as soon as a step or a check fails
+function _scrub_abort() {
+    local dir=$1
+    local poolname=test
+    local OSDS=3
+    local objects=1000
+    local type=$2
+    # Declared local so that they do not leak into the other tests
+    local TESTDATA="testdata.$$"
+    local stopscrub check poolid found
+
+    # stopscrub is the osd flag to set, check the name expected in the log
+    if test "$type" = "scrub";
+    then
+        stopscrub="noscrub"
+        check="noscrub"
+    else
+        stopscrub="nodeep-scrub"
+        check="nodeep_scrub"
+    fi
+
+    setup $dir || return 1
+    run_mon $dir a --osd_pool_default_size=3 || return 1
+    run_mgr $dir x || return 1
+    for osd in $(seq 0 $(expr $OSDS - 1))
+    do
+        run_osd $dir $osd --osd_pool_default_pg_autoscale_mode=off \
+                --osd_deep_scrub_randomize_ratio=0.0 \
+                --osd_scrub_sleep=5.0 \
+                --osd_scrub_interval_randomize_ratio=0 || return 1
+    done
+
+    # Create a pool with a single pg
+    create_pool $poolname 1 1 || return 1
+    wait_for_clean || return 1
+    poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }')
+    # Without a pool id the pgid built below would be wrong
+    test -n "$poolid" || return 1
+
+    dd if=/dev/urandom of=$TESTDATA bs=1032 count=1 || return 1
+    for i in `seq 1 $objects`
+    do
+        if ! rados -p $poolname put obj${i} $TESTDATA
+        then
+            rm -f $TESTDATA
+            return 1
+        fi
+    done
+    rm -f $TESTDATA
+
+    local primary=$(get_primary $poolname obj1)
+    local pgid="${poolid}.0"
+
+    ceph tell $pgid $type || return 1
+    # deep-scrub won't start without scrub noticing
+    if [ "$type" = "deep_scrub" ];
+    then
+        ceph tell $pgid scrub || return 1
+    fi
+
+    # Wait for scrubbing to start
+    set -o pipefail
+    found="no"
+    for i in $(seq 0 200)
+    do
+        flush_pg_stats
+        if ceph pg dump pgs | grep ^$pgid| grep -q "scrubbing"
+        then
+            found="yes"
+            #ceph pg dump pgs
+            break
+        fi
+    done
+    set +o pipefail
+
+    if test "$found" = "no";
+    then
+        echo "Scrubbing never started"
+        return 1
+    fi
+
+    ceph osd set $stopscrub || return 1
+    if [ "$type" = "deep_scrub" ];
+    then
+        ceph osd set noscrub || return 1
+    fi
+
+    # Wait for scrubbing to end; found stays "yes" while the pg still
+    # reports scrubbing
+    set -o pipefail
+    for i in $(seq 0 200)
+    do
+        flush_pg_stats
+        if ceph pg dump pgs | grep ^$pgid | grep -q "scrubbing"
+        then
+            continue
+        fi
+        found="no"
+        #ceph pg dump pgs
+        break
+    done
+    set +o pipefail
+
+    # Report the timeout instead of silently carrying on
+    if test "$found" = "yes";
+    then
+        echo "Scrubbing never stopped"
+        return 1
+    fi
+
+    sleep 5
+
+    if ! grep "$check set, aborting" $dir/osd.${primary}.log
+    then
+        echo "Abort not seen in log"
+        return 1
+    fi
+
+    local last_scrub=$(get_last_scrub_stamp $pgid)
+    ceph config set osd "osd_scrub_sleep" "0.1"
+
+    ceph osd unset $stopscrub || return 1
+    if [ "$type" = "deep_scrub" ];
+    then
+        ceph osd unset noscrub || return 1
+    fi
+    # TIMEOUT is not read in this function; presumably wait_for_scrub uses
+    # it. A local is still visible to the functions called from here, and
+    # does not change the value seen by later tests.
+    local TIMEOUT=$(($objects / 2))
+    wait_for_scrub $pgid "$last_scrub" || return 1
+
+    teardown $dir || return 1
+}
+
+# Scrub abort test for a regular scrub; the work is done by _scrub_abort.
+# Arguments: $1 - test directory
+# Returns:   the status of _scrub_abort
+function TEST_scrub_abort() {
+    local dir=$1
+
+    _scrub_abort $dir scrub
+    return $?
+}
+
+# Scrub abort test for a deep scrub; the work is done by _scrub_abort.
+# Arguments: $1 - test directory
+# Returns:   the status of _scrub_abort
+function TEST_deep_scrub_abort() {
+    local dir=$1
+
+    _scrub_abort $dir deep_scrub
+    return $?
+}
+
+# Start the OSDs with a permitted scrub window that ended an hour ago (from
+# two hours ago to one hour ago), request a scrub, and check that the pg's
+# last scrub stamp does not advance during the next 30 seconds.
+# Arguments: $1 - test directory
+# Returns:   0 on success, 1 if a step fails or a scrub ran
+function TEST_scrub_permit_time() {
+    local dir=$1
+    local poolname=test
+    local OSDS=3
+    local objects=15
+
+    TESTDATA="testdata.$$"
+
+    setup $dir || return 1
+    run_mon $dir a --osd_pool_default_size=3 || return 1
+    run_mgr $dir x || return 1
+
+    # Hours of the day with one leading zero removed
+    local scrub_begin_hour=$(date -d '2 hour ago' +"%H" | sed 's/^0//')
+    local scrub_end_hour=$(date -d '1 hour ago' +"%H" | sed 's/^0//')
+    # Options shared by every OSD of this test
+    local -a osd_opts=(
+        --bluestore_cache_autotune=false
+        --osd_deep_scrub_randomize_ratio=0.0
+        --osd_scrub_interval_randomize_ratio=0
+        --osd_scrub_begin_hour=$scrub_begin_hour
+        --osd_scrub_end_hour=$scrub_end_hour
+    )
+    for osd in $(seq 0 $((OSDS - 1))); do
+        run_osd $dir $osd "${osd_opts[@]}" || return 1
+    done
+
+    # Create a pool with a single pg
+    create_pool $poolname 1 1
+    wait_for_clean || return 1
+
+    # Trigger a scrub on a PG
+    local pgid=$(get_pg $poolname SOMETHING)
+    local primary=$(get_primary $poolname SOMETHING)
+    local last_scrub=$(get_last_scrub_stamp $pgid)
+    # If we don't specify an amount of time to subtract from
+    # current time to set last_scrub_stamp, it sets the deadline
+    # back by osd_max_interval which would cause the time permit checking
+    # to be skipped. Set back 1 day, the default scrub_min_interval.
+    ceph tell $pgid scrub $((24 * 60 * 60)) || return 1
+
+    # Scrub should not run: poll the stamp once per second, 30 times
+    i=0
+    while (( i < 30 )); do
+        if [ "$(get_last_scrub_stamp $pgid)" \> "$last_scrub" ]; then
+            return 1
+        fi
+        sleep 1
+        i=$((i + 1))
+    done
+
+    teardown $dir || return 1
+}
+
+# Hand over to main, which is not defined in this file (presumably it comes
+# from the sourced ceph-helpers.sh), with the test name and the script's
+# arguments.
+main osd-scrub-test "$@"
+
+# Local Variables:
+# compile-command: "cd build ; make -j4 && \
+# ../qa/run-standalone.sh osd-scrub-test.sh"
+# End: