author:    Daniel Baumann <daniel.baumann@progress-linux.org>    2024-04-21 11:54:28 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org>    2024-04-21 11:54:28 +0000
commit:    e6918187568dbd01842d8d1d2c808ce16a894239
tree:      64f88b554b444a49f656b6c656111a145cbbaa28  /qa/standalone/osd-backfill
parent:    Initial commit.
Adding upstream version 18.2.2. (upstream/18.2.2)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat
 -rwxr-xr-x | qa/standalone/osd-backfill/osd-backfill-prio.sh         |  522
 -rwxr-xr-x | qa/standalone/osd-backfill/osd-backfill-recovery-log.sh |  139
 -rwxr-xr-x | qa/standalone/osd-backfill/osd-backfill-space.sh        | 1176
 -rwxr-xr-x | qa/standalone/osd-backfill/osd-backfill-stats.sh        |  761
4 files changed, 2598 insertions, 0 deletions
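The four scripts added by this commit are Ceph standalone QA tests. Each one ends with a compile-command footer of the form "make -j4 && ../qa/run-standalone.sh <script>.sh", so the usual way to exercise them is through qa/run-standalone.sh from a built source tree. A minimal sketch of running one of them, with the build-directory location assumed rather than taken from this commit:

```sh
# Run one of the newly added backfill tests through the standalone harness.
# The relative path mirrors the compile-command footer embedded in each script;
# the harness is expected to provide the environment (e.g. CEPH_ROOT) that the
# scripts need when they source qa/standalone/ceph-helpers.sh.
cd ceph/build                                   # assumed build directory
make -j4                                        # build first, as the footer suggests
../qa/run-standalone.sh osd-backfill-prio.sh    # or any of the other three scripts
```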
diff --git a/qa/standalone/osd-backfill/osd-backfill-prio.sh b/qa/standalone/osd-backfill/osd-backfill-prio.sh new file mode 100755 index 000000000..9749ca34c --- /dev/null +++ b/qa/standalone/osd-backfill/osd-backfill-prio.sh @@ -0,0 +1,522 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2019 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + # Fix port???? + export CEPH_MON="127.0.0.1:7114" # git grep '\<7114\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON --osd_max_backfills=1 --debug_reserver=20 " + CEPH_ARGS+="--osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10 " + # Set osd op queue = wpq for the tests. Backfill priority is not + # considered by mclock_scheduler leading to unexpected results. + CEPH_ARGS+="--osd-op-queue=wpq " + export objects=50 + export poolprefix=test + export FORCE_PRIO="254" # See OSD_BACKFILL_PRIORITY_FORCED + export DEGRADED_PRIO="150" # See OSD_BACKFILL_DEGRADED_PRIORITY_BASE + 10 + export NORMAL_PRIO="110" # See OSD_BACKFILL_PRIORITY_BASE + 10 + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + + +function TEST_backfill_priority() { + local dir=$1 + local pools=10 + local OSDS=5 + # size 2 -> 1 means degraded by 1, so add 1 to base prio + local degraded_prio=$(expr $DEGRADED_PRIO + 1) + local max_tries=10 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + for p in $(seq 1 $pools) + do + create_pool "${poolprefix}$p" 1 1 + ceph osd pool set "${poolprefix}$p" size 2 + done + sleep 5 + + wait_for_clean || return 1 + + ceph pg dump pgs + + # Find 3 pools with a pg with the same primaries but second + # replica on another osd. 
+ local PG1 + local POOLNUM1 + local pool1 + local chk_osd1_1 + local chk_osd1_2 + + local PG2 + local POOLNUM2 + local pool2 + local chk_osd2 + + local PG3 + local POOLNUM3 + local pool3 + + for p in $(seq 1 $pools) + do + ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting + local test_osd1=$(head -1 $dir/acting) + local test_osd2=$(tail -1 $dir/acting) + if [ -z "$PG1" ]; + then + PG1="${p}.0" + POOLNUM1=$p + pool1="${poolprefix}$p" + chk_osd1_1=$test_osd1 + chk_osd1_2=$test_osd2 + elif [ -z "$PG2" -a $chk_osd1_1 = $test_osd1 -a $chk_osd1_2 != $test_osd2 ]; + then + PG2="${p}.0" + POOLNUM2=$p + pool2="${poolprefix}$p" + chk_osd2=$test_osd2 + elif [ -n "$PG2" -a $chk_osd1_1 = $test_osd1 -a $chk_osd1_2 != $test_osd2 -a "$chk_osd2" != $test_osd2 ]; + then + PG3="${p}.0" + POOLNUM3=$p + pool3="${poolprefix}$p" + break + fi + done + rm -f $dir/acting + + if [ "$pool2" = "" -o "pool3" = "" ]; + then + echo "Failure to find appropirate PGs" + return 1 + fi + + for p in $(seq 1 $pools) + do + if [ $p != $POOLNUM1 -a $p != $POOLNUM2 -a $p != $POOLNUM3 ]; + then + delete_pool ${poolprefix}$p + fi + done + + ceph osd pool set $pool2 size 1 --yes-i-really-mean-it + ceph osd pool set $pool3 size 1 --yes-i-really-mean-it + wait_for_clean || return 1 + + dd if=/dev/urandom of=$dir/data bs=1M count=10 + p=1 + for pname in $pool1 $pool2 $pool3 + do + for i in $(seq 1 $objects) + do + rados -p ${pname} put obj${i}-p${p} $dir/data + done + p=$(expr $p + 1) + done + + local otherosd=$(get_not_primary $pool1 obj1-p1) + + ceph pg dump pgs + ERRORS=0 + + ceph osd set nobackfill + ceph osd set noout + + # Get a pg to want to backfill and quickly force it + # to be preempted. + ceph osd pool set $pool3 size 2 + sleep 2 + + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1 + + # 3. Item is in progress, adjust priority with no higher priority waiting + for i in $(seq 1 $max_tries) + do + if ! ceph pg force-backfill $PG3 2>&1 | grep -q "doesn't require backfilling"; then + break + fi + if [ "$i" = "$max_tries" ]; then + echo "ERROR: Didn't appear to be able to force-backfill" + ERRORS=$(expr $ERRORS + 1) + fi + sleep 2 + done + flush_pg_stats || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1 + + ceph osd out osd.$chk_osd1_2 + sleep 2 + flush_pg_stats || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1 + ceph pg dump pgs + + ceph osd pool set $pool2 size 2 + sleep 2 + flush_pg_stats || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1 + cat $dir/out + ceph pg dump pgs + + PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG1}\")).prio") + if [ "$PRIO" != "$NORMAL_PRIO" ]; + then + echo "The normal PG ${PG1} doesn't have prio $NORMAL_PRIO queued waiting" + ERRORS=$(expr $ERRORS + 1) + fi + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG3} ]; + then + echo "The force-backfill PG $PG3 didn't become the in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $FORCE_PRIO ]; + then + echo "The force-backfill PG ${PG3} doesn't have prio $FORCE_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + # 1. 
Item is queued, re-queue with new priority + for i in $(seq 1 $max_tries) + do + if ! ceph pg force-backfill $PG2 2>&1 | grep -q "doesn't require backfilling"; then + break + fi + if [ "$i" = "$max_tries" ]; then + echo "ERROR: Didn't appear to be able to force-backfill" + ERRORS=$(expr $ERRORS + 1) + fi + sleep 2 + done + sleep 2 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1 + cat $dir/out + PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio") + if [ "$PRIO" != "$FORCE_PRIO" ]; + then + echo "The second force-backfill PG ${PG2} doesn't have prio $FORCE_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + flush_pg_stats || return 1 + + # 4. Item is in progress, if higher priority items waiting prempt item + ceph pg cancel-force-backfill $PG3 || return 1 + sleep 2 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1 + cat $dir/out + PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG3}\")).prio") + if [ "$PRIO" != "$degraded_prio" ]; + then + echo "After cancel-force-backfill PG ${PG3} doesn't have prio $degraded_prio" + ERRORS=$(expr $ERRORS + 1) + fi + + eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG2} ]; + then + echo "The force-recovery PG $PG2 didn't become the in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $FORCE_PRIO ]; + then + echo "The first force-recovery PG ${PG2} doesn't have prio $FORCE_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + ceph pg cancel-force-backfill $PG2 || return 1 + sleep 5 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1 + + # 2. 
Item is queued, re-queue and preempt because new priority higher than an in progress item + flush_pg_stats || return 1 + ceph pg force-backfill $PG3 || return 1 + sleep 2 + + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1 + cat $dir/out + PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio") + if [ "$PRIO" != "$degraded_prio" ]; + then + echo "After cancel-force-backfill PG ${PG2} doesn't have prio $degraded_prio" + ERRORS=$(expr $ERRORS + 1) + fi + + eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG3} ]; + then + echo "The force-backfill PG $PG3 didn't get promoted to an in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $FORCE_PRIO ]; + then + echo "The force-backfill PG ${PG2} doesn't have prio $FORCE_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + ceph osd unset noout + ceph osd unset nobackfill + + wait_for_clean "CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations" || return 1 + + ceph pg dump pgs + + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_pgstate_history + + if [ $ERRORS != "0" ]; + then + echo "$ERRORS error(s) found" + else + echo TEST PASSED + fi + + delete_pool $pool1 + delete_pool $pool2 + delete_pool $pool3 + kill_daemons $dir || return 1 + return $ERRORS +} + +# +# Show that pool recovery_priority is added to the backfill priority +# +# Create 2 pools with 2 OSDs with different primarys +# pool 1 with recovery_priority 1 +# pool 2 with recovery_priority 2 +# +# Start backfill by changing the pool sizes from 1 to 2 +# Use dump_recovery_reservations to verify priorities +function TEST_backfill_pool_priority() { + local dir=$1 + local pools=3 # Don't assume the first 2 pools are exact what we want + local OSDS=2 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + for p in $(seq 1 $pools) + do + create_pool "${poolprefix}$p" 1 1 + ceph osd pool set "${poolprefix}$p" size 2 + done + sleep 5 + + wait_for_clean || return 1 + + ceph pg dump pgs + + # Find 2 pools with different primaries which + # means the replica must be on another osd. 
+ local PG1 + local POOLNUM1 + local pool1 + local chk_osd1_1 + local chk_osd1_2 + + local PG2 + local POOLNUM2 + local pool2 + local chk_osd2_1 + local chk_osd2_2 + + for p in $(seq 1 $pools) + do + ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting + local test_osd1=$(head -1 $dir/acting) + local test_osd2=$(tail -1 $dir/acting) + if [ -z "$PG1" ]; + then + PG1="${p}.0" + POOLNUM1=$p + pool1="${poolprefix}$p" + chk_osd1_1=$test_osd1 + chk_osd1_2=$test_osd2 + elif [ $chk_osd1_1 != $test_osd1 ]; + then + PG2="${p}.0" + POOLNUM2=$p + pool2="${poolprefix}$p" + chk_osd2_1=$test_osd1 + chk_osd2_2=$test_osd2 + break + fi + done + rm -f $dir/acting + + if [ "$pool2" = "" ]; + then + echo "Failure to find appropirate PGs" + return 1 + fi + + for p in $(seq 1 $pools) + do + if [ $p != $POOLNUM1 -a $p != $POOLNUM2 ]; + then + delete_pool ${poolprefix}$p + fi + done + + pool1_extra_prio=1 + pool2_extra_prio=2 + # size 2 -> 1 means degraded by 1, so add 1 to base prio + pool1_prio=$(expr $DEGRADED_PRIO + 1 + $pool1_extra_prio) + pool2_prio=$(expr $DEGRADED_PRIO + 1 + $pool2_extra_prio) + + ceph osd pool set $pool1 size 1 --yes-i-really-mean-it + ceph osd pool set $pool1 recovery_priority $pool1_extra_prio + ceph osd pool set $pool2 size 1 --yes-i-really-mean-it + ceph osd pool set $pool2 recovery_priority $pool2_extra_prio + wait_for_clean || return 1 + + dd if=/dev/urandom of=$dir/data bs=1M count=10 + p=1 + for pname in $pool1 $pool2 + do + for i in $(seq 1 $objects) + do + rados -p ${pname} put obj${i}-p${p} $dir/data + done + p=$(expr $p + 1) + done + + local otherosd=$(get_not_primary $pool1 obj1-p1) + + ceph pg dump pgs + ERRORS=0 + + ceph osd pool set $pool1 size 2 + ceph osd pool set $pool2 size 2 + sleep 5 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/dump.${chk_osd1_1}.out + echo osd.${chk_osd1_1} + cat $dir/dump.${chk_osd1_1}.out + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_2}) dump_recovery_reservations > $dir/dump.${chk_osd1_2}.out + echo osd.${chk_osd1_2} + cat $dir/dump.${chk_osd1_2}.out + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/dump.${chk_osd1_1}.out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG1} ]; + then + echo "The primary PG ${PG1} didn't become the in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/dump.${chk_osd1_1}.out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $pool1_prio ]; + then + echo "The primary PG ${PG1} doesn't have prio $pool1_prio" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/dump.${chk_osd1_2}.out | jq '.remote_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG1} ]; + then + echo "The primary PG ${PG1} didn't become the in progress item on remote" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/dump.${chk_osd1_2}.out | jq '.remote_reservations.in_progress[0].prio') + if [ "$PRIO" != $pool1_prio ]; + then + echo "The primary PG ${PG1} doesn't have prio $pool1_prio on remote" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/dump.${chk_osd2_1}.out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG2} ]; + then + echo "The primary PG ${PG2} didn't become the in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/dump.${chk_osd2_1}.out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != 
$pool2_prio ]; + then + echo "The primary PG ${PG2} doesn't have prio $pool2_prio" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/dump.${chk_osd2_2}.out | jq '.remote_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG2} ]; + then + echo "The primary PG $PG2 didn't become the in progress item on remote" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/dump.${chk_osd2_2}.out | jq '.remote_reservations.in_progress[0].prio') + if [ "$PRIO" != $pool2_prio ]; + then + echo "The primary PG ${PG2} doesn't have prio $pool2_prio on remote" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + wait_for_clean || return 1 + + if [ $ERRORS != "0" ]; + then + echo "$ERRORS error(s) found" + else + echo TEST PASSED + fi + + delete_pool $pool1 + delete_pool $pool2 + kill_daemons $dir || return 1 + return $ERRORS +} + +main osd-backfill-prio "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh osd-backfill-prio.sh" +# End: diff --git a/qa/standalone/osd-backfill/osd-backfill-recovery-log.sh b/qa/standalone/osd-backfill/osd-backfill-recovery-log.sh new file mode 100755 index 000000000..f9a144932 --- /dev/null +++ b/qa/standalone/osd-backfill/osd-backfill-recovery-log.sh @@ -0,0 +1,139 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2019 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + # Fix port???? 
+ export CEPH_MON="127.0.0.1:7129" # git grep '\<7129\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON --osd_max_backfills=1 --debug_reserver=20 " + CEPH_ARGS+="--osd_mclock_override_recovery_settings=true " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + + +function _common_test() { + local dir=$1 + local extra_opts="$2" + local loglen="$3" + local dupslen="$4" + local objects="$5" + local moreobjects=${6:-0} + + local OSDS=6 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + export EXTRA_OPTS=" $extra_opts" + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + create_pool test 1 1 + + for j in $(seq 1 $objects) + do + rados -p test put obj-${j} /etc/passwd + done + + # Mark out all OSDs for this pool + ceph osd out $(ceph pg dump pgs --format=json | jq '.pg_stats[0].up[]') + if [ "$moreobjects" != "0" ]; then + for j in $(seq 1 $moreobjects) + do + rados -p test put obj-more-${j} /etc/passwd + done + fi + sleep 1 + wait_for_clean + + flush_pg_stats + + newprimary=$(ceph pg dump pgs --format=json | jq '.pg_stats[0].up_primary') + kill_daemons + + ERRORS=0 + _objectstore_tool_nodown $dir $newprimary --no-mon-config --pgid 1.0 --op log | tee $dir/result.log + LOGLEN=$(jq '.pg_log_t.log | length' $dir/result.log) + if [ $LOGLEN != "$loglen" ]; then + echo "FAILED: Wrong log length got $LOGLEN (expected $loglen)" + ERRORS=$(expr $ERRORS + 1) + fi + DUPSLEN=$(jq '.pg_log_t.dups | length' $dir/result.log) + if [ $DUPSLEN != "$dupslen" ]; then + echo "FAILED: Wrong dups length got $DUPSLEN (expected $dupslen)" + ERRORS=$(expr $ERRORS + 1) + fi + grep "copy_up_to\|copy_after" $dir/osd.*.log + rm -f $dir/result.log + if [ $ERRORS != "0" ]; then + echo TEST FAILED + return 1 + fi +} + + +# Cause copy_up_to() to only partially copy logs, copy additional dups, and trim dups +function TEST_backfill_log_1() { + local dir=$1 + + _common_test $dir "--osd_min_pg_log_entries=1 --osd_max_pg_log_entries=2 --osd_pg_log_dups_tracked=10" 2 8 150 +} + + +# Cause copy_up_to() to only partially copy logs, copy additional dups +function TEST_backfill_log_2() { + local dir=$1 + + _common_test $dir "--osd_min_pg_log_entries=1 --osd_max_pg_log_entries=2" 2 148 150 +} + + +# Cause copy_after() to only copy logs, no dups +function TEST_recovery_1() { + local dir=$1 + + _common_test $dir "--osd_min_pg_log_entries=50 --osd_max_pg_log_entries=50 --osd_pg_log_dups_tracked=60 --osd_pg_log_trim_min=10" 40 0 40 +} + + +# Cause copy_after() to copy logs with dups +function TEST_recovery_2() { + local dir=$1 + + _common_test $dir "--osd_min_pg_log_entries=150 --osd_max_pg_log_entries=150 --osd_pg_log_dups_tracked=3000 --osd_pg_log_trim_min=10" 151 10 141 20 +} + +main osd-backfill-recovery-log "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh osd-backfill-recovery-log.sh" +# End: diff --git a/qa/standalone/osd-backfill/osd-backfill-space.sh b/qa/standalone/osd-backfill/osd-backfill-space.sh new file mode 100755 index 000000000..6a5c69412 --- /dev/null +++ b/qa/standalone/osd-backfill/osd-backfill-space.sh @@ -0,0 +1,1176 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2018 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it 
and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7180" # git grep '\<7180\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10 " + CEPH_ARGS+="--fake_statfs_for_testing=3686400 " + CEPH_ARGS+="--osd_max_backfills=10 " + CEPH_ARGS+="--osd_mclock_override_recovery_settings=true " + export objects=600 + export poolprefix=test + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + + +function get_num_in_state() { + local state=$1 + local expression + expression+="select(contains(\"${state}\"))" + ceph --format json pg dump pgs 2>/dev/null | \ + jq ".pg_stats | [.[] | .state | $expression] | length" +} + + +function wait_for_not_state() { + local state=$1 + local num_in_state=-1 + local cur_in_state + local -a delays=($(get_timeout_delays $2 5)) + local -i loop=0 + + flush_pg_stats || return 1 + while test $(get_num_pgs) == 0 ; do + sleep 1 + done + + while true ; do + cur_in_state=$(get_num_in_state ${state}) + test $cur_in_state = "0" && break + if test $cur_in_state != $num_in_state ; then + loop=0 + num_in_state=$cur_in_state + elif (( $loop >= ${#delays[*]} )) ; then + ceph pg dump pgs + return 1 + fi + sleep ${delays[$loop]} + loop+=1 + done + return 0 +} + + +function wait_for_not_backfilling() { + local timeout=$1 + wait_for_not_state backfilling $timeout +} + + +function wait_for_not_activating() { + local timeout=$1 + wait_for_not_state activating $timeout +} + +# All tests are created in an environment which has fake total space +# of 3600K (3686400) which can hold 600 6K replicated objects or +# 200 18K shards of erasure coded objects. For a k=3, m=2 EC pool +# we have a theoretical 54K object but with the chunk size of 4K +# and a rounding of 4K to account for the chunks is 36K max object +# which is ((36K / 3) + 4K) * 200 = 3200K which is 88% of +# 3600K for a shard. + +# Create 2 pools with size 1 +# Write enough data that only 1 pool pg can fit per osd +# Incresase the pool size to 2 +# On 3 OSDs this should result in 1 OSD with overlapping replicas, +# so both pools can't fit. We assume pgid 1.0 and 2.0 won't +# map to the same 2 OSDs. +# At least 1 pool shouldn't have room to backfill +# All other pools should go active+clean +function TEST_backfill_test_simple() { + local dir=$1 + local pools=2 + local OSDS=3 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + ceph osd set-backfillfull-ratio .85 + + for p in $(seq 1 $pools) + do + create_pool "${poolprefix}$p" 1 1 + ceph osd pool set "${poolprefix}$p" size 1 --yes-i-really-mean-it + done + + wait_for_clean || return 1 + + # This won't work is if the 2 pools primary and only osds + # are the same. 
+ + dd if=/dev/urandom of=$dir/datafile bs=1024 count=4 + for o in $(seq 1 $objects) + do + for p in $(seq 1 $pools) + do + rados -p "${poolprefix}$p" put obj$o $dir/datafile + done + done + + ceph pg dump pgs + + for p in $(seq 1 $pools) + do + ceph osd pool set "${poolprefix}$p" size 2 + done + sleep 30 + + wait_for_not_backfilling 1200 || return 1 + wait_for_not_activating 60 || return 1 + + ERRORS=0 + if [ "$(ceph pg dump pgs | grep +backfill_toofull | wc -l)" != "1" ]; + then + echo "One pool should have been in backfill_toofull" + ERRORS="$(expr $ERRORS + 1)" + fi + + expected="$(expr $pools - 1)" + if [ "$(ceph pg dump pgs | grep active+clean | wc -l)" != "$expected" ]; + then + echo "$expected didn't finish backfill" + ERRORS="$(expr $ERRORS + 1)" + fi + + ceph pg dump pgs + + if [ $ERRORS != "0" ]; + then + return 1 + fi + + for i in $(seq 1 $pools) + do + delete_pool "${poolprefix}$i" + done + kill_daemons $dir || return 1 + ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1 +} + + +# Create 8 pools of size 1 on 20 OSDs +# Write 4K * 600 objects (only 1 pool pg can fit on any given osd) +# Increase pool size to 2 +# At least 1 pool shouldn't have room to backfill +# All other pools should go active+clean +function TEST_backfill_test_multi() { + local dir=$1 + local pools=8 + local OSDS=20 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + ceph osd set-backfillfull-ratio .85 + + for p in $(seq 1 $pools) + do + create_pool "${poolprefix}$p" 1 1 + ceph osd pool set "${poolprefix}$p" size 1 --yes-i-really-mean-it + done + + wait_for_clean || return 1 + + dd if=/dev/urandom of=$dir/datafile bs=1024 count=4 + for o in $(seq 1 $objects) + do + for p in $(seq 1 $pools) + do + rados -p "${poolprefix}$p" put obj$o $dir/datafile + done + done + + ceph pg dump pgs + + for p in $(seq 1 $pools) + do + ceph osd pool set "${poolprefix}$p" size 2 + done + sleep 30 + + wait_for_not_backfilling 1200 || return 1 + wait_for_not_activating 60 || return 1 + + ERRORS=0 + full="$(ceph pg dump pgs | grep +backfill_toofull | wc -l)" + if [ "$full" -lt "1" ]; + then + echo "At least one pool should have been in backfill_toofull" + ERRORS="$(expr $ERRORS + 1)" + fi + + expected="$(expr $pools - $full)" + if [ "$(ceph pg dump pgs | grep active+clean | wc -l)" != "$expected" ]; + then + echo "$expected didn't finish backfill" + ERRORS="$(expr $ERRORS + 1)" + fi + + ceph pg dump pgs + ceph status + + ceph status --format=json-pretty > $dir/stat.json + + eval SEV=$(jq '.health.checks.PG_BACKFILL_FULL.severity' $dir/stat.json) + if [ "$SEV" != "HEALTH_WARN" ]; then + echo "PG_BACKFILL_FULL severity $SEV not HEALTH_WARN" + ERRORS="$(expr $ERRORS + 1)" + fi + eval MSG=$(jq '.health.checks.PG_BACKFILL_FULL.summary.message' $dir/stat.json) + if [ "$MSG" != "Low space hindering backfill (add storage if this doesn't resolve itself): 4 pgs backfill_toofull" ]; then + echo "PG_BACKFILL_FULL message '$MSG' mismatched" + ERRORS="$(expr $ERRORS + 1)" + fi + rm -f $dir/stat.json + + if [ $ERRORS != "0" ]; + then + return 1 + fi + + for i in $(seq 1 $pools) + do + delete_pool "${poolprefix}$i" + done + # Work around for http://tracker.ceph.com/issues/38195 + kill_daemons $dir #|| return 1 + ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1 +} + + +# To make sure that when 2 pg try to backfill at the same time to +# the same target. 
This might be covered by the simple test above +# but this makes sure we get it. +# +# Create 10 pools of size 2 and identify 2 that have the same +# non-primary osd. +# Delete all other pools +# Set size to 1 and write 4K * 600 to each pool +# Set size back to 2 +# The 2 pools should race to backfill. +# One pool goes active+clean +# The other goes acitve+...+backfill_toofull +function TEST_backfill_test_sametarget() { + local dir=$1 + local pools=10 + local OSDS=5 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + ceph osd set-backfillfull-ratio .85 + + for p in $(seq 1 $pools) + do + create_pool "${poolprefix}$p" 1 1 + ceph osd pool set "${poolprefix}$p" size 2 + done + sleep 5 + + wait_for_clean || return 1 + + ceph pg dump pgs + + # Find 2 pools with a pg that distinct primaries but second + # replica on the same osd. + local PG1 + local POOLNUM1 + local pool1 + local chk_osd1 + local chk_osd2 + + local PG2 + local POOLNUM2 + local pool2 + for p in $(seq 1 $pools) + do + ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting + local test_osd1=$(head -1 $dir/acting) + local test_osd2=$(tail -1 $dir/acting) + if [ $p = "1" ]; + then + PG1="${p}.0" + POOLNUM1=$p + pool1="${poolprefix}$p" + chk_osd1=$test_osd1 + chk_osd2=$test_osd2 + elif [ $chk_osd1 != $test_osd1 -a $chk_osd2 = $test_osd2 ]; + then + PG2="${p}.0" + POOLNUM2=$p + pool2="${poolprefix}$p" + break + fi + done + rm -f $dir/acting + + if [ "$pool2" = "" ]; + then + echo "Failure to find appropirate PGs" + return 1 + fi + + for p in $(seq 1 $pools) + do + if [ $p != $POOLNUM1 -a $p != $POOLNUM2 ]; + then + delete_pool ${poolprefix}$p + fi + done + + ceph osd pool set $pool1 size 1 --yes-i-really-mean-it + ceph osd pool set $pool2 size 1 --yes-i-really-mean-it + + wait_for_clean || return 1 + + dd if=/dev/urandom of=$dir/datafile bs=1024 count=4 + for i in $(seq 1 $objects) + do + rados -p $pool1 put obj$i $dir/datafile + rados -p $pool2 put obj$i $dir/datafile + done + + ceph osd pool set $pool1 size 2 + ceph osd pool set $pool2 size 2 + sleep 30 + + wait_for_not_backfilling 1200 || return 1 + wait_for_not_activating 60 || return 1 + + ERRORS=0 + if [ "$(ceph pg dump pgs | grep +backfill_toofull | wc -l)" != "1" ]; + then + echo "One pool should have been in backfill_toofull" + ERRORS="$(expr $ERRORS + 1)" + fi + + if [ "$(ceph pg dump pgs | grep active+clean | wc -l)" != "1" ]; + then + echo "One didn't finish backfill" + ERRORS="$(expr $ERRORS + 1)" + fi + + ceph pg dump pgs + + if [ $ERRORS != "0" ]; + then + return 1 + fi + + delete_pool $pool1 + delete_pool $pool2 + kill_daemons $dir || return 1 + ! 
grep -q "num_bytes mismatch" $dir/osd.*.log || return 1 +} + +# 2 pools can't both backfill to a target which has other data +# 1 of the pools has objects that increase from 1024 to 2611 bytes +# +# Write to fill pool which is size 1 +# Take fill pool osd down (other 2 pools must go to the remaining OSDs +# Save an export of data on fill OSD and restart it +# Write an intial 1K to pool1 which has pg 2.0 +# Export 2.0 from non-fillpool OSD don't wait for it to start-up +# Take down fillpool OSD +# Put 1K object version of 2.0 on fillpool OSD +# Put back fillpool data on fillpool OSD +# With fillpool down write 2611 byte objects +# Take down $osd and bring back $fillosd simultaneously +# Wait for backfilling +# One PG will be able to backfill its remaining data +# One PG must get backfill_toofull +function TEST_backfill_multi_partial() { + local dir=$1 + local EC=$2 + local pools=2 + local OSDS=3 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + ceph osd set-backfillfull-ratio .85 + + ceph osd set-require-min-compat-client luminous + create_pool fillpool 1 1 + ceph osd pool set fillpool size 1 --yes-i-really-mean-it + for p in $(seq 1 $pools) + do + create_pool "${poolprefix}$p" 1 1 + ceph osd pool set "${poolprefix}$p" size 2 + done + + wait_for_clean || return 1 + + # Partially fill an osd + # We have room for 600 6K replicated objects, if we create 2611 byte objects + # there is 3600K - (2611 * 600) = 2070K, so the fill pool and one + # replica from the other 2 is 85% of 3600K + + dd if=/dev/urandom of=$dir/datafile bs=2611 count=1 + for o in $(seq 1 $objects) + do + rados -p fillpool put obj-fill-${o} $dir/datafile + done + + local fillosd=$(get_primary fillpool obj-fill-1) + osd=$(expr $fillosd + 1) + if [ "$osd" = "$OSDS" ]; then + osd="0" + fi + + kill_daemon $dir/osd.$fillosd.pid TERM + ceph osd out osd.$fillosd + + _objectstore_tool_nodown $dir $fillosd --op export-remove --pgid 1.0 --file $dir/fillexport.out || return 1 + activate_osd $dir $fillosd || return 1 + + ceph pg dump pgs + + dd if=/dev/urandom of=$dir/datafile bs=1024 count=1 + for o in $(seq 1 $objects) + do + rados -p "${poolprefix}1" put obj-1-${o} $dir/datafile + done + + ceph pg dump pgs + # The $osd OSD is started, but we don't wait so we can kill $fillosd at the same time + _objectstore_tool_nowait $dir $osd --op export --pgid 2.0 --file $dir/export.out + kill_daemon $dir/osd.$fillosd.pid TERM + _objectstore_tool_nodown $dir $fillosd --force --op remove --pgid 2.0 + _objectstore_tool_nodown $dir $fillosd --op import --pgid 2.0 --file $dir/export.out || return 1 + _objectstore_tool_nodown $dir $fillosd --op import --pgid 1.0 --file $dir/fillexport.out || return 1 + ceph pg dump pgs + sleep 20 + ceph pg dump pgs + + # re-write everything + dd if=/dev/urandom of=$dir/datafile bs=2611 count=1 + for o in $(seq 1 $objects) + do + for p in $(seq 1 $pools) + do + rados -p "${poolprefix}$p" put obj-${p}-${o} $dir/datafile + done + done + + kill_daemon $dir/osd.$osd.pid TERM + ceph osd out osd.$osd + + activate_osd $dir $fillosd || return 1 + ceph osd in osd.$fillosd + sleep 30 + + wait_for_not_backfilling 1200 || return 1 + wait_for_not_activating 60 || return 1 + + flush_pg_stats || return 1 + ceph pg dump pgs + + ERRORS=0 + if [ "$(get_num_in_state backfill_toofull)" != "1" ]; + then + echo "One PG should be in backfill_toofull" + ERRORS="$(expr $ERRORS + 1)" + fi + + if [ "$(get_num_in_state 
active+clean)" != "2" ]; + then + echo "Two PGs should be active+clean after one PG completed backfill" + ERRORS="$(expr $ERRORS + 1)" + fi + + if [ $ERRORS != "0" ]; + then + return 1 + fi + + delete_pool fillpool + for i in $(seq 1 $pools) + do + delete_pool "${poolprefix}$i" + done + kill_daemons $dir || return 1 + ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1 +} + +# Make sure that the amount of bytes already on the replica doesn't +# cause an out of space condition +# +# Create 1 pool and write 4K * 600 objects +# Remove 25% (150) of the objects with one OSD down (noout set) +# Increase the size of the remaining 75% (450) of the objects to 6K +# Bring back down OSD +# The pool should go active+clean +function TEST_backfill_grow() { + local dir=$1 + local poolname="test" + local OSDS=3 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + ceph osd set-backfillfull-ratio .85 + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 3 + sleep 5 + + wait_for_clean || return 1 + + dd if=/dev/urandom of=${dir}/4kdata bs=1k count=4 + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i $dir/4kdata + done + + local PG=$(get_pg $poolname obj1) + # Remember primary during the backfill + local primary=$(get_primary $poolname obj1) + local otherosd=$(get_not_primary $poolname obj1) + + ceph osd set noout + kill_daemons $dir TERM $otherosd || return 1 + + rmobjects=$(expr $objects / 4) + for i in $(seq 1 $rmobjects) + do + rados -p $poolname rm obj$i + done + + dd if=/dev/urandom of=${dir}/6kdata bs=6k count=1 + for i in $(seq $(expr $rmobjects + 1) $objects) + do + rados -p $poolname put obj$i $dir/6kdata + done + + activate_osd $dir $otherosd || return 1 + + ceph tell osd.$primary debug kick_recovery_wq 0 + + sleep 2 + + wait_for_clean || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 + ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1 +} + +# Create a 5 shard EC pool on 6 OSD cluster +# Fill 1 OSD with 2600K of data take that osd down. +# Write the EC pool on 5 OSDs +# Take down 1 (must contain an EC shard) +# Bring up OSD with fill data +# Not enought room to backfill to partially full OSD +function TEST_ec_backfill_simple() { + local dir=$1 + local EC=$2 + local pools=1 + local OSDS=6 + local k=3 + local m=2 + local ecobjects=$(expr $objects / $k) + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + ceph osd set-backfillfull-ratio .85 + create_pool fillpool 1 1 + ceph osd pool set fillpool size 1 --yes-i-really-mean-it + + # Partially fill an osd + # We have room for 200 18K replicated objects, if we create 13K objects + # there is only 3600K - (13K * 200) = 1000K which won't hold + # a k=3 shard below ((18K / 3) + 4K) * 200 = 2000K + # Actual usage per shard is 8K * 200 = 1600K because 18K/3 is 6K which + # rounds to 8K. The 2000K is the ceiling on the 18K * 200 = 3600K logical + # bytes in the pool. 
+ dd if=/dev/urandom of=$dir/datafile bs=1024 count=13 + for o in $(seq 1 $ecobjects) + do + rados -p fillpool put obj$o $dir/datafile + done + + local fillosd=$(get_primary fillpool obj1) + osd=$(expr $fillosd + 1) + if [ "$osd" = "$OSDS" ]; then + osd="0" + fi + + sleep 5 + kill_daemon $dir/osd.$fillosd.pid TERM + ceph osd out osd.$fillosd + sleep 2 + ceph osd erasure-code-profile set ec-profile k=$k m=$m crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1 + + for p in $(seq 1 $pools) + do + ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile + done + + # Can't wait for clean here because we created a stale pg + #wait_for_clean || return 1 + sleep 5 + + ceph pg dump pgs + + dd if=/dev/urandom of=$dir/datafile bs=1024 count=18 + for o in $(seq 1 $ecobjects) + do + for p in $(seq 1 $pools) + do + rados -p "${poolprefix}$p" put obj$o $dir/datafile + done + done + + kill_daemon $dir/osd.$osd.pid TERM + ceph osd out osd.$osd + + activate_osd $dir $fillosd || return 1 + ceph osd in osd.$fillosd + sleep 30 + + ceph pg dump pgs + + wait_for_not_backfilling 1200 || return 1 + wait_for_not_activating 60 || return 1 + + ceph pg dump pgs + + ERRORS=0 + if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ]; then + echo "One pool should have been in backfill_toofull" + ERRORS="$(expr $ERRORS + 1)" + fi + + if [ $ERRORS != "0" ]; + then + return 1 + fi + + delete_pool fillpool + for i in $(seq 1 $pools) + do + delete_pool "${poolprefix}$i" + done + kill_daemons $dir || return 1 +} + +function osdlist() { + local OSDS=$1 + local excludeosd=$2 + + osds="" + for osd in $(seq 0 $(expr $OSDS - 1)) + do + if [ $osd = $excludeosd ]; + then + continue + fi + if [ -n "$osds" ]; then + osds="${osds} " + fi + osds="${osds}${osd}" + done + echo $osds +} + +# Create a pool with size 1 and fill with data so that only 1 EC shard can fit. +# Write data to 2 EC pools mapped to the same OSDs (excluding filled one) +# Remap the last OSD to partially full OSD on both pools +# The 2 pools should race to backfill. +# One pool goes active+clean +# The other goes acitve+...+backfill_toofull +function TEST_ec_backfill_multi() { + local dir=$1 + local EC=$2 + local pools=2 + local OSDS=6 + local k=3 + local m=2 + local ecobjects=$(expr $objects / $k) + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + # This test requires that shards from 2 different pools + # fit on a given OSD, but both will not fix. I'm using + # making the fillosd plus 1 shard use 75% of the space, + # leaving not enough to be under the 85% set here. + ceph osd set-backfillfull-ratio .85 + + ceph osd set-require-min-compat-client luminous + create_pool fillpool 1 1 + ceph osd pool set fillpool size 1 --yes-i-really-mean-it + + # Partially fill an osd + # We have room for 200 18K replicated objects, if we create 9K objects + # there is only 3600K - (9K * 200) = 1800K which will only hold + # one k=3 shard below ((12K / 3) + 4K) * 200 = 1600K + # The actual data will be (12K / 3) * 200 = 800K because the extra + # is the reservation padding for chunking. 
+ dd if=/dev/urandom of=$dir/datafile bs=1024 count=9 + for o in $(seq 1 $ecobjects) + do + rados -p fillpool put obj$o $dir/datafile + done + + local fillosd=$(get_primary fillpool obj1) + ceph osd erasure-code-profile set ec-profile k=3 m=2 crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1 + + nonfillosds="$(osdlist $OSDS $fillosd)" + + for p in $(seq 1 $pools) + do + ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile + ceph osd pg-upmap "$(expr $p + 1).0" $nonfillosds + done + + # Can't wait for clean here because we created a stale pg + #wait_for_clean || return 1 + sleep 15 + + ceph pg dump pgs + + dd if=/dev/urandom of=$dir/datafile bs=1024 count=12 + for o in $(seq 1 $ecobjects) + do + for p in $(seq 1 $pools) + do + rados -p "${poolprefix}$p" put obj$o-$p $dir/datafile + done + done + + ceph pg dump pgs + + for p in $(seq 1 $pools) + do + ceph osd pg-upmap $(expr $p + 1).0 ${nonfillosds% *} $fillosd + done + + sleep 30 + + wait_for_not_backfilling 1200 || return 1 + wait_for_not_activating 60 || return 1 + + ceph pg dump pgs + + ERRORS=0 + if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ]; + then + echo "One pool should have been in backfill_toofull" + ERRORS="$(expr $ERRORS + 1)" + fi + + if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep active+clean | wc -l)" != "1" ]; + then + echo "One didn't finish backfill" + ERRORS="$(expr $ERRORS + 1)" + fi + + if [ $ERRORS != "0" ]; + then + return 1 + fi + + delete_pool fillpool + for i in $(seq 1 $pools) + do + delete_pool "${poolprefix}$i" + done + kill_daemons $dir || return 1 +} + +# Similar to TEST_ec_backfill_multi but one of the ec pools +# already had some data on the target OSD + +# Create a pool with size 1 and fill with data so that only 1 EC shard can fit. +# Write a small amount of data to 1 EC pool that still includes the filled one +# Take down fillosd with noout set +# Write data to 2 EC pools mapped to the same OSDs (excluding filled one) +# Remap the last OSD to partially full OSD on both pools +# The 2 pools should race to backfill. +# One pool goes active+clean +# The other goes acitve+...+backfill_toofull +function SKIP_TEST_ec_backfill_multi_partial() { + local dir=$1 + local EC=$2 + local pools=2 + local OSDS=5 + local k=3 + local m=2 + local ecobjects=$(expr $objects / $k) + local lastosd=$(expr $OSDS - 1) + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + # This test requires that shards from 2 different pools + # fit on a given OSD, but both will not fix. I'm using + # making the fillosd plus 1 shard use 75% of the space, + # leaving not enough to be under the 85% set here. + ceph osd set-backfillfull-ratio .85 + + ceph osd set-require-min-compat-client luminous + create_pool fillpool 1 1 + ceph osd pool set fillpool size 1 --yes-i-really-mean-it + # last osd + ceph osd pg-upmap 1.0 $lastosd + + # Partially fill an osd + # We have room for 200 18K replicated objects, if we create 9K objects + # there is only 3600K - (9K * 200) = 1800K which will only hold + # one k=3 shard below ((12K / 3) + 4K) * 200 = 1600K + # The actual data will be (12K / 3) * 200 = 800K because the extra + # is the reservation padding for chunking. 
+ dd if=/dev/urandom of=$dir/datafile bs=1024 count=9 + for o in $(seq 1 $ecobjects) + do + rados -p fillpool put obj$o $dir/datafile + done + + local fillosd=$(get_primary fillpool obj1) + ceph osd erasure-code-profile set ec-profile k=3 m=2 crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1 + + nonfillosds="$(osdlist $OSDS $fillosd)" + + for p in $(seq 1 $pools) + do + ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile + ceph osd pg-upmap "$(expr $p + 1).0" $(seq 0 $lastosd) + done + + # Can't wait for clean here because we created a stale pg + #wait_for_clean || return 1 + sleep 15 + + ceph pg dump pgs + + dd if=/dev/urandom of=$dir/datafile bs=1024 count=1 + for o in $(seq 1 $ecobjects) + do + rados -p "${poolprefix}1" put obj$o-1 $dir/datafile + done + + for p in $(seq 1 $pools) + do + ceph osd pg-upmap "$(expr $p + 1).0" $(seq 0 $(expr $lastosd - 1)) + done + ceph pg dump pgs + + #ceph osd set noout + #kill_daemons $dir TERM osd.$lastosd || return 1 + + dd if=/dev/urandom of=$dir/datafile bs=1024 count=12 + for o in $(seq 1 $ecobjects) + do + for p in $(seq 1 $pools) + do + rados -p "${poolprefix}$p" put obj$o-$p $dir/datafile + done + done + + ceph pg dump pgs + + # Now backfill lastosd by adding back into the upmap + for p in $(seq 1 $pools) + do + ceph osd pg-upmap "$(expr $p + 1).0" $(seq 0 $lastosd) + done + #activate_osd $dir $lastosd || return 1 + #ceph tell osd.0 debug kick_recovery_wq 0 + + sleep 30 + ceph pg dump pgs + + wait_for_not_backfilling 1200 || return 1 + wait_for_not_activating 60 || return 1 + + ceph pg dump pgs + + ERRORS=0 + if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ]; + then + echo "One pool should have been in backfill_toofull" + ERRORS="$(expr $ERRORS + 1)" + fi + + if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep active+clean | wc -l)" != "1" ]; + then + echo "One didn't finish backfill" + ERRORS="$(expr $ERRORS + 1)" + fi + + if [ $ERRORS != "0" ]; + then + return 1 + fi + + delete_pool fillpool + for i in $(seq 1 $pools) + do + delete_pool "${poolprefix}$i" + done + kill_daemons $dir || return 1 +} + +function SKIP_TEST_ec_backfill_multi_partial() { + local dir=$1 + local EC=$2 + local pools=2 + local OSDS=6 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + # Below we need to fit 3200K in 3600K which is 88% + # so set to 90% + ceph osd set-backfillfull-ratio .90 + + ceph osd set-require-min-compat-client luminous + create_pool fillpool 1 1 + ceph osd pool set fillpool size 1 --yes-i-really-mean-it + + # Partially fill an osd + # We have room for 200 48K ec objects, if we create 4k replicated objects + # there is 3600K - (4K * 200) = 2800K which won't hold 2 k=3 shard + # of 200 12K objects which takes ((12K / 3) + 4K) * 200 = 1600K each. + # On the other OSDs 2 * 1600K = 3200K which is 88% of 3600K. 
+ dd if=/dev/urandom of=$dir/datafile bs=1024 count=4 + for o in $(seq 1 $objects) + do + rados -p fillpool put obj$o $dir/datafile + done + + local fillosd=$(get_primary fillpool obj1) + osd=$(expr $fillosd + 1) + if [ "$osd" = "$OSDS" ]; then + osd="0" + fi + + sleep 5 + kill_daemon $dir/osd.$fillosd.pid TERM + ceph osd out osd.$fillosd + sleep 2 + ceph osd erasure-code-profile set ec-profile k=3 m=2 crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1 + + for p in $(seq 1 $pools) + do + ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile + done + + # Can't wait for clean here because we created a stale pg + #wait_for_clean || return 1 + sleep 5 + + ceph pg dump pgs + + dd if=/dev/urandom of=$dir/datafile bs=1024 count=12 + for o in $(seq 1 $objects) + do + for p in $(seq 1 $pools) + do + rados -p "${poolprefix}$p" put obj$o $dir/datafile + done + done + + #ceph pg map 2.0 --format=json | jq '.' + kill_daemon $dir/osd.$osd.pid TERM + ceph osd out osd.$osd + + _objectstore_tool_nodown $dir $osd --op export --pgid 2.0 --file $dir/export.out + _objectstore_tool_nodown $dir $fillosd --op import --pgid 2.0 --file $dir/export.out + + activate_osd $dir $fillosd || return 1 + ceph osd in osd.$fillosd + sleep 30 + + wait_for_not_backfilling 1200 || return 1 + wait_for_not_activating 60 || return 1 + + ERRORS=0 + if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ]; + then + echo "One pool should have been in backfill_toofull" + ERRORS="$(expr $ERRORS + 1)" + fi + + if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep active+clean | wc -l)" != "1" ]; + then + echo "One didn't finish backfill" + ERRORS="$(expr $ERRORS + 1)" + fi + + ceph pg dump pgs + + if [ $ERRORS != "0" ]; + then + return 1 + fi + + delete_pool fillpool + for i in $(seq 1 $pools) + do + delete_pool "${poolprefix}$i" + done + kill_daemons $dir || return 1 +} + +# Create 1 EC pool +# Write 200 12K objects ((12K / 3) + 4K) *200) = 1600K +# Take 1 shard's OSD down (with noout set) +# Remove 50 objects ((12K / 3) + 4k) * 50) = 400K +# Write 150 36K objects (grow 150 objects) 2400K +# But there is already 1600K usage so backfill +# would be too full if it didn't account for existing data +# Bring back down OSD so it must backfill +# It should go active+clean taking into account data already there +function TEST_ec_backfill_grow() { + local dir=$1 + local poolname="test" + local OSDS=6 + local k=3 + local m=2 + local ecobjects=$(expr $objects / $k) + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + ceph osd set-backfillfull-ratio .85 + + ceph osd set-require-min-compat-client luminous + ceph osd erasure-code-profile set ec-profile k=$k m=$m crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1 + ceph osd pool create $poolname 1 1 erasure ec-profile + + wait_for_clean || return 1 + + dd if=/dev/urandom of=${dir}/12kdata bs=1k count=12 + for i in $(seq 1 $ecobjects) + do + rados -p $poolname put obj$i $dir/12kdata + done + + local PG=$(get_pg $poolname obj1) + # Remember primary during the backfill + local primary=$(get_primary $poolname obj1) + local otherosd=$(get_not_primary $poolname obj1) + + ceph osd set noout + kill_daemons $dir TERM $otherosd || return 1 + + rmobjects=$(expr $ecobjects / 4) + for i in $(seq 1 $rmobjects) + do + rados -p $poolname rm obj$i + done + + dd if=/dev/urandom of=${dir}/36kdata bs=1k count=36 + for i in $(seq $(expr 
$rmobjects + 1) $ecobjects) + do + rados -p $poolname put obj$i $dir/36kdata + done + + activate_osd $dir $otherosd || return 1 + + ceph tell osd.$primary debug kick_recovery_wq 0 + + sleep 2 + + wait_for_clean || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + +main osd-backfill-space "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh osd-backfill-space.sh" +# End: diff --git a/qa/standalone/osd-backfill/osd-backfill-stats.sh b/qa/standalone/osd-backfill/osd-backfill-stats.sh new file mode 100755 index 000000000..21b42a4ce --- /dev/null +++ b/qa/standalone/osd-backfill/osd-backfill-stats.sh @@ -0,0 +1,761 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2017 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + # Fix port???? + export CEPH_MON="127.0.0.1:7114" # git grep '\<7114\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10 " + export margin=10 + export objects=200 + export poolname=test + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function below_margin() { + local -i check=$1 + shift + local -i target=$1 + + return $(( $check <= $target && $check >= $target - $margin ? 0 : 1 )) +} + +function above_margin() { + local -i check=$1 + shift + local -i target=$1 + + return $(( $check >= $target && $check <= $target + $margin ? 
0 : 1 )) +} + +FIND_UPACT='grep "pg[[]${PG}.*backfilling.*update_calc_stats " $log | tail -1 | sed "s/.*[)] \([[][^ p]*\).*$/\1/"' +FIND_FIRST='grep "pg[[]${PG}.*backfilling.*update_calc_stats $which " $log | grep -F " ${UPACT}${addp}" | grep -v est | head -1 | sed "s/.* \([0-9]*\)$/\1/"' +FIND_LAST='grep "pg[[]${PG}.*backfilling.*update_calc_stats $which " $log | tail -1 | sed "s/.* \([0-9]*\)$/\1/"' + +function check() { + local dir=$1 + local PG=$2 + local primary=$3 + local type=$4 + local degraded_start=$5 + local degraded_end=$6 + local misplaced_start=$7 + local misplaced_end=$8 + local primary_start=${9:-} + local primary_end=${10:-} + local check_setup=${11:-true} + + local log=$(grep -l +backfilling $dir/osd.$primary.log) + if [ $check_setup = "true" ]; + then + local alllogs=$(grep -l +backfilling $dir/osd.*.log) + if [ "$(echo "$alllogs" | wc -w)" != "1" ]; + then + echo "Test setup failure, a single OSD should have performed backfill" + return 1 + fi + fi + + local addp=" " + if [ "$type" = "erasure" ]; + then + addp="p" + fi + + UPACT=$(eval $FIND_UPACT) + [ -n "$UPACT" ] || return 1 + + # Check 3rd line at start because of false recovery starts + local which="degraded" + FIRST=$(eval $FIND_FIRST) + [ -n "$FIRST" ] || return 1 + below_margin $FIRST $degraded_start || return 1 + LAST=$(eval $FIND_LAST) + [ -n "$LAST" ] || return 1 + above_margin $LAST $degraded_end || return 1 + + # Check 3rd line at start because of false recovery starts + which="misplaced" + FIRST=$(eval $FIND_FIRST) + [ -n "$FIRST" ] || return 1 + below_margin $FIRST $misplaced_start || return 1 + LAST=$(eval $FIND_LAST) + [ -n "$LAST" ] || return 1 + above_margin $LAST $misplaced_end || return 1 + + # This is the value of set into MISSING_ON_PRIMARY + if [ -n "$primary_start" ]; + then + which="shard $primary" + FIRST=$(eval $FIND_FIRST) + [ -n "$FIRST" ] || return 1 + below_margin $FIRST $primary_start || return 1 + LAST=$(eval $FIND_LAST) + [ -n "$LAST" ] || return 1 + above_margin $LAST $primary_end || return 1 + fi +} + +# [1] -> [1, 0, 2] +# degraded 1000 -> 0 +# state: active+undersized+degraded+remapped+backfilling + +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 1000 0 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-10-27 09:44:23.531466 22'500 26:617 [1,0,2] 1 [1] 1 0'0 2017-10-27 09:43:44.654882 0'0 2017-10-27 09:43:44.654882 +function TEST_backfill_sizeup() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + run_osd $dir 5 || return 1 + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 1 --yes-i-really-mean-it + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + ceph osd set nobackfill + ceph osd pool set $poolname size 3 + sleep 2 + ceph osd unset nobackfill + + wait_for_clean || return 1 + + local primary=$(get_primary $poolname obj1) + local PG=$(get_pg $poolname obj1) + + local degraded=$(expr $objects \* 2) + check $dir $PG $primary replicated $degraded 0 0 0 || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + + + +# [1] -> [0, 2, 4] +# degraded 1000 -> 0 +# misplaced 500 -> 0 +# state: 
active+undersized+degraded+remapped+backfilling + +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 1000 500 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-10-27 09:48:53.326849 22'500 26:603 [0,2,4] 0 [1] 1 0'0 2017-10-27 09:48:13.236253 0'0 2017-10-27 09:48:13.236253 +function TEST_backfill_sizeup_out() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + run_osd $dir 5 || return 1 + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 1 --yes-i-really-mean-it + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local PG=$(get_pg $poolname obj1) + # Remember primary during the backfill + local primary=$(get_primary $poolname obj1) + + ceph osd set nobackfill + ceph osd out osd.$primary + ceph osd pool set $poolname size 3 + sleep 2 + ceph osd unset nobackfill + + wait_for_clean || return 1 + + local degraded=$(expr $objects \* 2) + check $dir $PG $primary replicated $degraded 0 $objects 0 || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + + +# [1 0] -> [1,2]/[1,0] +# misplaced 500 -> 0 +# state: active+remapped+backfilling + +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 0 500 0 0 100 100 active+remapped+backfilling 2017-10-27 09:51:18.800517 22'500 25:570 [1,2] 1 [1,0] 1 0'0 2017-10-27 09:50:40.441274 0'0 2017-10-27 09:50:40.441274 +function TEST_backfill_out() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + run_osd $dir 5 || return 1 + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 2 + sleep 5 + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local PG=$(get_pg $poolname obj1) + # Remember primary during the backfill + local primary=$(get_primary $poolname obj1) + + ceph osd set nobackfill + ceph osd out osd.$(get_not_primary $poolname obj1) + sleep 2 + ceph osd unset nobackfill + + wait_for_clean || return 1 + + check $dir $PG $primary replicated 0 0 $objects 0 || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + + +# [0, 1] -> [0, 2]/[0] +# osd 1 down/out +# degraded 500 -> 0 +# state: active+undersized+degraded+remapped+backfilling + +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 500 0 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-10-27 09:53:24.051091 22'500 27:719 [0,2] 0 [0] 0 0'0 2017-10-27 09:52:43.188368 0'0 2017-10-27 09:52:43.188368 +function TEST_backfill_down_out() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || 
return 1 + run_osd $dir 5 || return 1 + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 2 + sleep 5 + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local PG=$(get_pg $poolname obj1) + # Remember primary during the backfill + local primary=$(get_primary $poolname obj1) + local otherosd=$(get_not_primary $poolname obj1) + + ceph osd set nobackfill + kill $(cat $dir/osd.${otherosd}.pid) + ceph osd down osd.${otherosd} + ceph osd out osd.${otherosd} + sleep 2 + ceph osd unset nobackfill + + wait_for_clean || return 1 + + check $dir $PG $primary replicated $objects 0 0 0 || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + + +# [1, 0] -> [2, 3, 4] +# degraded 500 -> 0 +# misplaced 1000 -> 0 +# state: active+undersized+degraded+remapped+backfilling + +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 500 1000 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-10-27 09:55:50.375722 23'500 27:553 [2,4,3] 2 [1,0] 1 0'0 2017-10-27 09:55:10.230919 0'0 2017-10-27 09:55:10.230919 +function TEST_backfill_out2() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + run_osd $dir 5 || return 1 + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 2 + sleep 5 + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local PG=$(get_pg $poolname obj1) + # Remember primary during the backfill + local primary=$(get_primary $poolname obj1) + local otherosd=$(get_not_primary $poolname obj1) + + ceph osd set nobackfill + ceph osd pool set $poolname size 3 + ceph osd out osd.${otherosd} + ceph osd out osd.${primary} + # Primary might change before backfill starts + sleep 2 + primary=$(get_primary $poolname obj1) + ceph osd unset nobackfill + ceph tell osd.$primary get_latest_osdmap + ceph tell osd.$primary debug kick_recovery_wq 0 + sleep 2 + + wait_for_clean || return 1 + + local misplaced=$(expr $objects \* 2) + + check $dir $PG $primary replicated $objects 0 $misplaced 0 || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + + +# [0,1] -> [2,4,3]/[0,1] +# degraded 1000 -> 0 +# misplaced 1000 -> 500 +# state ends at active+clean+remapped [2,4,3]/[2,4,3,0] +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 1000 1000 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-10-30 18:21:45.995149 19'500 23:1817 [2,4,3] 2 [0,1] 0 0'0 2017-10-30 18:21:05.109904 0'0 2017-10-30 18:21:05.109904 +# ENDS: +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 0 500 0 0 5 5 active+clean+remapped 2017-10-30 18:22:42.293730 19'500 25:2557 [2,4,3] 2 [2,4,3,0] 2 0'0 2017-10-30 18:21:05.109904 0'0 2017-10-30 18:21:05.109904 +function TEST_backfill_sizeup4_allout() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 
+ run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 2 + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local PG=$(get_pg $poolname obj1) + # Remember primary during the backfill + local primary=$(get_primary $poolname obj1) + local otherosd=$(get_not_primary $poolname obj1) + + ceph osd set nobackfill + ceph osd out osd.$otherosd + ceph osd out osd.$primary + ceph osd pool set $poolname size 4 + # Primary might change before backfill starts + sleep 2 + primary=$(get_primary $poolname obj1) + ceph osd unset nobackfill + ceph tell osd.$primary get_latest_osdmap + ceph tell osd.$primary debug kick_recovery_wq 0 + sleep 2 + + wait_for_clean || return 1 + + local misdeg=$(expr $objects \* 2) + check $dir $PG $primary replicated $misdeg 0 $misdeg $objects || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + + +# [1,2,0] -> [3]/[1,2] +# misplaced 1000 -> 500 +# state ends at active+clean+remapped [3]/[3,1] +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 0 1000 0 0 100 100 active+remapped+backfilling 2017-11-28 19:13:56.092439 21'500 31:790 [3] 3 [1,2] 1 0'0 2017-11-28 19:13:28.698661 0'0 2017-11-28 19:13:28.698661 +function TEST_backfill_remapped() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 3 + sleep 5 + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local PG=$(get_pg $poolname obj1) + # Remember primary during the backfill + local primary=$(get_primary $poolname obj1) + local otherosd=$(get_not_primary $poolname obj1) + + ceph osd set nobackfill + ceph osd out osd.${otherosd} + for i in $(get_osds $poolname obj1) + do + if [ $i = $primary -o $i = $otherosd ]; + then + continue + fi + ceph osd out osd.$i + break + done + ceph osd out osd.${primary} + ceph osd pool set $poolname size 2 + sleep 2 + + # primary may change due to invalidating the old pg_temp, which was [1,2,0], + # but up_primary (3) chooses [0,1] for acting. 
+ primary=$(get_primary $poolname obj1) + + ceph osd unset nobackfill + ceph tell osd.$primary get_latest_osdmap + ceph tell osd.$primary debug kick_recovery_wq 0 + + sleep 2 + + wait_for_clean || return 1 + + local misplaced=$(expr $objects \* 2) + + check $dir $PG $primary replicated 0 0 $misplaced $objects "" "" false || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + +# [1,0,2] -> [4,3,NONE]/[1,0,2] +# misplaced 1500 -> 500 +# state ends at active+clean+remapped [4,3,NONE]/[4,3,2] + +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 0 1500 0 0 100 100 active+degraded+remapped+backfilling 2017-10-31 16:53:39.467126 19'500 23:615 [4,3,NONE] 4 [1,0,2] 1 0'0 2017-10-31 16:52:59.624429 0'0 2017-10-31 16:52:59.624429 + + +# ENDS: + +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 0 500 0 0 5 5 active+clean+remapped 2017-10-31 16:48:34.414040 19'500 25:2049 [4,3,NONE] 4 [4,3,2] 4 0'0 2017-10-31 16:46:58.203440 0'0 2017-10-31 16:46:58.203440 +function TEST_backfill_ec_all_out() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + + ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd + create_pool $poolname 1 1 erasure myprofile + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local PG=$(get_pg $poolname obj1) + # Remember primary during the backfill + local primary=$(get_primary $poolname obj1) + + ceph osd set nobackfill + for o in $(get_osds $poolname obj1) + do + ceph osd out osd.$o + done + # Primary might change before backfill starts + sleep 2 + primary=$(get_primary $poolname obj1) + ceph osd unset nobackfill + ceph tell osd.$primary get_latest_osdmap + ceph tell osd.$primary debug kick_recovery_wq 0 + sleep 2 + + wait_for_clean || return 1 + + local misplaced=$(expr $objects \* 3) + check $dir $PG $primary erasure 0 0 $misplaced $objects || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + + +# [1,0,2] -> [4, 0, 2] +# misplaced 500 -> 0 +# active+remapped+backfilling +# +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 0 500 0 0 100 100 active+remapped+backfilling 2017-11-08 18:05:39.036420 24'500 27:742 [4,0,2] 4 [1,0,2] 1 0'0 2017-11-08 18:04:58.697315 0'0 2017-11-08 18:04:58.697315 +function TEST_backfill_ec_prim_out() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + + ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd + create_pool $poolname 1 1 erasure myprofile + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local 
PG=$(get_pg $poolname obj1) + # Remember primary during the backfill + local primary=$(get_primary $poolname obj1) + + ceph osd set nobackfill + ceph osd out osd.$primary + # Primary might change before backfill starts + sleep 2 + primary=$(get_primary $poolname obj1) + ceph osd unset nobackfill + ceph tell osd.$primary get_latest_osdmap + ceph tell osd.$primary debug kick_recovery_wq 0 + sleep 2 + + wait_for_clean || return 1 + + local misplaced=$(expr $objects \* 3) + check $dir $PG $primary erasure 0 0 $objects 0 || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + +# [1,0] -> [1,2] +# degraded 500 -> 0 +# misplaced 1000 -> 0 +# +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 500 1000 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-11-06 14:02:29.439105 24'500 29:1020 [4,3,5] 4 [1,NONE,2] 1 0'0 2017-11-06 14:01:46.509963 0'0 2017-11-06 14:01:46.509963 +function TEST_backfill_ec_down_all_out() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + run_osd $dir 5 || return 1 + + ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd + create_pool $poolname 1 1 erasure myprofile + ceph osd pool set $poolname min_size 2 + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local PG=$(get_pg $poolname obj1) + # Remember primary during the backfill + local primary=$(get_primary $poolname obj1) + local otherosd=$(get_not_primary $poolname obj1) + local allosds=$(get_osds $poolname obj1) + + ceph osd set nobackfill + kill $(cat $dir/osd.${otherosd}.pid) + ceph osd down osd.${otherosd} + for o in $allosds + do + ceph osd out osd.$o + done + # Primary might change before backfill starts + sleep 2 + primary=$(get_primary $poolname obj1) + ceph osd unset nobackfill + ceph tell osd.$primary get_latest_osdmap + ceph tell osd.$primary debug kick_recovery_wq 0 + sleep 2 + flush_pg_stats + + # Wait for recovery to finish + # Can't use wait_for_clean() because state goes from active+undersized+degraded+remapped+backfilling + # to active+undersized+remapped + while(true) + do + if test "$(ceph --format json pg dump pgs | + jq '.pg_stats | [.[] | .state | select(. 
== "incomplete")] | length')" -ne "0" + then + sleep 2 + continue + fi + break + done + ceph pg dump pgs + for i in $(seq 1 240) + do + if ceph pg dump pgs | grep ^$PG | grep -qv backfilling + then + break + fi + if [ $i = "240" ]; + then + echo "Timeout waiting for recovery to finish" + return 1 + fi + sleep 1 + done + + ceph pg dump pgs + + local misplaced=$(expr $objects \* 2) + check $dir $PG $primary erasure $objects 0 $misplaced 0 || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + + +# [1,0,2] -> [1,3,2] +# degraded 500 -> 0 +# active+backfilling+degraded +# +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 500 0 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-11-06 13:57:25.412322 22'500 28:794 [1,3,2] 1 [1,NONE,2] 1 0'0 2017-11-06 13:54:58.033906 0'0 2017-11-06 13:54:58.033906 +function TEST_backfill_ec_down_out() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + run_osd $dir 5 || return 1 + + ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd + create_pool $poolname 1 1 erasure myprofile + ceph osd pool set $poolname min_size 2 + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local PG=$(get_pg $poolname obj1) + # Remember primary during the backfill + local primary=$(get_primary $poolname obj1) + local otherosd=$(get_not_primary $poolname obj1) + + ceph osd set nobackfill + kill $(cat $dir/osd.${otherosd}.pid) + ceph osd down osd.${otherosd} + ceph osd out osd.${otherosd} + # Primary might change before backfill starts + sleep 2 + primary=$(get_primary $poolname obj1) + ceph osd unset nobackfill + ceph tell osd.$primary get_latest_osdmap + ceph tell osd.$primary debug kick_recovery_wq 0 + sleep 2 + + wait_for_clean || return 1 + + local misplaced=$(expr $objects \* 2) + check $dir $PG $primary erasure $objects 0 0 0 || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + + +main osd-backfill-stats "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh osd-backfill-stats.sh" +# End: |