summaryrefslogtreecommitdiffstats
path: root/qa/standalone/misc
diff options
context:
space:
mode:
Diffstat (limited to 'qa/standalone/misc')
-rwxr-xr-xqa/standalone/misc/mclock-config.sh467
-rwxr-xr-xqa/standalone/misc/network-ping.sh169
-rwxr-xr-xqa/standalone/misc/ok-to-stop.sh296
-rwxr-xr-xqa/standalone/misc/rados-striper.sh101
-rwxr-xr-xqa/standalone/misc/test-ceph-helpers.sh21
-rwxr-xr-xqa/standalone/misc/test-snaptrim-stats.sh188
-rwxr-xr-xqa/standalone/misc/ver-health.sh231
7 files changed, 1473 insertions, 0 deletions
diff --git a/qa/standalone/misc/mclock-config.sh b/qa/standalone/misc/mclock-config.sh
new file mode 100755
index 000000000..59f002584
--- /dev/null
+++ b/qa/standalone/misc/mclock-config.sh
@@ -0,0 +1,467 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2022 Red Hat <contact@redhat.com>
+#
+# Author: Sridhar Seshasayee <sseshasa@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7124" # git grep '\<7124\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+ CEPH_ARGS+="--debug-mclock 20 "
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+function TEST_profile_builtin_to_custom() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 --osd_op_queue=mclock_scheduler || return 1
+
+ # Verify the default mclock profile on the OSD
+ local mclock_profile=$(ceph config get osd.0 osd_mclock_profile)
+ test "$mclock_profile" = "balanced" || return 1
+
+ # Verify the running mClock profile
+ mclock_profile=$(CEPH_ARGS='' ceph --format=json daemon \
+ $(get_asok_path osd.0) config get osd_mclock_profile |\
+ jq .osd_mclock_profile)
+ mclock_profile=$(eval echo $mclock_profile)
+ test "$mclock_profile" = "high_recovery_ops" || return 1
+
+ # Change the mclock profile to 'custom'
+ ceph tell osd.0 config set osd_mclock_profile custom || return 1
+
+ # Verify that the mclock profile is set to 'custom' on the OSDs
+ mclock_profile=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path \
+ osd.0) config get osd_mclock_profile | jq .osd_mclock_profile)
+ mclock_profile=$(eval echo $mclock_profile)
+ test "$mclock_profile" = "custom" || return 1
+
+ # Change a mclock config param and confirm the change
+ local client_res=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path \
+ osd.0) config get osd_mclock_scheduler_client_res | \
+ jq .osd_mclock_scheduler_client_res | bc)
+ echo "client_res = $client_res"
+ local client_res_new=$(echo "$client_res + 0.1" | bc -l)
+ echo "client_res_new = $client_res_new"
+ ceph config set osd.0 osd_mclock_scheduler_client_res \
+ $client_res_new || return 1
+
+ # Check value in config monitor db
+ local res=$(ceph config get osd.0 \
+ osd_mclock_scheduler_client_res) || return 1
+ if (( $(echo "$res != $client_res_new" | bc -l) )); then
+ return 1
+ fi
+ # Check value in the in-memory 'values' map
+ res=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path \
+ osd.0) config get osd_mclock_scheduler_client_res | \
+ jq .osd_mclock_scheduler_client_res | bc)
+ if (( $(echo "$res != $client_res_new" | bc -l) )); then
+ return 1
+ fi
+
+ teardown $dir || return 1
+}
+
+function TEST_profile_custom_to_builtin() {
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 --osd_op_queue=mclock_scheduler || return 1
+
+ # Verify the default mclock profile on the OSD
+ local def_mclock_profile
+ def_mclock_profile=$(ceph config get osd.0 osd_mclock_profile)
+ test "$def_mclock_profile" = "balanced" || return 1
+
+ # Verify the running mClock profile
+ local orig_mclock_profile=$(CEPH_ARGS='' ceph --format=json daemon \
+ $(get_asok_path osd.0) config get osd_mclock_profile |\
+ jq .osd_mclock_profile)
+ orig_mclock_profile=$(eval echo $orig_mclock_profile)
+    test "$orig_mclock_profile" = "high_recovery_ops" || return 1
+
+ # Change the mclock profile to 'custom'
+ ceph tell osd.0 config set osd_mclock_profile custom || return 1
+
+ # Verify that the mclock profile is set to 'custom' on the OSDs
+ local mclock_profile=$(CEPH_ARGS='' ceph --format=json daemon \
+ $(get_asok_path osd.0) config get osd_mclock_profile | \
+ jq .osd_mclock_profile)
+ mclock_profile=$(eval echo $mclock_profile)
+    test "$mclock_profile" = "custom" || return 1
+
+ # Save the original client reservations allocated to the OSDs
+ local client_res
+ client_res=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path \
+ osd.0) config get osd_mclock_scheduler_client_res | \
+ jq .osd_mclock_scheduler_client_res | bc)
+ echo "Original client_res for osd.0 = $client_res"
+
+ # Change a mclock config param and confirm the change
+ local client_res_new=$(echo "$client_res + 0.1" | bc -l)
+ echo "client_res_new = $client_res_new"
+ ceph config set osd osd_mclock_scheduler_client_res \
+ $client_res_new || return 1
+ # Check value in config monitor db
+ local res=$(ceph config get osd.0 \
+ osd_mclock_scheduler_client_res) || return 1
+ if (( $(echo "$res != $client_res_new" | bc -l) )); then
+ return 1
+ fi
+ # Check value in the in-memory 'values' map
+ res=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path \
+ osd.0) config get osd_mclock_scheduler_client_res | \
+ jq .osd_mclock_scheduler_client_res | bc)
+ if (( $(echo "$res != $client_res_new" | bc -l) )); then
+ return 1
+ fi
+
+ # Switch the mclock profile back to the original built-in profile.
+ # The config subsystem prevents the overwrite of the changed QoS config
+ # option above i.e. osd_mclock_scheduler_client_res. This fact is verified
+ # before proceeding to remove the entry from the config monitor db. After
+ # the config entry is removed, the original value for the config option is
+ # restored and is verified.
+ ceph tell osd.0 config set osd_mclock_profile $orig_mclock_profile || return 1
+ # Verify that the mclock profile is set back to the original on the OSD
+ eval mclock_profile=$(CEPH_ARGS='' ceph --format=json daemon \
+ $(get_asok_path osd.0) config get osd_mclock_profile | \
+ jq .osd_mclock_profile)
+ #mclock_profile=$(ceph config get osd.0 osd_mclock_profile)
+ test "$mclock_profile" = "$orig_mclock_profile" || return 1
+
+ # Verify that the new value is still in effect
+ # Check value in config monitor db
+ local res=$(ceph config get osd.0 \
+ osd_mclock_scheduler_client_res) || return 1
+ if (( $(echo "$res != $client_res_new" | bc -l) )); then
+ return 1
+ fi
+ # Check value in the in-memory 'values' map
+ res=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path \
+ osd.0) config get osd_mclock_scheduler_client_res | \
+ jq .osd_mclock_scheduler_client_res | bc)
+ if (( $(echo "$res != $client_res_new" | bc -l) )); then
+ return 1
+ fi
+
+ # Remove the changed QoS config option from monitor db
+ ceph config rm osd osd_mclock_scheduler_client_res || return 1
+
+ sleep 5 # Allow time for change to take effect
+
+ # Verify that the original values are now restored
+ # Check value in config monitor db
+ res=$(ceph config get osd.0 \
+ osd_mclock_scheduler_client_res) || return 1
+ if (( $(echo "$res != 0.0" | bc -l) )); then
+ return 1
+ fi
+
+ # Check value in the in-memory 'values' map
+ res=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path \
+ osd.0) config get osd_mclock_scheduler_client_res | \
+ jq .osd_mclock_scheduler_client_res | bc)
+ if (( $(echo "$res != $client_res" | bc -l) )); then
+ return 1
+ fi
+
+ teardown $dir || return 1
+}
+
+function TEST_recovery_limit_adjustment_mclock() {
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+
+ run_osd $dir 0 --osd_op_queue=mclock_scheduler || return 1
+ local recoveries=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
+ config get osd_recovery_max_active)
+ # Get default value
+ echo "$recoveries" | grep --quiet 'osd_recovery_max_active' || return 1
+
+ # Change the recovery limit without setting
+ # osd_mclock_override_recovery_settings option. Verify that the recovery
+ # limit is retained at its default value.
+ ceph config set osd.0 osd_recovery_max_active 10 || return 1
+ sleep 2 # Allow time for change to take effect
+ local max_recoveries=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
+ config get osd_recovery_max_active)
+ test "$max_recoveries" = "$recoveries" || return 1
+
+ # Change recovery limit after setting osd_mclock_override_recovery_settings.
+ # Verify that the recovery limit is modified.
+ ceph config set osd.0 osd_mclock_override_recovery_settings true || return 1
+ ceph config set osd.0 osd_recovery_max_active 10 || return 1
+ sleep 2 # Allow time for change to take effect
+ max_recoveries=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
+ config get osd_recovery_max_active)
+ test "$max_recoveries" = '{"osd_recovery_max_active":"10"}' || return 1
+
+ teardown $dir || return 1
+}
+
+function TEST_backfill_limit_adjustment_mclock() {
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+
+ run_osd $dir 0 --osd_op_queue=mclock_scheduler || return 1
+ local backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
+ config get osd_max_backfills | jq .osd_max_backfills | bc)
+ # Get default value
+ echo "osd_max_backfills: $backfills" || return 1
+
+ # Change the backfill limit without setting
+ # osd_mclock_override_recovery_settings option. Verify that the backfill
+ # limit is retained at its default value.
+ ceph config set osd.0 osd_max_backfills 20 || return 1
+ sleep 2 # Allow time for change to take effect
+ local max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
+ config get osd_max_backfills | jq .osd_max_backfills | bc)
+ test $max_backfills = $backfills || return 1
+
+ # Verify local and async reserver settings are not changed
+ max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
+ dump_recovery_reservations | jq .local_reservations.max_allowed | bc)
+ test $max_backfills = $backfills || return 1
+ max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
+ dump_recovery_reservations | jq .remote_reservations.max_allowed | bc)
+ test $max_backfills = $backfills || return 1
+
+ # Change backfills limit after setting osd_mclock_override_recovery_settings.
+ # Verify that the backfills limit is modified.
+ ceph config set osd.0 osd_mclock_override_recovery_settings true || return 1
+ ceph config set osd.0 osd_max_backfills 20 || return 1
+ sleep 2 # Allow time for change to take effect
+ max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
+ config get osd_max_backfills | jq .osd_max_backfills | bc)
+ test $max_backfills = 20 || return 1
+
+ # Verify local and async reserver settings are changed
+ max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
+ dump_recovery_reservations | jq .local_reservations.max_allowed | bc)
+ test $max_backfills = 20 || return 1
+ max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
+ dump_recovery_reservations | jq .remote_reservations.max_allowed | bc)
+ test $max_backfills = 20 || return 1
+
+ # Kill osd and bring it back up.
+ # Confirm that the backfill settings are retained.
+ kill_daemons $dir TERM osd || return 1
+ ceph osd down 0 || return 1
+ wait_for_osd down 0 || return 1
+ activate_osd $dir 0 --osd-op-queue=mclock_scheduler || return 1
+
+ max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
+ config get osd_max_backfills | jq .osd_max_backfills | bc)
+ test $max_backfills = 20 || return 1
+
+ # Verify local and async reserver settings are changed
+ max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
+ dump_recovery_reservations | jq .local_reservations.max_allowed | bc)
+ test $max_backfills = 20 || return 1
+ max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
+ dump_recovery_reservations | jq .remote_reservations.max_allowed | bc)
+ test $max_backfills = 20 || return 1
+
+ teardown $dir || return 1
+}
+
+function TEST_profile_disallow_builtin_params_modify() {
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+
+ run_osd $dir 0 --osd_op_queue=mclock_scheduler || return 1
+
+ # Verify that the default mclock profile is set on the OSD
+ local def_mclock_profile=$(ceph config get osd.0 osd_mclock_profile)
+ test "$def_mclock_profile" = "balanced" || return 1
+
+ # Verify the running mClock profile
+ local cur_mclock_profile=$(CEPH_ARGS='' ceph --format=json daemon \
+ $(get_asok_path osd.0) config get osd_mclock_profile |\
+ jq .osd_mclock_profile)
+ cur_mclock_profile=$(eval echo $cur_mclock_profile)
+    test "$cur_mclock_profile" = "high_recovery_ops" || return 1
+
+ declare -a options=("osd_mclock_scheduler_background_recovery_res"
+ "osd_mclock_scheduler_client_res")
+
+ local retries=10
+ local errors=0
+ for opt in "${options[@]}"
+ do
+ # Try and change a mclock config param and confirm that no change occurred
+ local opt_val_orig=$(CEPH_ARGS='' ceph --format=json daemon \
+ $(get_asok_path osd.0) config get $opt | jq .$opt | bc)
+ local opt_val_new=$(echo "$opt_val_orig + 0.1" | bc -l)
+ ceph config set osd.0 $opt $opt_val_new || return 1
+
+ # Check configuration values
+ for count in $(seq 0 $(expr $retries - 1))
+ do
+ errors=0
+ sleep 2 # Allow time for changes to take effect
+
+ echo "Check configuration values - Attempt#: $count"
+ # Check configuration value on Mon store (or the default) for the osd
+ local res=$(ceph config get osd.0 $opt) || return 1
+ echo "Mon db (or default): osd.0 $opt = $res"
+ if (( $(echo "$res == $opt_val_new" | bc -l) )); then
+ errors=$(expr $errors + 1)
+ fi
+
+ # Check running configuration value using "config show" cmd
+ res=$(ceph config show osd.0 | grep $opt |\
+ awk '{ print $2 }' | bc ) || return 1
+ echo "Running config: osd.0 $opt = $res"
+ if (( $(echo "$res == $opt_val_new" | bc -l) || \
+ $(echo "$res != $opt_val_orig" | bc -l) )); then
+ errors=$(expr $errors + 1)
+ fi
+
+ # Check value in the in-memory 'values' map is unmodified
+ res=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path \
+ osd.0) config get $opt | jq .$opt | bc)
+ echo "Values map: osd.0 $opt = $res"
+ if (( $(echo "$res == $opt_val_new" | bc -l) || \
+ $(echo "$res != $opt_val_orig" | bc -l) )); then
+ errors=$(expr $errors + 1)
+ fi
+
+ # Check if we succeeded or exhausted retry count
+ if [ $errors -eq 0 ]
+ then
+ break
+ elif [ $count -eq $(expr $retries - 1) ]
+ then
+ return 1
+ fi
+ done
+ done
+
+ teardown $dir || return 1
+}
+
+function TEST_profile_disallow_builtin_params_override() {
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+
+ run_osd $dir 0 --osd_op_queue=mclock_scheduler || return 1
+
+ # Verify that the default mclock profile is set on the OSD
+ local def_mclock_profile=$(ceph config get osd.0 osd_mclock_profile)
+ test "$def_mclock_profile" = "balanced" || return 1
+
+ # Verify the running mClock profile
+ local cur_mclock_profile=$(CEPH_ARGS='' ceph --format=json daemon \
+ $(get_asok_path osd.0) config get osd_mclock_profile |\
+ jq .osd_mclock_profile)
+ cur_mclock_profile=$(eval echo $cur_mclock_profile)
+    test "$cur_mclock_profile" = "high_recovery_ops" || return 1
+
+ declare -a options=("osd_mclock_scheduler_background_recovery_res"
+ "osd_mclock_scheduler_client_res")
+
+ local retries=10
+ local errors=0
+ for opt in "${options[@]}"
+ do
+ # Override a mclock config param and confirm that no change occurred
+ local opt_val_orig=$(CEPH_ARGS='' ceph --format=json daemon \
+ $(get_asok_path osd.0) config get $opt | jq .$opt | bc)
+ local opt_val_new=$(echo "$opt_val_orig + 0.1" | bc -l)
+ ceph tell osd.0 config set $opt $opt_val_new || return 1
+
+ # Check configuration values
+ for count in $(seq 0 $(expr $retries - 1))
+ do
+ errors=0
+ sleep 2 # Allow time for changes to take effect
+
+ echo "Check configuration values - Attempt#: $count"
+ # Check configuration value on Mon store (or the default) for the osd
+ local res=$(ceph config get osd.0 $opt) || return 1
+ echo "Mon db (or default): osd.0 $opt = $res"
+ if (( $(echo "$res == $opt_val_new" | bc -l) )); then
+ errors=$(expr $errors + 1)
+ fi
+
+ # Check running configuration value using "config show" cmd
+ res=$(ceph config show osd.0 | grep $opt |\
+ awk '{ print $2 }' | bc ) || return 1
+ echo "Running config: osd.0 $opt = $res"
+ if (( $(echo "$res == $opt_val_new" | bc -l) || \
+ $(echo "$res != $opt_val_orig" | bc -l) )); then
+ errors=$(expr $errors + 1)
+ fi
+
+ # Check value in the in-memory 'values' map is unmodified
+ res=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path \
+ osd.0) config get $opt | jq .$opt | bc)
+ echo "Values map: osd.0 $opt = $res"
+ if (( $(echo "$res == $opt_val_new" | bc -l) || \
+ $(echo "$res != $opt_val_orig" | bc -l) )); then
+ errors=$(expr $errors + 1)
+ fi
+
+ # Check if we succeeded or exhausted retry count
+ if [ $errors -eq 0 ]
+ then
+ break
+ elif [ $count -eq $(expr $retries - 1) ]
+ then
+ return 1
+ fi
+ done
+ done
+
+ teardown $dir || return 1
+}
+
+main mclock-config "$@"
+
+# Local Variables:
+# compile-command: "cd build ; make -j4 && \
+# ../qa/run-standalone.sh mclock-config.sh"
+# End:
diff --git a/qa/standalone/misc/network-ping.sh b/qa/standalone/misc/network-ping.sh
new file mode 100755
index 000000000..4745108c5
--- /dev/null
+++ b/qa/standalone/misc/network-ping.sh
@@ -0,0 +1,169 @@
+#!/usr/bin/env bash
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+ CEPH_ARGS+="--debug_disable_randomized_ping=true "
+ CEPH_ARGS+="--debug_heartbeat_testing_span=5 "
+ CEPH_ARGS+="--osd_heartbeat_interval=1 "
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+function TEST_network_ping_test1() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+
+ sleep 5
+
+ create_pool foo 16
+
+ # write some objects
+ timeout 20 rados bench -p foo 10 write -b 4096 --no-cleanup || return 1
+
+ # Get 1 cycle worth of ping data "1 minute"
+ sleep 10
+ flush_pg_stats
+
+ CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network | tee $dir/json
+ test "$(cat $dir/json | jq '.entries | length')" = "0" || return 1
+ test "$(cat $dir/json | jq '.threshold')" = "1000" || return 1
+
+ CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network | tee $dir/json
+ test "$(cat $dir/json | jq '.entries | length')" = "0" || return 1
+ test "$(cat $dir/json | jq '.threshold')" = "1000" || return 1
+
+ CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network 0 | tee $dir/json
+ test "$(cat $dir/json | jq '.entries | length')" = "4" || return 1
+ test "$(cat $dir/json | jq '.threshold')" = "0" || return 1
+
+ CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network 0 | tee $dir/json
+ test "$(cat $dir/json | jq '.entries | length')" = "12" || return 1
+ test "$(cat $dir/json | jq '.threshold')" = "0" || return 1
+
+ # Wait another 4 cycles to get "5 minute interval"
+ sleep 20
+ flush_pg_stats
+ CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network | tee $dir/json
+ test "$(cat $dir/json | jq '.entries | length')" = "0" || return 1
+ test "$(cat $dir/json | jq '.threshold')" = "1000" || return 1
+
+ CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network | tee $dir/json
+ test "$(cat $dir/json | jq '.entries | length')" = "0" || return 1
+ test "$(cat $dir/json | jq '.threshold')" = "1000" || return 1
+
+ CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network 0 | tee $dir/json
+ test "$(cat $dir/json | jq '.entries | length')" = "4" || return 1
+ test "$(cat $dir/json | jq '.threshold')" = "0" || return 1
+
+ CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network 0 | tee $dir/json
+ test "$(cat $dir/json | jq '.entries | length')" = "12" || return 1
+ test "$(cat $dir/json | jq '.threshold')" = "0" || return 1
+
+
+ # Wait another 10 cycles to get "15 minute interval"
+ sleep 50
+ flush_pg_stats
+ CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network | tee $dir/json
+ test "$(cat $dir/json | jq '.entries | length')" = "0" || return 1
+ test "$(cat $dir/json | jq '.threshold')" = "1000" || return 1
+
+ CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network | tee $dir/json
+ test "$(cat $dir/json | jq '.entries | length')" = "0" || return 1
+ test "$(cat $dir/json | jq '.threshold')" = "1000" || return 1
+
+ CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network 0 | tee $dir/json
+ test "$(cat $dir/json | jq '.entries | length')" = "4" || return 1
+ test "$(cat $dir/json | jq '.threshold')" = "0" || return 1
+
+ CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network 0 | tee $dir/json
+ test "$(cat $dir/json | jq '.entries | length')" = "12" || return 1
+ test "$(cat $dir/json | jq '.threshold')" = "0" || return 1
+
+ # Just check the threshold output matches the input
+ CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network 99 | tee $dir/json
+ test "$(cat $dir/json | jq '.threshold')" = "99" || return 1
+ CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network 98 | tee $dir/json
+ test "$(cat $dir/json | jq '.threshold')" = "98" || return 1
+
+ rm -f $dir/json
+}
+
+# Test setting of mon_warn_on_slow_ping_time very low to
+# get health warning
+function TEST_network_ping_test2() {
+ local dir=$1
+
+ export CEPH_ARGS
+ export EXTRA_OPTS=" --mon_warn_on_slow_ping_time=0.001"
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+
+ sleep 5
+ ceph osd crush add-bucket dc1 datacenter
+ ceph osd crush add-bucket dc2 datacenter
+ ceph osd crush add-bucket dc3 datacenter
+ ceph osd crush add-bucket rack1 rack
+ ceph osd crush add-bucket rack2 rack
+ ceph osd crush add-bucket rack3 rack
+ ceph osd crush add-bucket host1 host
+ ceph osd crush add-bucket host2 host
+ ceph osd crush add-bucket host3 host
+ ceph osd crush move dc1 root=default
+ ceph osd crush move dc2 root=default
+ ceph osd crush move dc3 root=default
+ ceph osd crush move rack1 datacenter=dc1
+ ceph osd crush move rack2 datacenter=dc2
+ ceph osd crush move rack3 datacenter=dc3
+ ceph osd crush move host1 rack=rack1
+ ceph osd crush move host2 rack=rack2
+ ceph osd crush move host3 rack=rack3
+ ceph osd crush set osd.0 1.0 host=host1
+ ceph osd crush set osd.1 1.0 host=host2
+ ceph osd crush set osd.2 1.0 host=host3
+ ceph osd crush rule create-simple myrule default host firstn
+
+ create_pool foo 16 16 replicated myrule
+
+ # write some objects
+ timeout 20 rados bench -p foo 10 write -b 4096 --no-cleanup || return 1
+
+ # Get at least 1 cycle of ping data (this test runs with 5 second cycles of 1 second pings)
+ sleep 10
+ flush_pg_stats
+
+ ceph health | tee $dir/health
+ grep -q "Slow OSD heartbeats" $dir/health || return 1
+
+ ceph health detail | tee $dir/health
+ grep -q "OSD_SLOW_PING_TIME_BACK" $dir/health || return 1
+ grep -q "OSD_SLOW_PING_TIME_FRONT" $dir/health || return 1
+ grep -q "Slow OSD heartbeats on front from osd[.][0-2] [[]dc[1-3],rack[1-3][]] \
+to osd[.][0-2] [[]dc[1-3],rack[1-3][]]" $dir/health || return 1
+ rm -f $dir/health
+}
+
+main network-ping "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && ../qa/run-standalone.sh network-ping.sh"
+# End:
diff --git a/qa/standalone/misc/ok-to-stop.sh b/qa/standalone/misc/ok-to-stop.sh
new file mode 100755
index 000000000..dc9e7422f
--- /dev/null
+++ b/qa/standalone/misc/ok-to-stop.sh
@@ -0,0 +1,296 @@
+#!/usr/bin/env bash
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON_A="127.0.0.1:7150" # git grep '\<7150\>' : there must be only one
+ export CEPH_MON_B="127.0.0.1:7151" # git grep '\<7151\>' : there must be only one
+ export CEPH_MON_C="127.0.0.1:7152" # git grep '\<7152\>' : there must be only one
+ export CEPH_MON_D="127.0.0.1:7153" # git grep '\<7153\>' : there must be only one
+ export CEPH_MON_E="127.0.0.1:7154" # git grep '\<7154\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ export ORIG_CEPH_ARGS="$CEPH_ARGS"
+
+ local funcs=${@:-$(set | ${SED} -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ kill_daemons $dir KILL || return 1
+ teardown $dir || return 1
+ done
+}
+
+function TEST_1_mon_checks() {
+ local dir=$1
+
+ CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A "
+
+ run_mon $dir a --public-addr=$CEPH_MON_A || return 1
+
+ ceph mon ok-to-stop dne || return 1
+ ! ceph mon ok-to-stop a || return 1
+
+ ! ceph mon ok-to-add-offline || return 1
+
+ ! ceph mon ok-to-rm a || return 1
+ ceph mon ok-to-rm dne || return 1
+}
+
+function TEST_2_mons_checks() {
+ local dir=$1
+
+ CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B "
+
+ run_mon $dir a --public-addr=$CEPH_MON_A || return 1
+ run_mon $dir b --public-addr=$CEPH_MON_B || return 1
+
+ ceph mon ok-to-stop dne || return 1
+ ! ceph mon ok-to-stop a || return 1
+ ! ceph mon ok-to-stop b || return 1
+ ! ceph mon ok-to-stop a b || return 1
+
+ ceph mon ok-to-add-offline || return 1
+
+ ceph mon ok-to-rm a || return 1
+ ceph mon ok-to-rm b || return 1
+ ceph mon ok-to-rm dne || return 1
+}
+
+function TEST_3_mons_checks() {
+ local dir=$1
+
+ CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C "
+
+ run_mon $dir a --public-addr=$CEPH_MON_A || return 1
+ run_mon $dir b --public-addr=$CEPH_MON_B || return 1
+ run_mon $dir c --public-addr=$CEPH_MON_C || return 1
+ wait_for_quorum 60 3
+
+ ceph mon ok-to-stop dne || return 1
+ ceph mon ok-to-stop a || return 1
+ ceph mon ok-to-stop b || return 1
+ ceph mon ok-to-stop c || return 1
+ ! ceph mon ok-to-stop a b || return 1
+ ! ceph mon ok-to-stop b c || return 1
+ ! ceph mon ok-to-stop a b c || return 1
+
+ ceph mon ok-to-add-offline || return 1
+
+ ceph mon ok-to-rm a || return 1
+ ceph mon ok-to-rm b || return 1
+ ceph mon ok-to-rm c || return 1
+
+ kill_daemons $dir KILL mon.b
+ wait_for_quorum 60 2
+
+ ! ceph mon ok-to-stop a || return 1
+ ceph mon ok-to-stop b || return 1
+ ! ceph mon ok-to-stop c || return 1
+
+ ! ceph mon ok-to-add-offline || return 1
+
+ ! ceph mon ok-to-rm a || return 1
+ ceph mon ok-to-rm b || return 1
+ ! ceph mon ok-to-rm c || return 1
+}
+
+function TEST_4_mons_checks() {
+ local dir=$1
+
+ CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C,$CEPH_MON_D "
+
+ run_mon $dir a --public-addr=$CEPH_MON_A || return 1
+ run_mon $dir b --public-addr=$CEPH_MON_B || return 1
+ run_mon $dir c --public-addr=$CEPH_MON_C || return 1
+ run_mon $dir d --public-addr=$CEPH_MON_D || return 1
+ wait_for_quorum 60 4
+
+ ceph mon ok-to-stop dne || return 1
+ ceph mon ok-to-stop a || return 1
+ ceph mon ok-to-stop b || return 1
+ ceph mon ok-to-stop c || return 1
+ ceph mon ok-to-stop d || return 1
+ ! ceph mon ok-to-stop a b || return 1
+ ! ceph mon ok-to-stop c d || return 1
+
+ ceph mon ok-to-add-offline || return 1
+
+ ceph mon ok-to-rm a || return 1
+ ceph mon ok-to-rm b || return 1
+ ceph mon ok-to-rm c || return 1
+
+ kill_daemons $dir KILL mon.a
+ wait_for_quorum 60 3
+
+ ceph mon ok-to-stop a || return 1
+ ! ceph mon ok-to-stop b || return 1
+ ! ceph mon ok-to-stop c || return 1
+ ! ceph mon ok-to-stop d || return 1
+
+ ceph mon ok-to-add-offline || return 1
+
+ ceph mon ok-to-rm a || return 1
+ ceph mon ok-to-rm b || return 1
+ ceph mon ok-to-rm c || return 1
+ ceph mon ok-to-rm d || return 1
+}
+
+function TEST_5_mons_checks() {
+ local dir=$1
+
+ CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C,$CEPH_MON_D,$CEPH_MON_E "
+
+ run_mon $dir a --public-addr=$CEPH_MON_A || return 1
+ run_mon $dir b --public-addr=$CEPH_MON_B || return 1
+ run_mon $dir c --public-addr=$CEPH_MON_C || return 1
+ run_mon $dir d --public-addr=$CEPH_MON_D || return 1
+ run_mon $dir e --public-addr=$CEPH_MON_E || return 1
+ wait_for_quorum 60 5
+
+ ceph mon ok-to-stop dne || return 1
+ ceph mon ok-to-stop a || return 1
+ ceph mon ok-to-stop b || return 1
+ ceph mon ok-to-stop c || return 1
+ ceph mon ok-to-stop d || return 1
+ ceph mon ok-to-stop e || return 1
+ ceph mon ok-to-stop a b || return 1
+ ceph mon ok-to-stop c d || return 1
+ ! ceph mon ok-to-stop a b c || return 1
+
+ ceph mon ok-to-add-offline || return 1
+
+ ceph mon ok-to-rm a || return 1
+ ceph mon ok-to-rm b || return 1
+ ceph mon ok-to-rm c || return 1
+ ceph mon ok-to-rm d || return 1
+ ceph mon ok-to-rm e || return 1
+
+ kill_daemons $dir KILL mon.a
+ wait_for_quorum 60 4
+
+ ceph mon ok-to-stop a || return 1
+ ceph mon ok-to-stop b || return 1
+ ceph mon ok-to-stop c || return 1
+ ceph mon ok-to-stop d || return 1
+ ceph mon ok-to-stop e || return 1
+
+ ceph mon ok-to-add-offline || return 1
+
+ ceph mon ok-to-rm a || return 1
+ ceph mon ok-to-rm b || return 1
+ ceph mon ok-to-rm c || return 1
+ ceph mon ok-to-rm d || return 1
+ ceph mon ok-to-rm e || return 1
+
+ kill_daemons $dir KILL mon.e
+ wait_for_quorum 60 3
+
+ ceph mon ok-to-stop a || return 1
+ ! ceph mon ok-to-stop b || return 1
+ ! ceph mon ok-to-stop c || return 1
+ ! ceph mon ok-to-stop d || return 1
+ ceph mon ok-to-stop e || return 1
+
+ ! ceph mon ok-to-add-offline || return 1
+
+ ceph mon ok-to-rm a || return 1
+ ! ceph mon ok-to-rm b || return 1
+ ! ceph mon ok-to-rm c || return 1
+ ! ceph mon ok-to-rm d || return 1
+ ceph mon ok-to-rm e || return 1
+}
+
+function TEST_0_mds() {
+ local dir=$1
+
+ CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A "
+
+ run_mon $dir a --public-addr=$CEPH_MON_A || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_mds $dir a || return 1
+
+ ceph osd pool create meta 1 || return 1
+ ceph osd pool create data 1 || return 1
+ ceph fs new myfs meta data || return 1
+ sleep 5
+
+ ! ceph mds ok-to-stop a || return 1
+ ! ceph mds ok-to-stop a dne || return 1
+ ceph mds ok-to-stop dne || return 1
+
+ run_mds $dir b || return 1
+ sleep 5
+
+ ceph mds ok-to-stop a || return 1
+ ceph mds ok-to-stop b || return 1
+ ! ceph mds ok-to-stop a b || return 1
+ ceph mds ok-to-stop a dne1 dne2 || return 1
+ ceph mds ok-to-stop b dne || return 1
+ ! ceph mds ok-to-stop a b dne || return 1
+ ceph mds ok-to-stop dne1 dne2 || return 1
+
+ kill_daemons $dir KILL mds.a
+}
+
+function TEST_0_osd() {
+ local dir=$1
+
+ CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A "
+
+ run_mon $dir a --public-addr=$CEPH_MON_A || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+ run_osd $dir 3 || return 1
+
+ ceph osd erasure-code-profile set ec-profile m=2 k=2 crush-failure-domain=osd || return 1
+ ceph osd pool create ec erasure ec-profile || return 1
+
+ wait_for_clean || return 1
+
+ # with min_size 3, we can stop only 1 osd
+ ceph osd pool set ec min_size 3 || return 1
+ wait_for_clean || return 1
+
+ ceph osd ok-to-stop 0 || return 1
+ ceph osd ok-to-stop 1 || return 1
+ ceph osd ok-to-stop 2 || return 1
+ ceph osd ok-to-stop 3 || return 1
+ ! ceph osd ok-to-stop 0 1 || return 1
+ ! ceph osd ok-to-stop 2 3 || return 1
+    ceph osd ok-to-stop 0 --max 2 | grep '\[0\]' || return 1
+    ceph osd ok-to-stop 1 --max 2 | grep '\[1\]' || return 1
+
+    # with min_size 2 we can stop 2 osds
+ ceph osd pool set ec min_size 2 || return 1
+ wait_for_clean || return 1
+
+ ceph osd ok-to-stop 0 1 || return 1
+ ceph osd ok-to-stop 2 3 || return 1
+ ! ceph osd ok-to-stop 0 1 2 || return 1
+ ! ceph osd ok-to-stop 1 2 3 || return 1
+
+    ceph osd ok-to-stop 0 --max 2 | grep '\[0,1\]' || return 1
+    ceph osd ok-to-stop 0 --max 20 | grep '\[0,1\]' || return 1
+    ceph osd ok-to-stop 2 --max 2 | grep '\[2,3\]' || return 1
+    ceph osd ok-to-stop 2 --max 20 | grep '\[2,3\]' || return 1
+
+ # we should get the same result with one of the osds already down
+ kill_daemons $dir TERM osd.0 || return 1
+ ceph osd down 0 || return 1
+ wait_for_peered || return 1
+
+ ceph osd ok-to-stop 0 || return 1
+ ceph osd ok-to-stop 0 1 || return 1
+ ! ceph osd ok-to-stop 0 1 2 || return 1
+ ! ceph osd ok-to-stop 1 2 3 || return 1
+}
+
+
+main ok-to-stop "$@"
diff --git a/qa/standalone/misc/rados-striper.sh b/qa/standalone/misc/rados-striper.sh
new file mode 100755
index 000000000..be6349b81
--- /dev/null
+++ b/qa/standalone/misc/rados-striper.sh
@@ -0,0 +1,101 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2014 Red Hat <contact@redhat.com>
+#
+# Author: Sebastien Ponce <sebastien.ponce@cern.ch>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+# End-to-end exercise of the rados --striper CLI: put/stat/get/truncate,
+# xattr operations and rm on a striped object, cross-checked against the
+# underlying non-striped rados objects (toyfile.0000000000000000).
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7116" # git grep '\<7116\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+
+ # setup
+ setup $dir || return 1
+
+ # create a cluster with one monitor and three osds
+ run_mon $dir a || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+ create_rbd_pool || return 1
+
+ # create toyfile (checked: a failed dd would otherwise surface only as
+ # a confusing diff failure much later)
+ dd if=/dev/urandom of=$dir/toyfile bs=1234 count=1 || return 1
+
+ # put a striped object
+ rados --pool rbd --striper put toyfile $dir/toyfile || return 1
+
+ # stat it, with and without striping
+ rados --pool rbd --striper stat toyfile | cut -d ',' -f 2 > $dir/stripedStat || return 1
+ rados --pool rbd stat toyfile.0000000000000000 | cut -d ',' -f 2 > $dir/stat || return 1
+ echo ' size 1234' > $dir/refstat
+ diff -w $dir/stripedStat $dir/refstat || return 1
+ diff -w $dir/stat $dir/refstat || return 1
+ # a plain (non-striper) stat of the logical name must fail
+ rados --pool rbd stat toyfile >& $dir/staterror
+ grep -q 'No such file or directory' $dir/staterror || return 1
+
+ # get the file back with and without striping
+ rados --pool rbd --striper get toyfile $dir/stripedGroup || return 1
+ diff -w $dir/toyfile $dir/stripedGroup || return 1
+ rados --pool rbd get toyfile.0000000000000000 $dir/nonSTripedGroup || return 1
+ diff -w $dir/toyfile $dir/nonSTripedGroup || return 1
+
+ # test truncate (checked: was previously unchecked, so a failed
+ # truncate would be misreported by the stat diffs below)
+ rados --pool rbd --striper truncate toyfile 12 || return 1
+ rados --pool rbd --striper stat toyfile | cut -d ',' -f 2 > $dir/stripedStat || return 1
+ rados --pool rbd stat toyfile.0000000000000000 | cut -d ',' -f 2 > $dir/stat || return 1
+ echo ' size 12' > $dir/reftrunc
+ diff -w $dir/stripedStat $dir/reftrunc || return 1
+ diff -w $dir/stat $dir/reftrunc || return 1
+
+ # test xattrs
+
+ rados --pool rbd --striper setxattr toyfile somexattr somevalue || return 1
+ rados --pool rbd --striper getxattr toyfile somexattr > $dir/xattrvalue || return 1
+ rados --pool rbd getxattr toyfile.0000000000000000 somexattr > $dir/xattrvalue2 || return 1
+ echo 'somevalue' > $dir/refvalue
+ diff -w $dir/xattrvalue $dir/refvalue || return 1
+ diff -w $dir/xattrvalue2 $dir/refvalue || return 1
+ rados --pool rbd --striper listxattr toyfile > $dir/xattrlist || return 1
+ echo 'somexattr' > $dir/reflist
+ diff -w $dir/xattrlist $dir/reflist || return 1
+ # the striper's own bookkeeping xattrs are filtered out of the comparison
+ rados --pool rbd listxattr toyfile.0000000000000000 | grep -v striper > $dir/xattrlist2 || return 1
+ diff -w $dir/xattrlist2 $dir/reflist || return 1
+ rados --pool rbd --striper rmxattr toyfile somexattr || return 1
+
+ # getxattr on a removed xattr: the errno text differs per platform
+ local attr_not_found_str="No data available"
+ [ `uname` = FreeBSD ] && \
+ attr_not_found_str="Attribute not found"
+ expect_failure $dir "$attr_not_found_str" \
+ rados --pool rbd --striper getxattr toyfile somexattr || return 1
+ expect_failure $dir "$attr_not_found_str" \
+ rados --pool rbd getxattr toyfile.0000000000000000 somexattr || return 1
+
+ # test rm
+ rados --pool rbd --striper rm toyfile || return 1
+ expect_failure $dir 'No such file or directory' \
+ rados --pool rbd --striper stat toyfile || return 1
+ expect_failure $dir 'No such file or directory' \
+ rados --pool rbd stat toyfile.0000000000000000 || return 1
+
+ # cleanup
+ teardown $dir || return 1
+}
+
+main rados-striper "$@"
diff --git a/qa/standalone/misc/test-ceph-helpers.sh b/qa/standalone/misc/test-ceph-helpers.sh
new file mode 100755
index 000000000..e7805858a
--- /dev/null
+++ b/qa/standalone/misc/test-ceph-helpers.sh
@@ -0,0 +1,21 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2013,2014 Cloudwatt <libre.licensing@cloudwatt.com>
+# Copyright (C) 2014 Red Hat <contact@redhat.com>
+# Copyright (C) 2014 Federico Gimenez <fgimenez@coit.es>
+#
+# Author: Loic Dachary <loic@dachary.org>
+# Author: Federico Gimenez <fgimenez@coit.es>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+$CEPH_ROOT/qa/standalone/ceph-helpers.sh TESTS "$@"
diff --git a/qa/standalone/misc/test-snaptrim-stats.sh b/qa/standalone/misc/test-snaptrim-stats.sh
new file mode 100755
index 000000000..98b3e4fdd
--- /dev/null
+++ b/qa/standalone/misc/test-snaptrim-stats.sh
@@ -0,0 +1,188 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2022 Red Hat <contact@redhat.com>
+#
+# Author: Sridhar Seshasayee <sseshasa@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+# Driver for the TEST_* functions in this file: assemble CEPH_ARGS for a
+# throwaway cluster, then run each requested test (or every TEST_* found
+# in the environment) inside its own setup/teardown cycle.
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7124" # git grep '\<7124\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none --mon-host=$CEPH_MON --debug-bluestore 20 "
+
+ # With no arguments, discover every TEST_* function defined so far.
+ local selected=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for tst in $selected ; do
+ setup $dir || return 1
+ $tst $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+# Create clones via one snapshot, remove the snapshot, and verify the
+# per-PG snaptrim statistics: objects_trimmed must sum to $objects and
+# snaptrim_duration must accumulate to a positive value.
+function TEST_snaptrim_stats() {
+ local dir=$1
+ local poolname=test
+ local OSDS=3
+ local PGNUM=8
+ local PGPNUM=8
+ local objects=10
+ local WAIT_FOR_UPDATE=10
+
+ setup $dir || return 1
+ run_mon $dir a --osd_pool_default_size=$OSDS || return 1
+ run_mgr $dir x || return 1
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ run_osd $dir $osd --osd_pool_default_pg_autoscale_mode=off || return 1
+ done
+
+ # disable scrubs so they cannot perturb the PG stats sampled below
+ ceph osd set noscrub || return 1
+ ceph osd set nodeep-scrub || return 1
+
+ # Create a pool
+ create_pool $poolname $PGNUM $PGPNUM
+ wait_for_clean || return 1
+ poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }')
+
+ # write a few objects
+ TESTDATA="testdata.1"
+ dd if=/dev/urandom of=$TESTDATA bs=4096 count=1
+ for i in `seq 1 $objects`
+ do
+ rados -p $poolname put obj${i} $TESTDATA
+ done
+ rm -f $TESTDATA
+
+ # create a snapshot, then overwrite every object to force clones
+ SNAP=1
+ rados -p $poolname mksnap snap${SNAP}
+ TESTDATA="testdata.2"
+ dd if=/dev/urandom of=$TESTDATA bs=4096 count=1
+ for i in `seq 1 $objects`
+ do
+ rados -p $poolname put obj${i} $TESTDATA
+ done
+ rm -f $TESTDATA
+
+ # remove the snapshot, should trigger snaptrim
+ rados -p $poolname rmsnap snap${SNAP}
+
+ # check for snaptrim stats
+ wait_for_clean || return 1
+ sleep $WAIT_FOR_UPDATE
+ local objects_trimmed=0
+ local snaptrim_duration_total=0.0
+ for i in $(seq 0 $(expr $PGNUM - 1))
+ do
+ local pgid="${poolid}.${i}"
+ objects_trimmed=$(expr $objects_trimmed + $(ceph pg $pgid query | \
+ jq '.info.stats.objects_trimmed'))
+ snaptrim_duration_total=`echo $snaptrim_duration_total + $(ceph pg \
+ $pgid query | jq '.info.stats.snaptrim_duration') | bc`
+ done
+ test $objects_trimmed -eq $objects || return 1
+ # bc always exits 0, so check the value it prints (1 == "true") rather
+ # than its exit status; the previous `| bc || return 1` could never fail
+ test $(echo "$snaptrim_duration_total > 0.0" | bc) -eq 1 || return 1
+
+ teardown $dir || return 1
+}
+
+# Same as TEST_snaptrim_stats, but with several snapshots removed in turn:
+# each removal must trim exactly $objects clones, and the grand total must
+# equal objects * NUMSNAPS.
+function TEST_snaptrim_stats_multiple_snaps() {
+ local dir=$1
+ local poolname=test
+ local OSDS=3
+ local PGNUM=8
+ local PGPNUM=8
+ local objects=10
+ local WAIT_FOR_UPDATE=10
+
+ setup $dir || return 1
+ run_mon $dir a --osd_pool_default_size=$OSDS || return 1
+ run_mgr $dir x || return 1
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ run_osd $dir $osd --osd_pool_default_pg_autoscale_mode=off || return 1
+ done
+
+ # disable scrubs so they cannot perturb the PG stats sampled below
+ ceph osd set noscrub || return 1
+ ceph osd set nodeep-scrub || return 1
+
+ # Create a pool
+ create_pool $poolname $PGNUM $PGPNUM
+ wait_for_clean || return 1
+ poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }')
+
+ # write a few objects
+ local TESTDATA="testdata.0"
+ dd if=/dev/urandom of=$TESTDATA bs=4096 count=1
+ for j in `seq 1 $objects`
+ do
+ rados -p $poolname put obj${j} $TESTDATA
+ done
+ rm -f $TESTDATA
+
+ # create snapshots, clones
+ # (inner loops use 'j' so they no longer shadow the snapshot index 'i')
+ NUMSNAPS=2
+ for i in `seq 1 $NUMSNAPS`
+ do
+ rados -p $poolname mksnap snap${i}
+ TESTDATA="testdata".${i}
+ dd if=/dev/urandom of=$TESTDATA bs=4096 count=1
+ for j in `seq 1 $objects`
+ do
+ rados -p $poolname put obj${j} $TESTDATA
+ done
+ rm -f $TESTDATA
+ done
+
+ # remove the snapshots, should trigger snaptrim
+ local total_objects_trimmed=0
+ for i in `seq 1 $NUMSNAPS`
+ do
+ rados -p $poolname rmsnap snap${i}
+
+ # check for snaptrim stats
+ wait_for_clean || return 1
+ sleep $WAIT_FOR_UPDATE
+ local objects_trimmed=0
+ local snaptrim_duration_total=0.0
+ for j in $(seq 0 $(expr $PGNUM - 1))
+ do
+ local pgid="${poolid}.${j}"
+ objects_trimmed=$(expr $objects_trimmed + $(ceph pg $pgid query | \
+ jq '.info.stats.objects_trimmed'))
+ snaptrim_duration_total=`echo $snaptrim_duration_total + $(ceph pg \
+ $pgid query | jq '.info.stats.snaptrim_duration') | bc`
+ done
+ test $objects_trimmed -eq $objects || return 1
+ # bc always exits 0, so check its printed value (1 == "true") rather
+ # than its exit status; `| bc || return 1` could never fail
+ test $(echo "$snaptrim_duration_total > 0.0" | bc) -eq 1 || return 1
+ total_objects_trimmed=$(expr $total_objects_trimmed + $objects_trimmed)
+ done
+
+ test $total_objects_trimmed -eq $((objects * NUMSNAPS)) || return 1
+
+ teardown $dir || return 1
+}
+main test-snaptrim-stats "$@"
+
+# Local Variables:
+# compile-command: "cd build ; make -j4 && \
+# ../qa/run-standalone.sh test-snaptrim-stats.sh"
+# End:
diff --git a/qa/standalone/misc/ver-health.sh b/qa/standalone/misc/ver-health.sh
new file mode 100755
index 000000000..e03f8f4f5
--- /dev/null
+++ b/qa/standalone/misc/ver-health.sh
@@ -0,0 +1,231 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2020 Red Hat <contact@redhat.com>
+#
+# Author: David Zafman <dzafman@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+# Driver for the TEST_* functions: build the base CEPH_ARGS (saved in
+# ORIG_CEPH_ARGS so each test can splice in its own --mon-host) and run
+# each requested test inside its own setup/teardown cycle.
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON_A="127.0.0.1:7165" # git grep '\<7165\>' : there must be only one
+ export CEPH_MON_B="127.0.0.1:7166" # git grep '\<7166\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ # NOTE(review): CEPH_MON is never set in this script, so this appends an
+ # empty --mon-host=; every TEST_* later overrides it with CEPH_MON_A via
+ # ORIG_CEPH_ARGS -- confirm the empty value is intentional
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+ CEPH_ARGS+="--mon_health_to_clog_tick_interval=1.0 "
+ export ORIG_CEPH_ARGS="$CEPH_ARGS"
+
+ # With no arguments, discover every TEST_* function defined so far.
+ local funcs=${@:-$(set | ${SED} -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+# Poll `ceph health` once per second (default 20s) until its output
+# matches the given grep pattern; return 0 on a match, 1 on timeout.
+function wait_for_health_string() {
+ local grep_string=$1
+ local seconds=${2:-20}
+
+ # Allow mon to notice version difference
+ set -o pipefail
+ matched="no"
+ for ((try = 0; try < seconds; try++)); do
+ if ceph health | grep -q "$grep_string"; then
+ matched="yes"
+ break
+ fi
+ sleep 1
+ done
+ set +o pipefail
+
+ # Make sure health changed
+ [ "$matched" = "yes" ]
+}
+
+
+
+# Test a single OSD with an old version and multiple OSDs with 2 different old versions
+function TEST_check_version_health_1() {
+ local dir=$1
+
+ # Assume MON_A is leader? (the health grep strings below rely on its report)
+ CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A "
+ # setup
+ setup $dir || return 1
+
+ # create a cluster with two monitors and three osds
+ # mon_warn_older_version_delay=0 makes the DAEMON_OLD_VERSION check immediate
+ run_mon $dir a --public-addr=$CEPH_MON_A --mon_warn_older_version_delay=0 || return 1
+ run_mon $dir b --public-addr=$CEPH_MON_B --mon_warn_older_version_delay=0 || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+
+ sleep 5
+ ceph health detail
+ # should not see this yet
+ ceph health detail | grep DAEMON_OLD_VERSION && return 1
+
+ # restart osd.1 advertising a fake old version via the debug knob
+ kill_daemons $dir KILL osd.1
+ ceph_debug_version_for_testing=01.00.00-gversion-test activate_osd $dir 1
+
+ wait_for_health_string "HEALTH_WARN .*There is a daemon running an older version of ceph" || return 1
+
+ ceph health detail
+ # Should notice that osd.1 is a different version
+ ceph health detail | grep -q "HEALTH_WARN .*There is a daemon running an older version of ceph" || return 1
+ ceph health detail | grep -q "^[[]WRN[]] DAEMON_OLD_VERSION: There is a daemon running an older version of ceph" || return 1
+ ceph health detail | grep -q "osd.1 is running an older version of ceph: 01.00.00-gversion-test" || return 1
+
+ # two distinct old versions should escalate the warning to HEALTH_ERR
+ kill_daemons $dir KILL osd.2
+ ceph_debug_version_for_testing=01.00.00-gversion-test activate_osd $dir 2
+ kill_daemons $dir KILL osd.0
+ ceph_debug_version_for_testing=02.00.00-gversion-test activate_osd $dir 0
+
+ wait_for_health_string "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1
+
+ ceph health detail
+ ceph health detail | grep -q "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1
+ ceph health detail | grep -q "^[[]ERR[]] DAEMON_OLD_VERSION: There are daemons running multiple old versions of ceph" || return 1
+ ceph health detail | grep -q "osd.1 osd.2 are running an older version of ceph: 01.00.00-gversion-test" || return 1
+ ceph health detail | grep -q "osd.0 is running an older version of ceph: 02.00.00-gversion-test" || return 1
+}
+
+# Test with 1 MON and 1 MDS with an older version, and add 2 OSDs with different versions
+function TEST_check_version_health_2() {
+ local dir=$1
+
+ # Assume MON_A is leader? (the health grep strings below rely on its report)
+ CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A "
+ # setup
+ setup $dir || return 1
+
+ # create a cluster with all daemon types
+ # mon_warn_older_version_delay=0 makes the DAEMON_OLD_VERSION check immediate
+ run_mon $dir a --public-addr=$CEPH_MON_A --mon_warn_older_version_delay=0 || return 1
+ run_mon $dir b --public-addr=$CEPH_MON_B --mon_warn_older_version_delay=0 || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+ run_mgr $dir x || return 1
+ run_mgr $dir y || return 1
+ run_mds $dir m || return 1
+ run_mds $dir n || return 1
+
+ sleep 5
+ ceph health detail
+ # should not see this yet
+ ceph health detail | grep DAEMON_OLD_VERSION && return 1
+
+ # restart mon.b and mds.m advertising a fake old version via the debug knob
+ kill_daemons $dir KILL mon.b
+ ceph_debug_version_for_testing=01.00.00-gversion-test run_mon $dir b --mon_warn_older_version_delay=0
+ # XXX: Manager doesn't seem to use the test specific config for version
+ #kill_daemons $dir KILL mgr.x
+ #ceph_debug_version_for_testing=02.00.00-gversion-test run_mgr $dir x
+ kill_daemons $dir KILL mds.m
+ ceph_debug_version_for_testing=01.00.00-gversion-test run_mds $dir m
+
+ wait_for_health_string "HEALTH_WARN .*There are daemons running an older version of ceph" || return 1
+
+ ceph health detail
+ # Should notice that mon.b and mds.m is a different version
+ ceph health detail | grep -q "HEALTH_WARN .*There are daemons running an older version of ceph" || return 1
+ ceph health detail | grep -q "^[[]WRN[]] DAEMON_OLD_VERSION: There are daemons running an older version of ceph" || return 1
+ ceph health detail | grep -q "mon.b mds.m are running an older version of ceph: 01.00.00-gversion-test" || return 1
+
+ # a second distinct old version should escalate the warning to HEALTH_ERR
+ kill_daemons $dir KILL osd.2
+ ceph_debug_version_for_testing=01.00.00-gversion-test activate_osd $dir 2
+ kill_daemons $dir KILL osd.0
+ ceph_debug_version_for_testing=02.00.00-gversion-test activate_osd $dir 0
+
+ wait_for_health_string "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1
+
+ ceph health detail
+ ceph health | grep -q "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1
+ ceph health detail | grep -q "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1
+ ceph health detail | grep -q "^[[]ERR[]] DAEMON_OLD_VERSION: There are daemons running multiple old versions of ceph" || return 1
+ ceph health detail | grep -q "mon.b osd.2 mds.m are running an older version of ceph: 01.00.00-gversion-test" || return 1
+ ceph health detail | grep -q "osd.0 is running an older version of ceph: 02.00.00-gversion-test" || return 1
+}
+
+# Verify delay handling with same setup as test 1
+function TEST_check_version_health_3() {
+ local dir=$1
+
+ # Assume MON_A is leader? (the health grep strings below rely on its report)
+ CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A "
+ # setup
+ setup $dir || return 1
+
+ # create a cluster with two monitors and three osds
+ run_mon $dir a --public-addr=$CEPH_MON_A || return 1
+ run_mon $dir b --public-addr=$CEPH_MON_B || return 1
+
+ local start_osd_time=$SECONDS
+ # use memstore for faster bootup
+ EXTRA_OPTS=" --osd-objectstore=memstore" run_osd $dir 0 || return 1
+ EXTRA_OPTS=" --osd-objectstore=memstore" run_osd $dir 1 || return 1
+ EXTRA_OPTS=" --osd-objectstore=memstore" run_osd $dir 2 || return 1
+ # take the time used for boot osds into consideration
+ # (the warn delay is 20s *beyond* osd startup, so the sleeps below line up)
+ local warn_older_version_delay=$(($SECONDS - $start_osd_time + 20))
+
+ sleep 5
+ ceph health detail
+ # should not see this yet
+ ceph health detail | grep DAEMON_OLD_VERSION && return 1
+ ceph tell 'mon.*' injectargs "--mon_warn_older_version_delay $warn_older_version_delay"
+ # restart osd.1 advertising a fake old version via the debug knob
+ kill_daemons $dir KILL osd.1
+ EXTRA_OPTS=" --osd-objectstore=memstore" \
+ ceph_debug_version_for_testing=01.00.00-gversion-test \
+ activate_osd $dir 1
+
+ # Wait 50% of 20 second delay config
+ sleep 10
+ # should not see this yet
+ ceph health detail | grep DAEMON_OLD_VERSION && return 1
+
+ # Now make sure that at least 20 seconds have passed
+ wait_for_health_string "HEALTH_WARN .*There is a daemon running an older version of ceph" 20 || return 1
+
+ ceph health detail
+ # Should notice that osd.1 is a different version
+ ceph health detail | grep -q "HEALTH_WARN .*There is a daemon running an older version of ceph" || return 1
+ ceph health detail | grep -q "^[[]WRN[]] DAEMON_OLD_VERSION: There is a daemon running an older version of ceph" || return 1
+ ceph health detail | grep -q "osd.1 is running an older version of ceph: 01.00.00-gversion-test" || return 1
+
+ # two distinct old versions should escalate the warning to HEALTH_ERR
+ kill_daemons $dir KILL osd.2
+ ceph_debug_version_for_testing=01.00.00-gversion-test activate_osd $dir 2
+ kill_daemons $dir KILL osd.0
+ ceph_debug_version_for_testing=02.00.00-gversion-test activate_osd $dir 0
+
+ wait_for_health_string "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1
+
+ ceph health detail
+ ceph health detail | grep -q "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1
+ ceph health detail | grep -q "^[[]ERR[]] DAEMON_OLD_VERSION: There are daemons running multiple old versions of ceph" || return 1
+ ceph health detail | grep -q "osd.1 osd.2 are running an older version of ceph: 01.00.00-gversion-test" || return 1
+ ceph health detail | grep -q "osd.0 is running an older version of ceph: 02.00.00-gversion-test" || return 1
+}
+
+main ver-health "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && ../qa/run-standalone.sh ver-health.sh"
+# End: