Diffstat (limited to 'qa/standalone')
-rw-r--r--qa/standalone/README23
-rwxr-xr-xqa/standalone/c2c/c2c.sh84
-rwxr-xr-xqa/standalone/ceph-helpers.sh2409
-rwxr-xr-xqa/standalone/crush/crush-choose-args.sh243
-rwxr-xr-xqa/standalone/crush/crush-classes.sh265
-rwxr-xr-xqa/standalone/erasure-code/test-erasure-code-plugins.sh118
-rwxr-xr-xqa/standalone/erasure-code/test-erasure-code.sh337
-rwxr-xr-xqa/standalone/erasure-code/test-erasure-eio.sh700
-rwxr-xr-xqa/standalone/mgr/balancer.sh223
-rwxr-xr-xqa/standalone/misc/mclock-config.sh467
-rwxr-xr-xqa/standalone/misc/network-ping.sh169
-rwxr-xr-xqa/standalone/misc/ok-to-stop.sh296
-rwxr-xr-xqa/standalone/misc/rados-striper.sh101
-rwxr-xr-xqa/standalone/misc/test-ceph-helpers.sh21
-rwxr-xr-xqa/standalone/misc/test-snaptrim-stats.sh188
-rwxr-xr-xqa/standalone/misc/ver-health.sh231
-rwxr-xr-xqa/standalone/mon-stretch/mon-stretch-fail-recovery.sh148
-rwxr-xr-xqa/standalone/mon-stretch/mon-stretch-uneven-crush-weights.sh145
-rwxr-xr-xqa/standalone/mon/health-mute.sh124
-rwxr-xr-xqa/standalone/mon/misc.sh284
-rwxr-xr-xqa/standalone/mon/mkfs.sh193
-rwxr-xr-xqa/standalone/mon/mon-bind.sh143
-rwxr-xr-xqa/standalone/mon/mon-created-time.sh54
-rwxr-xr-xqa/standalone/mon/mon-handle-forward.sh64
-rwxr-xr-xqa/standalone/mon/mon-last-epoch-clean.sh307
-rwxr-xr-xqa/standalone/mon/mon-osdmap-prune.sh57
-rwxr-xr-xqa/standalone/mon/mon-ping.sh46
-rwxr-xr-xqa/standalone/mon/mon-scrub.sh49
-rwxr-xr-xqa/standalone/mon/mon-seesaw.sh72
-rwxr-xr-xqa/standalone/mon/osd-crush.sh196
-rwxr-xr-xqa/standalone/mon/osd-df.sh97
-rwxr-xr-xqa/standalone/mon/osd-erasure-code-profile.sh240
-rwxr-xr-xqa/standalone/mon/osd-pool-create.sh307
-rwxr-xr-xqa/standalone/mon/osd-pool-df.sh76
-rwxr-xr-xqa/standalone/mon/test_pool_quota.sh63
-rwxr-xr-xqa/standalone/osd-backfill/osd-backfill-prio.sh522
-rwxr-xr-xqa/standalone/osd-backfill/osd-backfill-recovery-log.sh139
-rwxr-xr-xqa/standalone/osd-backfill/osd-backfill-space.sh1176
-rwxr-xr-xqa/standalone/osd-backfill/osd-backfill-stats.sh761
-rwxr-xr-xqa/standalone/osd/bad-inc-map.sh62
-rwxr-xr-xqa/standalone/osd/divergent-priors.sh855
-rwxr-xr-xqa/standalone/osd/ec-error-rollforward.sh66
-rwxr-xr-xqa/standalone/osd/osd-bench.sh97
-rwxr-xr-xqa/standalone/osd/osd-bluefs-volume-ops.sh497
-rwxr-xr-xqa/standalone/osd/osd-config.sh97
-rwxr-xr-xqa/standalone/osd/osd-copy-from.sh68
-rwxr-xr-xqa/standalone/osd/osd-dup.sh30
-rwxr-xr-xqa/standalone/osd/osd-fast-mark-down.sh111
-rwxr-xr-xqa/standalone/osd/osd-force-create-pg.sh53
-rwxr-xr-xqa/standalone/osd/osd-markdown.sh149
-rwxr-xr-xqa/standalone/osd/osd-reactivate.sh56
-rwxr-xr-xqa/standalone/osd/osd-recovery-prio.sh542
-rwxr-xr-xqa/standalone/osd/osd-recovery-space.sh176
-rwxr-xr-xqa/standalone/osd/osd-recovery-stats.sh512
-rwxr-xr-xqa/standalone/osd/osd-rep-recov-eio.sh422
-rwxr-xr-xqa/standalone/osd/osd-reuse-id.sh53
-rwxr-xr-xqa/standalone/osd/pg-split-merge.sh203
-rwxr-xr-xqa/standalone/osd/repeer-on-acting-back.sh129
-rwxr-xr-xqa/standalone/osd/repro_long_log.sh197
-rwxr-xr-xqa/standalone/scrub/osd-mapper.sh182
-rwxr-xr-xqa/standalone/scrub/osd-recovery-scrub.sh352
-rwxr-xr-xqa/standalone/scrub/osd-scrub-dump.sh180
-rwxr-xr-xqa/standalone/scrub/osd-scrub-repair.sh6255
-rwxr-xr-xqa/standalone/scrub/osd-scrub-snaps.sh1188
-rwxr-xr-xqa/standalone/scrub/osd-scrub-test.sh664
-rwxr-xr-xqa/standalone/scrub/osd-unexpected-clone.sh89
-rw-r--r--qa/standalone/scrub/scrub-helpers.sh302
-rwxr-xr-xqa/standalone/special/ceph_objectstore_tool.py2045
-rwxr-xr-xqa/standalone/special/test-failure.sh48
69 files changed, 26818 insertions, 0 deletions
diff --git a/qa/standalone/README b/qa/standalone/README
new file mode 100644
index 000000000..3082442cb
--- /dev/null
+++ b/qa/standalone/README
@@ -0,0 +1,23 @@
+qa/standalone
+=============
+
+These scripts run standalone clusters, but not in a normal way. They make
+use of the functions in ceph-helpers.sh to quickly start/stop daemons against
+toy clusters in a single directory.
+
+They are normally run via teuthology based on qa/suites/rados/standalone/*.yaml.
+
+You can run them in a git checkout + build directory as well:
+
+ * The qa/run-standalone.sh script will run all of them in sequence. This is
+   slow since there is no parallelism.
+
+ * You can run individual script(s) by specifying the basename or path below
+ qa/standalone as arguments to qa/run-standalone.sh.
+
+../qa/run-standalone.sh misc.sh osd/osd-dup.sh
+
+ * You can run selected test functions within a script by appending the test
+   names to the script argument, quoted together as a single argument.
+
+../qa/run-standalone.sh "test-ceph-helpers.sh test_get_last_scrub_stamp"
diff --git a/qa/standalone/c2c/c2c.sh b/qa/standalone/c2c/c2c.sh
new file mode 100755
index 000000000..a6969d555
--- /dev/null
+++ b/qa/standalone/c2c/c2c.sh
@@ -0,0 +1,84 @@
+#!/usr/bin/env bash
+
+set -ex
+
+function run_perf_c2c() {
+ # First get some background system info
+ uname -a > uname.out
+ lscpu > lscpu.out
+ cat /proc/cmdline > cmdline.out
+ timeout -s INT 10 vmstat -w 1 > vmstat.out || true
+ sudo dmesg >& dmesg.out
+ cat /proc/cpuinfo > cpuinfo.out
+ ps axo psr,time,stat,ppid,pid,pcpu,comm > ps.1.out
+ ps -eafT > ps.2.out
+ sudo sysctl -a > sysctl.out
+
+ nodecnt=`lscpu|grep "NUMA node(" |awk '{print $3}'`
+ for ((i=0; i<$nodecnt; i++))
+ do
+ sudo cat /sys/devices/system/node/node${i}/meminfo > meminfo.$i.out
+ done
+ sudo more `sudo find /proc -name status` > proc_parent_child_status.out
+ sudo more /proc/*/numa_maps > numa_maps.out
+
+ #
+ # Get separate kernel and user perf-c2c stats
+ #
+ sudo perf c2c record -a --ldlat=70 --all-user -o perf_c2c_a_all_user.data sleep 5
+ sudo perf c2c report --stdio -i perf_c2c_a_all_user.data > perf_c2c_a_all_user.out 2>&1
+ sudo perf c2c report --full-symbols --stdio -i perf_c2c_a_all_user.data > perf_c2c_full-sym_a_all_user.out 2>&1
+
+ sudo perf c2c record --call-graph dwarf -a --ldlat=70 --all-user -o perf_c2c_g_a_all_user.data sleep 5
+ sudo perf c2c report -g --stdio -i perf_c2c_g_a_all_user.data > perf_c2c_g_a_all_user.out 2>&1
+
+ sudo perf c2c record -a --ldlat=70 --all-kernel -o perf_c2c_a_all_kernel.data sleep 4
+ sudo perf c2c report --stdio -i perf_c2c_a_all_kernel.data > perf_c2c_a_all_kernel.out 2>&1
+
+ sudo perf c2c record --call-graph dwarf --ldlat=70 -a --all-kernel -o perf_c2c_g_a_all_kernel.data sleep 4
+
+ sudo perf c2c report -g --stdio -i perf_c2c_g_a_all_kernel.data > perf_c2c_g_a_all_kernel.out 2>&1
+
+ #
+ # Get combined kernel and user perf-c2c stats
+ #
+ sudo perf c2c record -a --ldlat=70 -o perf_c2c_a_both.data sleep 4
+ sudo perf c2c report --stdio -i perf_c2c_a_both.data > perf_c2c_a_both.out 2>&1
+
+ sudo perf c2c record --call-graph dwarf --ldlat=70 -a --all-kernel -o perf_c2c_g_a_both.data sleep 4
+ sudo perf c2c report -g --stdio -i perf_c2c_g_a_both.data > perf_c2c_g_a_both.out 2>&1
+
+ #
+ # Get all-user physical addr stats, in case multiple threads or processes are
+ # accessing shared memory with different vaddrs.
+ #
+ sudo perf c2c record --phys-data -a --ldlat=70 --all-user -o perf_c2c_a_all_user_phys_data.data sleep 5
+ sudo perf c2c report --stdio -i perf_c2c_a_all_user_phys_data.data > perf_c2c_a_all_user_phys_data.out 2>&1
+}
+
+function run() {
+ local dir=$1
+ shift
+ (
+ rm -fr $dir
+ mkdir $dir
+ cd $dir
+ ceph_test_c2c --threads $(($(nproc) * 2)) "$@" &
+ sleep 30 # let it warm up
+ run_perf_c2c
+ kill $! || { echo "ceph_test_c2c WAS NOT RUNNING" ; exit 1 ; }
+ ) || exit 1
+}
+
+function bench() {
+ optimized=$(timeout 30 ceph_test_c2c --threads $(($(nproc) * 2)) --sharding 2> /dev/null || true)
+ not_optimized=$(timeout 30 ceph_test_c2c --threads $(($(nproc) * 2)) 2> /dev/null || true)
+ if ! (( $optimized > ( $not_optimized * 2 ) )) ; then
+ echo "the optimization is expected to be at least x2 faster"
+ exit 1
+ fi
+}
+
+run with-sharding --sharding
+run without-sharding
+bench
diff --git a/qa/standalone/ceph-helpers.sh b/qa/standalone/ceph-helpers.sh
new file mode 100755
index 000000000..bf2c91bc0
--- /dev/null
+++ b/qa/standalone/ceph-helpers.sh
@@ -0,0 +1,2409 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2013,2014 Cloudwatt <libre.licensing@cloudwatt.com>
+# Copyright (C) 2014,2015 Red Hat <contact@redhat.com>
+# Copyright (C) 2014 Federico Gimenez <fgimenez@coit.es>
+#
+# Author: Loic Dachary <loic@dachary.org>
+# Author: Federico Gimenez <fgimenez@coit.es>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+TIMEOUT=300
+WAIT_FOR_CLEAN_TIMEOUT=90
+MAX_TIMEOUT=15
+PG_NUM=4
+TMPDIR=${TMPDIR:-/tmp}
+CEPH_BUILD_VIRTUALENV=${TMPDIR}
+TESTDIR=${TESTDIR:-${TMPDIR}}
+
+if type xmlstarlet > /dev/null 2>&1; then
+ XMLSTARLET=xmlstarlet
+elif type xml > /dev/null 2>&1; then
+ XMLSTARLET=xml
+else
+ echo "Missing xmlstarlet binary!"
+ exit 1
+fi
+
+if [ `uname` = FreeBSD ]; then
+ SED=gsed
+ AWK=gawk
+ DIFFCOLOPTS=""
+ KERNCORE="kern.corefile"
+else
+ SED=sed
+ AWK=awk
+ termwidth=$(stty -a | head -1 | sed -e 's/.*columns \([0-9]*\).*/\1/')
+ if [ -n "$termwidth" -a "$termwidth" != "0" ]; then
+ termwidth="-W ${termwidth}"
+ fi
+ DIFFCOLOPTS="-y $termwidth"
+ KERNCORE="kernel.core_pattern"
+fi
+
+EXTRA_OPTS=""
+
+#! @file ceph-helpers.sh
+# @brief Toolbox to manage a Ceph cluster dedicated to testing
+#
+# Example use case:
+#
+# ~~~~~~~~~~~~~~~~{.sh}
+# source ceph-helpers.sh
+#
+# function mytest() {
+# # cleanup leftovers and reset mydir
+# setup mydir
+# # create a cluster with one monitor and three osds
+# run_mon mydir a
+# run_osd mydir 0
+# run_osd mydir 2
+# run_osd mydir 3
+# # put and get an object
+# rados --pool rbd put GROUP /etc/group
+# rados --pool rbd get GROUP /tmp/GROUP
+# # stop the cluster and cleanup the directory
+# teardown mydir
+# }
+# ~~~~~~~~~~~~~~~~
+#
+# The focus is on simplicity and efficiency, in the context of
+# functional tests. The output is intentionally very verbose
+# and functions return as soon as an error is found. The caller
+# is also expected to abort on the first error so that debugging
+# can be done by looking at the end of the output.
+#
+# Each function is documented, implemented and tested independently.
+# When modifying a helper, the test and the documentation are
+# expected to be updated and it is easier if they are collocated. A
+# test for a given function can be run with
+#
+# ~~~~~~~~~~~~~~~~{.sh}
+# ceph-helpers.sh TESTS test_get_osds
+# ~~~~~~~~~~~~~~~~
+#
+# and all the tests (i.e. all functions matching test_*) are run
+# with:
+#
+# ~~~~~~~~~~~~~~~~{.sh}
+# ceph-helpers.sh TESTS
+# ~~~~~~~~~~~~~~~~
+#
+# A test function takes a single argument: the directory dedicated
+# to the tests. It is expected not to create any file outside of this
+# directory and to remove it entirely when it completes successfully.
+#
+
+
+function get_asok_dir() {
+ if [ -n "$CEPH_ASOK_DIR" ]; then
+ echo "$CEPH_ASOK_DIR"
+ else
+ echo ${TMPDIR:-/tmp}/ceph-asok.$$
+ fi
+}
+
+function get_asok_path() {
+ local name=$1
+ if [ -n "$name" ]; then
+ echo $(get_asok_dir)/ceph-$name.asok
+ else
+ echo $(get_asok_dir)/\$cluster-\$name.asok
+ fi
+}
+##
+# Cleanup any leftovers found in **dir** via **teardown**
+# and reset **dir** as an empty environment.
+#
+# @param dir path name of the environment
+# @return 0 on success, 1 on error
+#
+function setup() {
+ local dir=$1
+ teardown $dir || return 1
+ mkdir -p $dir
+ mkdir -p $(get_asok_dir)
+ if [ $(ulimit -n) -le 1024 ]; then
+ ulimit -n 4096 || return 1
+ fi
+ if [ -z "$LOCALRUN" ]; then
+ trap "teardown $dir 1" TERM HUP INT
+ fi
+}
+
+function test_setup() {
+    local dir=$1
+ setup $dir || return 1
+ test -d $dir || return 1
+ setup $dir || return 1
+ test -d $dir || return 1
+ teardown $dir
+}
+
+#######################################################################
+
+##
+# Kill all daemons for which a .pid file exists in **dir** and remove
+# **dir**. If the file system in which **dir** resides is btrfs, delete all
+# subvolumes that relate to it.
+#
+# @param dir path name of the environment
+# @param dumplogs pass "1" to always dump logs; otherwise logs are only dumped if cores are found
+# @return 0 on success, 1 on error
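+#
+# Examples (illustrative; $dir is the test directory used throughout this file):
+#
+#   teardown $dir       # dump logs only if cores are found
+#   teardown $dir 1     # always dump the daemon logs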
+#
+function teardown() {
+ local dir=$1
+ local dumplogs=$2
+ kill_daemons $dir KILL
+ if [ `uname` != FreeBSD ] \
+ && [ $(stat -f -c '%T' .) == "btrfs" ]; then
+ __teardown_btrfs $dir
+ fi
+ local cores="no"
+ local pattern="$(sysctl -n $KERNCORE)"
+ # See if we have apport core handling
+ if [ "${pattern:0:1}" = "|" ]; then
+ # TODO: Where can we get the dumps?
+ # Not sure where the dumps really are so this will look in the CWD
+ pattern=""
+ fi
+    # Locally core file names start with "core"; on teuthology they end with "core"
+ if ls $(dirname "$pattern") | grep -q '^core\|core$' ; then
+ cores="yes"
+ if [ -n "$LOCALRUN" ]; then
+ mkdir /tmp/cores.$$ 2> /dev/null || true
+ for i in $(ls $(dirname $(sysctl -n $KERNCORE)) | grep '^core\|core$'); do
+ mv $i /tmp/cores.$$
+ done
+ fi
+ fi
+ if [ "$cores" = "yes" -o "$dumplogs" = "1" ]; then
+ if [ -n "$LOCALRUN" ]; then
+ display_logs $dir
+ else
+ # Move logs to where Teuthology will archive it
+ mkdir -p $TESTDIR/archive/log
+ mv $dir/*.log $TESTDIR/archive/log
+ fi
+ fi
+ rm -fr $dir
+ rm -rf $(get_asok_dir)
+ if [ "$cores" = "yes" ]; then
+ echo "ERROR: Failure due to cores found"
+ if [ -n "$LOCALRUN" ]; then
+ echo "Find saved core files in /tmp/cores.$$"
+ fi
+ return 1
+ fi
+ return 0
+}
+
+function __teardown_btrfs() {
+ local btrfs_base_dir=$1
+ local btrfs_root=$(df -P . | tail -1 | $AWK '{print $NF}')
+ local btrfs_dirs=$(cd $btrfs_base_dir; sudo btrfs subvolume list -t . | $AWK '/^[0-9]/ {print $4}' | grep "$btrfs_base_dir/$btrfs_dir")
+ for subvolume in $btrfs_dirs; do
+ sudo btrfs subvolume delete $btrfs_root/$subvolume
+ done
+}
+
+function test_teardown() {
+    local dir=$1
+ setup $dir || return 1
+ teardown $dir || return 1
+ ! test -d $dir || return 1
+}
+
+#######################################################################
+
+##
+# Sends a signal to a single daemon.
+# This is a helper function for kill_daemons
+#
+# After the daemon is sent **signal**, its actual termination
+# will be verified by sending it signal 0. If the daemon is
+# still alive, kill_daemon will pause for a few seconds and
+# try again. This will repeat for a fixed number of times
+# before kill_daemon returns on failure. The list of
+# sleep intervals can be specified as **delays** and defaults
+# to:
+#
+# 0.1 0.2 1 1 1 2 3 5 5 5 10 10 20 60 60 60 120
+#
+# This sequence starts with a very short sleep (0.1) in case the machine
+# is fast enough for the daemon to terminate in a fraction of a
+# second. The increasing sleep intervals should give plenty of time for
+# the daemon to die even on the slowest machine. If a daemon
+# takes more than a few minutes to stop (the sum of all sleep times),
+# there is probably no point in waiting any longer and a number of things
+# are likely to go wrong anyway: better give up and return an error.
+#
+# @param pidfile path of the file containing the process id to signal
+# @param send_signal the signal to send
+# @param delays sequence of sleep times before failure
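+#
+# Example (illustrative; the pid file path follows the naming convention
+# used by run_osd and run_mon):
+#
+#   kill_daemon $dir/osd.0.pid TERM            # use the default delays
+#   kill_daemon $dir/osd.0.pid KILL "1 2 4"    # give up after ~7 seconds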
+#
+function kill_daemon() {
+ local pid=$(cat $1)
+ local send_signal=$2
+ local delays=${3:-0.1 0.2 1 1 1 2 3 5 5 5 10 10 20 60 60 60 120}
+ local exit_code=1
+ # In order to try after the last large sleep add 0 at the end so we check
+ # one last time before dropping out of the loop
+ for try in $delays 0 ; do
+ if kill -$send_signal $pid 2> /dev/null ; then
+ exit_code=1
+ else
+ exit_code=0
+ break
+ fi
+ send_signal=0
+ sleep $try
+ done;
+ return $exit_code
+}
+
+function test_kill_daemon() {
+ local dir=$1
+ setup $dir || return 1
+ run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+
+ name_prefix=osd
+ for pidfile in $(find $dir 2>/dev/null | grep $name_prefix'[^/]*\.pid') ; do
+ #
+ # sending signal 0 won't kill the daemon
+ # waiting just for one second instead of the default schedule
+ # allows us to quickly verify what happens when kill fails
+ # to stop the daemon (i.e. it must return false)
+ #
+ ! kill_daemon $pidfile 0 1 || return 1
+ #
+        # kill just the osd and verify the mon is still responsive
+ #
+ kill_daemon $pidfile TERM || return 1
+ done
+
+ name_prefix=mgr
+ for pidfile in $(find $dir 2>/dev/null | grep $name_prefix'[^/]*\.pid') ; do
+ #
+ # kill the mgr
+ #
+ kill_daemon $pidfile TERM || return 1
+ done
+
+ name_prefix=mon
+ for pidfile in $(find $dir 2>/dev/null | grep $name_prefix'[^/]*\.pid') ; do
+ #
+ # kill the mon and verify it cannot be reached
+ #
+ kill_daemon $pidfile TERM || return 1
+ ! timeout 5 ceph status || return 1
+ done
+
+ teardown $dir || return 1
+}
+
+##
+# Kill all daemons for which a .pid file exists in **dir**. Each
+# daemon is sent a **signal** and kill_daemons waits up to a few
+# minutes for it to exit. By default all daemons are killed. If a
+# **name_prefix** is provided, only the daemons for which a pid
+# file is found matching the prefix are killed. See run_osd and
+# run_mon for more information about the name conventions for
+# the pid files.
+#
+# Send TERM to all daemons : kill_daemons $dir
+# Send KILL to all daemons : kill_daemons $dir KILL
+# Send KILL to all osds : kill_daemons $dir KILL osd
+# Send KILL to osd 1 : kill_daemons $dir KILL osd.1
+#
+# If a daemon is sent the TERM signal and does not terminate
+# within a few minutes, it will still be running even after
+# kill_daemons returns.
+#
+# If all daemons are killed successfully the function returns 0;
+# if at least one daemon remains, this is treated as an
+# error and the function returns 1.
+#
+# @param dir path name of the environment
+# @param signal name of the first signal (defaults to TERM)
+# @param name_prefix only kill matching daemons (defaults to all)
+# @param delays sequence of sleep times before failure
+# @return 0 on success, 1 on error
+#
+function kill_daemons() {
+ local trace=$(shopt -q -o xtrace && echo true || echo false)
+ $trace && shopt -u -o xtrace
+ local dir=$1
+ local signal=${2:-TERM}
+ local name_prefix=$3 # optional, osd, mon, osd.1
+ local delays=$4 #optional timing
+ local status=0
+ local pids=""
+
+ for pidfile in $(find $dir 2>/dev/null | grep $name_prefix'[^/]*\.pid') ; do
+ run_in_background pids kill_daemon $pidfile $signal $delays
+ done
+
+ wait_background pids
+ status=$?
+
+ $trace && shopt -s -o xtrace
+ return $status
+}
+
+function test_kill_daemons() {
+ local dir=$1
+ setup $dir || return 1
+ run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ #
+ # sending signal 0 won't kill the daemon
+ # waiting just for one second instead of the default schedule
+ # allows us to quickly verify what happens when kill fails
+ # to stop the daemon (i.e. it must return false)
+ #
+ ! kill_daemons $dir 0 osd 1 || return 1
+ #
+    # kill just the osd and verify the mon is still responsive
+ #
+ kill_daemons $dir TERM osd || return 1
+ #
+ # kill the mgr
+ #
+ kill_daemons $dir TERM mgr || return 1
+ #
+ # kill the mon and verify it cannot be reached
+ #
+ kill_daemons $dir TERM || return 1
+ ! timeout 5 ceph status || return 1
+ teardown $dir || return 1
+}
+
+#
+# return a random TCP port which is not used yet
+#
+# please note, this is racy: another process may grab the port between
+# the time it is picked here and the time the caller binds to it.
+#
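+# Example (illustrative): pick a port for a mon before starting it; retry
+# if the later bind fails because of the race described above.
+#
+#   port=$(get_unused_port)
+#   run_mon $dir a --mon-host=127.0.0.1:$port
+#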
+function get_unused_port() {
+ local ip=127.0.0.1
+ python3 -c "import socket; s=socket.socket(); s.bind(('$ip', 0)); print(s.getsockname()[1]); s.close()"
+}
+
+#######################################################################
+
+##
+# Run a monitor by the name mon.**id** with data in **dir**/**id**.
+# The logs can be found in **dir**/mon.**id**.log and the pid file
+# is **dir**/mon.**id**.pid and the admin socket is
+# **dir**/**id**/ceph-mon.**id**.asok.
+#
+# The remaining arguments are passed verbatim to ceph-mon --mkfs
+# and the ceph-mon daemon.
+#
+# Two mandatory arguments must be provided: --fsid and --mon-host
+# Instead of adding them to every call to run_mon, they can be
+# set in the CEPH_ARGS environment variable to be read implicitly
+# by every ceph command.
+#
+# The CEPH_CONF variable is expected to be set to /dev/null to
+# only rely on arguments for configuration.
+#
+# Examples:
+#
+# CEPH_ARGS="--fsid=$(uuidgen) "
+# CEPH_ARGS+="--mon-host=127.0.0.1:7018 "
+# run_mon $dir a # spawn a mon and bind port 7018
+# run_mon $dir a --debug-filestore=20 # spawn with filestore debugging
+#
+# If mon_initial_members is not set, the default rbd pool is deleted
+# and replaced with a replicated pool with fewer placement groups to
+# speed up initialization. If mon_initial_members is set, no attempt
+# is made to recreate the rbd pool because it would hang forever,
+# waiting for other mons to join.
+#
+# A **dir**/ceph.conf file is created but not meant to be used by any
+# function. It is convenient for debugging a failure with:
+#
+# ceph --conf **dir**/ceph.conf -s
+#
+# @param dir path name of the environment
+# @param id mon identifier
+# @param ... can be any option valid for ceph-mon
+# @return 0 on success, 1 on error
+#
+function run_mon() {
+ local dir=$1
+ shift
+ local id=$1
+ shift
+ local data=$dir/$id
+
+ ceph-mon \
+ --id $id \
+ --mkfs \
+ --mon-data=$data \
+ --run-dir=$dir \
+ "$@" || return 1
+
+ ceph-mon \
+ --id $id \
+ --osd-failsafe-full-ratio=.99 \
+ --mon-osd-full-ratio=.99 \
+ --mon-data-avail-crit=1 \
+ --mon-data-avail-warn=5 \
+ --paxos-propose-interval=0.1 \
+ --osd-crush-chooseleaf-type=0 \
+ $EXTRA_OPTS \
+ --debug-mon 20 \
+ --debug-ms 20 \
+ --debug-paxos 20 \
+ --chdir= \
+ --mon-data=$data \
+ --log-file=$dir/\$name.log \
+ --admin-socket=$(get_asok_path) \
+ --mon-cluster-log-file=$dir/log \
+ --run-dir=$dir \
+ --pid-file=$dir/\$name.pid \
+ --mon-allow-pool-delete \
+ --mon-allow-pool-size-one \
+ --osd-pool-default-pg-autoscale-mode off \
+ --mon-osd-backfillfull-ratio .99 \
+ --mon-warn-on-insecure-global-id-reclaim-allowed=false \
+ "$@" || return 1
+
+ cat > $dir/ceph.conf <<EOF
+[global]
+fsid = $(get_config mon $id fsid)
+mon host = $(get_config mon $id mon_host)
+EOF
+}
+
+function test_run_mon() {
+ local dir=$1
+
+ setup $dir || return 1
+
+ run_mon $dir a || return 1
+ ceph mon dump | grep "mon.a" || return 1
+ kill_daemons $dir || return 1
+
+ run_mon $dir a --osd_pool_default_size=3 || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+ create_rbd_pool || return 1
+ ceph osd dump | grep "pool 1 'rbd'" || return 1
+ local size=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path mon.a) \
+ config get osd_pool_default_size)
+ test "$size" = '{"osd_pool_default_size":"3"}' || return 1
+
+ ! CEPH_ARGS='' ceph status || return 1
+ CEPH_ARGS='' ceph --conf $dir/ceph.conf status || return 1
+
+ kill_daemons $dir || return 1
+
+ run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
+ local size=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path mon.a) \
+ config get osd_pool_default_size)
+ test "$size" = '{"osd_pool_default_size":"1"}' || return 1
+ kill_daemons $dir || return 1
+
+ CEPH_ARGS="$CEPH_ARGS --osd_pool_default_size=2" \
+ run_mon $dir a || return 1
+ local size=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path mon.a) \
+ config get osd_pool_default_size)
+ test "$size" = '{"osd_pool_default_size":"2"}' || return 1
+ kill_daemons $dir || return 1
+
+ teardown $dir || return 1
+}
+
+function create_rbd_pool() {
+ ceph osd pool delete rbd rbd --yes-i-really-really-mean-it || return 1
+ create_pool rbd $PG_NUM || return 1
+ rbd pool init rbd
+}
+
+function create_pool() {
+ ceph osd pool create "$@"
+ sleep 1
+}
+
+function delete_pool() {
+ local poolname=$1
+ ceph osd pool delete $poolname $poolname --yes-i-really-really-mean-it
+}
+
+#######################################################################
+
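+##
+# Run a manager daemon by the name mgr.**id** with data in **dir**/**id**.
+# The logs can be found in **dir**/mgr.**id**.log and the pid file is
+# **dir**/mgr.**id**.pid. The remaining arguments are passed verbatim
+# to ceph-mgr.
+#
+# @param dir path name of the environment
+# @param id mgr identifier
+# @param ... can be any option valid for ceph-mgr
+# @return 0 on success, 1 on error
+#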
+function run_mgr() {
+ local dir=$1
+ shift
+ local id=$1
+ shift
+ local data=$dir/$id
+
+ ceph config set mgr mgr_pool false --force
+ ceph-mgr \
+ --id $id \
+ $EXTRA_OPTS \
+ --osd-failsafe-full-ratio=.99 \
+ --debug-mgr 20 \
+ --debug-objecter 20 \
+ --debug-ms 20 \
+ --debug-paxos 20 \
+ --chdir= \
+ --mgr-data=$data \
+ --log-file=$dir/\$name.log \
+ --admin-socket=$(get_asok_path) \
+ --run-dir=$dir \
+ --pid-file=$dir/\$name.pid \
+ --mgr-module-path=$(realpath ${CEPH_ROOT}/src/pybind/mgr) \
+ "$@" || return 1
+}
+
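+##
+# Run a metadata server by the name mds.**id** with data in **dir**/**id**.
+# The logs can be found in **dir**/mds.**id**.log and the pid file is
+# **dir**/mds.**id**.pid. The remaining arguments are passed verbatim
+# to ceph-mds.
+#
+# @param dir path name of the environment
+# @param id mds identifier
+# @param ... can be any option valid for ceph-mds
+# @return 0 on success, 1 on error
+#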
+function run_mds() {
+ local dir=$1
+ shift
+ local id=$1
+ shift
+ local data=$dir/$id
+
+ ceph-mds \
+ --id $id \
+ $EXTRA_OPTS \
+ --debug-mds 20 \
+ --debug-objecter 20 \
+ --debug-ms 20 \
+ --chdir= \
+ --mds-data=$data \
+ --log-file=$dir/\$name.log \
+ --admin-socket=$(get_asok_path) \
+ --run-dir=$dir \
+ --pid-file=$dir/\$name.pid \
+ "$@" || return 1
+}
+
+#######################################################################
+
+##
+# Create (prepare) and run (activate) an osd by the name osd.**id**
+# with data in **dir**/**id**. The logs can be found in
+# **dir**/osd.**id**.log, the pid file is **dir**/osd.**id**.pid and
+# the admin socket is **dir**/**id**/ceph-osd.**id**.asok.
+#
+# The remaining arguments are passed verbatim to ceph-osd.
+#
+# Two mandatory arguments must be provided: --fsid and --mon-host
+# Instead of adding them to every call to run_osd, they can be
+# set in the CEPH_ARGS environment variable to be read implicitly
+# by every ceph command.
+#
+# The CEPH_CONF variable is expected to be set to /dev/null to
+# only rely on arguments for configuration.
+#
+# The run_osd function creates the OSD data directory on the **dir**/**id**
+# directory and relies on the activate_osd function to run the daemon.
+#
+# Examples:
+#
+# CEPH_ARGS="--fsid=$(uuidgen) "
+# CEPH_ARGS+="--mon-host=127.0.0.1:7018 "
+# run_osd $dir 0 # prepare and activate an osd using the monitor listening on 7018
+#
+# @param dir path name of the environment
+# @param id osd identifier
+# @param ... can be any option valid for ceph-osd
+# @return 0 on success, 1 on error
+#
+function run_osd() {
+ local dir=$1
+ shift
+ local id=$1
+ shift
+ local osd_data=$dir/$id
+
+ local ceph_args="$CEPH_ARGS"
+ ceph_args+=" --osd-failsafe-full-ratio=.99"
+ ceph_args+=" --osd-journal-size=100"
+ ceph_args+=" --osd-scrub-load-threshold=2000"
+ ceph_args+=" --osd-data=$osd_data"
+ ceph_args+=" --osd-journal=${osd_data}/journal"
+ ceph_args+=" --chdir="
+ ceph_args+=$EXTRA_OPTS
+ ceph_args+=" --run-dir=$dir"
+ ceph_args+=" --admin-socket=$(get_asok_path)"
+ ceph_args+=" --debug-osd=20"
+ ceph_args+=" --debug-ms=1"
+ ceph_args+=" --debug-monc=20"
+ ceph_args+=" --log-file=$dir/\$name.log"
+ ceph_args+=" --pid-file=$dir/\$name.pid"
+ ceph_args+=" --osd-max-object-name-len=460"
+ ceph_args+=" --osd-max-object-namespace-len=64"
+ ceph_args+=" --enable-experimental-unrecoverable-data-corrupting-features=*"
+ ceph_args+=" --osd-mclock-profile=high_recovery_ops"
+ ceph_args+=" "
+ ceph_args+="$@"
+ mkdir -p $osd_data
+
+ local uuid=`uuidgen`
+ echo "add osd$id $uuid"
+ OSD_SECRET=$(ceph-authtool --gen-print-key)
+ echo "{\"cephx_secret\": \"$OSD_SECRET\"}" > $osd_data/new.json
+ ceph osd new $uuid -i $osd_data/new.json
+ rm $osd_data/new.json
+ ceph-osd -i $id $ceph_args --mkfs --key $OSD_SECRET --osd-uuid $uuid
+
+ local key_fn=$osd_data/keyring
+ cat > $key_fn<<EOF
+[osd.$id]
+key = $OSD_SECRET
+EOF
+ echo adding osd$id key to auth repository
+ ceph -i "$key_fn" auth add osd.$id osd "allow *" mon "allow profile osd" mgr "allow profile osd"
+ echo start osd.$id
+ ceph-osd -i $id $ceph_args &
+
+ # If noup is set, then can't wait for this osd
+ if ceph osd dump --format=json | jq '.flags_set[]' | grep -q '"noup"' ; then
+ return 0
+ fi
+ wait_for_osd up $id || return 1
+
+}
+
+function run_osd_filestore() {
+ local dir=$1
+ shift
+ local id=$1
+ shift
+ local osd_data=$dir/$id
+
+ local ceph_args="$CEPH_ARGS"
+ ceph_args+=" --osd-failsafe-full-ratio=.99"
+ ceph_args+=" --osd-journal-size=100"
+ ceph_args+=" --osd-scrub-load-threshold=2000"
+ ceph_args+=" --osd-data=$osd_data"
+ ceph_args+=" --osd-journal=${osd_data}/journal"
+ ceph_args+=" --chdir="
+ ceph_args+=$EXTRA_OPTS
+ ceph_args+=" --run-dir=$dir"
+ ceph_args+=" --admin-socket=$(get_asok_path)"
+ ceph_args+=" --debug-osd=20"
+ ceph_args+=" --debug-ms=1"
+ ceph_args+=" --debug-monc=20"
+ ceph_args+=" --log-file=$dir/\$name.log"
+ ceph_args+=" --pid-file=$dir/\$name.pid"
+ ceph_args+=" --osd-max-object-name-len=460"
+ ceph_args+=" --osd-max-object-namespace-len=64"
+ ceph_args+=" --enable-experimental-unrecoverable-data-corrupting-features=*"
+ ceph_args+=" "
+ ceph_args+="$@"
+ mkdir -p $osd_data
+
+ local uuid=`uuidgen`
+ echo "add osd$osd $uuid"
+ OSD_SECRET=$(ceph-authtool --gen-print-key)
+ echo "{\"cephx_secret\": \"$OSD_SECRET\"}" > $osd_data/new.json
+ ceph osd new $uuid -i $osd_data/new.json
+ rm $osd_data/new.json
+ ceph-osd -i $id $ceph_args --mkfs --key $OSD_SECRET --osd-uuid $uuid --osd-objectstore=filestore
+
+ local key_fn=$osd_data/keyring
+ cat > $key_fn<<EOF
+[osd.$id]
+key = $OSD_SECRET
+EOF
+ echo adding osd$id key to auth repository
+ ceph -i "$key_fn" auth add osd.$id osd "allow *" mon "allow profile osd" mgr "allow profile osd"
+ echo start osd.$id
+ ceph-osd -i $id $ceph_args &
+
+ # If noup is set, then can't wait for this osd
+ if ceph osd dump --format=json | jq '.flags_set[]' | grep -q '"noup"' ; then
+ return 0
+ fi
+ wait_for_osd up $id || return 1
+
+
+}
+
+function test_run_osd() {
+ local dir=$1
+
+ setup $dir || return 1
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+
+ run_osd $dir 0 || return 1
+ local backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
+ config get osd_max_backfills)
+ echo "$backfills" | grep --quiet 'osd_max_backfills' || return 1
+
+ run_osd $dir 1 --osd-max-backfills 20 || return 1
+ local backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.1) \
+ config get osd_max_backfills)
+ test "$backfills" = '{"osd_max_backfills":"20"}' || return 1
+
+ CEPH_ARGS="$CEPH_ARGS --osd-max-backfills 30" run_osd $dir 2 || return 1
+ local backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.2) \
+ config get osd_max_backfills)
+ test "$backfills" = '{"osd_max_backfills":"30"}' || return 1
+
+ teardown $dir || return 1
+}
+
+#######################################################################
+
+##
+# Shutdown and remove all traces of the osd by the name osd.**id**.
+#
+# The OSD is shutdown with the TERM signal. It is then removed from
+# the auth list, crush map, osd map etc and the files associated with
+# it are also removed.
+#
+# @param dir path name of the environment
+# @param id osd identifier
+# @return 0 on success, 1 on error
+#
+function destroy_osd() {
+ local dir=$1
+ local id=$2
+
+ ceph osd out osd.$id || return 1
+ kill_daemons $dir TERM osd.$id || return 1
+ ceph osd down osd.$id || return 1
+ ceph osd purge osd.$id --yes-i-really-mean-it || return 1
+ teardown $dir/$id || return 1
+ rm -fr $dir/$id
+}
+
+function test_destroy_osd() {
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ destroy_osd $dir 0 || return 1
+    ! ceph osd dump | grep "osd.0 " || return 1
+ teardown $dir || return 1
+}
+
+#######################################################################
+
+##
+# Run (activate) an osd by the name osd.**id** with data in
+# **dir**/**id**. The logs can be found in **dir**/osd.**id**.log,
+# the pid file is **dir**/osd.**id**.pid and the admin socket is
+# **dir**/**id**/ceph-osd.**id**.asok.
+#
+# The remaining arguments are passed verbatim to ceph-osd.
+#
+# Two mandatory arguments must be provided: --fsid and --mon-host
+# Instead of adding them to every call to activate_osd, they can be
+# set in the CEPH_ARGS environment variable to be read implicitly
+# by every ceph command.
+#
+# The CEPH_CONF variable is expected to be set to /dev/null to
+# only rely on arguments for configuration.
+#
+# The activate_osd function expects a valid OSD data directory
+# in **dir**/**id**, either just created via run_osd or re-using
+# one left by a previous run of ceph-osd. The ceph-osd daemon is
+# run directly in the foreground.
+#
+# The activate_osd function blocks until the monitor reports the osd
+# up. If it fails to do so within $TIMEOUT seconds, activate_osd
+# fails.
+#
+# Examples:
+#
+# CEPH_ARGS="--fsid=$(uuidgen) "
+# CEPH_ARGS+="--mon-host=127.0.0.1:7018 "
+# activate_osd $dir 0 # activate an osd using the monitor listening on 7018
+#
+# @param dir path name of the environment
+# @param id osd identifier
+# @param ... can be any option valid for ceph-osd
+# @return 0 on success, 1 on error
+#
+function activate_osd() {
+ local dir=$1
+ shift
+ local id=$1
+ shift
+ local osd_data=$dir/$id
+
+ local ceph_args="$CEPH_ARGS"
+ ceph_args+=" --osd-failsafe-full-ratio=.99"
+ ceph_args+=" --osd-journal-size=100"
+ ceph_args+=" --osd-scrub-load-threshold=2000"
+ ceph_args+=" --osd-data=$osd_data"
+ ceph_args+=" --osd-journal=${osd_data}/journal"
+ ceph_args+=" --chdir="
+ ceph_args+=$EXTRA_OPTS
+ ceph_args+=" --run-dir=$dir"
+ ceph_args+=" --admin-socket=$(get_asok_path)"
+ ceph_args+=" --debug-osd=20"
+ ceph_args+=" --log-file=$dir/\$name.log"
+ ceph_args+=" --pid-file=$dir/\$name.pid"
+ ceph_args+=" --osd-max-object-name-len=460"
+ ceph_args+=" --osd-max-object-namespace-len=64"
+ ceph_args+=" --enable-experimental-unrecoverable-data-corrupting-features=*"
+ ceph_args+=" --osd-mclock-profile=high_recovery_ops"
+ ceph_args+=" "
+ ceph_args+="$@"
+ mkdir -p $osd_data
+
+ echo start osd.$id
+ ceph-osd -i $id $ceph_args &
+
+ [ "$id" = "$(cat $osd_data/whoami)" ] || return 1
+
+ # If noup is set, then can't wait for this osd
+ if ceph osd dump --format=json | jq '.flags_set[]' | grep -q '"noup"' ; then
+ return 0
+ fi
+ wait_for_osd up $id || return 1
+}
+
+function test_activate_osd() {
+ local dir=$1
+
+ setup $dir || return 1
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+
+ run_osd $dir 0 || return 1
+ local backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
+ config get osd_max_backfills)
+ echo "$backfills" | grep --quiet 'osd_max_backfills' || return 1
+
+ kill_daemons $dir TERM osd || return 1
+
+ activate_osd $dir 0 --osd-max-backfills 20 || return 1
+ local backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
+ config get osd_max_backfills)
+ test "$backfills" = '{"osd_max_backfills":"20"}' || return 1
+
+ teardown $dir || return 1
+}
+
+function test_activate_osd_after_mark_down() {
+ local dir=$1
+
+ setup $dir || return 1
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+
+ run_osd $dir 0 || return 1
+ local backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
+ config get osd_max_backfills)
+ echo "$backfills" | grep --quiet 'osd_max_backfills' || return 1
+
+ kill_daemons $dir TERM osd || return 1
+ ceph osd down 0 || return 1
+ wait_for_osd down 0 || return 1
+
+ activate_osd $dir 0 --osd-max-backfills 20 || return 1
+ local backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
+ config get osd_max_backfills)
+ test "$backfills" = '{"osd_max_backfills":"20"}' || return 1
+
+ teardown $dir || return 1
+}
+
+function test_activate_osd_skip_benchmark() {
+ local dir=$1
+
+ setup $dir || return 1
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+
+ # Skip the osd benchmark during first osd bring-up.
+ run_osd $dir 0 --osd-op-queue=mclock_scheduler \
+ --osd-mclock-skip-benchmark=true || return 1
+ local max_iops_hdd_def=$(CEPH_ARGS='' ceph --format=json daemon \
+ $(get_asok_path osd.0) config get osd_mclock_max_capacity_iops_hdd)
+ local max_iops_ssd_def=$(CEPH_ARGS='' ceph --format=json daemon \
+ $(get_asok_path osd.0) config get osd_mclock_max_capacity_iops_ssd)
+
+ kill_daemons $dir TERM osd || return 1
+ ceph osd down 0 || return 1
+ wait_for_osd down 0 || return 1
+
+ # Skip the osd benchmark during activation as well. Validate that
+ # the max osd capacities are left unchanged.
+ activate_osd $dir 0 --osd-op-queue=mclock_scheduler \
+ --osd-mclock-skip-benchmark=true || return 1
+ local max_iops_hdd_after_boot=$(CEPH_ARGS='' ceph --format=json daemon \
+ $(get_asok_path osd.0) config get osd_mclock_max_capacity_iops_hdd)
+ local max_iops_ssd_after_boot=$(CEPH_ARGS='' ceph --format=json daemon \
+ $(get_asok_path osd.0) config get osd_mclock_max_capacity_iops_ssd)
+
+ test "$max_iops_hdd_def" = "$max_iops_hdd_after_boot" || return 1
+ test "$max_iops_ssd_def" = "$max_iops_ssd_after_boot" || return 1
+
+ teardown $dir || return 1
+}
+#######################################################################
+
+##
+# Wait until the OSD **id** is either up or down, as specified by
+# **state**. It fails after $TIMEOUT seconds.
+#
+# @param state either up or down
+# @param id osd identifier
+# @return 0 on success, 1 on error
+#
+function wait_for_osd() {
+ local state=$1
+ local id=$2
+
+ status=1
+ for ((i=0; i < $TIMEOUT; i++)); do
+ echo $i
+ if ! ceph osd dump | grep "osd.$id $state"; then
+ sleep 1
+ else
+ status=0
+ break
+ fi
+ done
+ return $status
+}
+
+function test_wait_for_osd() {
+ local dir=$1
+ setup $dir || return 1
+ run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ wait_for_osd up 0 || return 1
+ wait_for_osd up 1 || return 1
+ kill_daemons $dir TERM osd.0 || return 1
+ wait_for_osd down 0 || return 1
+ ( TIMEOUT=1 ; ! wait_for_osd up 0 ) || return 1
+ teardown $dir || return 1
+}
+
+#######################################################################
+
+##
+# Display the list of OSD ids supporting the **objectname** stored in
+# **poolname**, as reported by ceph osd map.
+#
+# @param poolname an existing pool
+# @param objectname an objectname (may or may not exist)
+# @param STDOUT white space separated list of OSD ids
+# @return 0 on success, 1 on error
+#
+function get_osds() {
+ local poolname=$1
+ local objectname=$2
+
+ local osds=$(ceph --format json osd map $poolname $objectname 2>/dev/null | \
+ jq '.acting | .[]')
+ # get rid of the trailing space
+ echo $osds
+}
+
+function test_get_osds() {
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a --osd_pool_default_size=2 || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ create_rbd_pool || return 1
+ wait_for_clean || return 1
+ create_rbd_pool || return 1
+ get_osds rbd GROUP | grep --quiet '^[0-1] [0-1]$' || return 1
+ teardown $dir || return 1
+}
+
+#######################################################################
+
+##
+# Wait for the monitor to form quorum (optionally, of size N)
+#
+# @param timeout duration (lower-bound) to wait for quorum to be formed
+# @param quorumsize size of quorum to wait for
+# @return 0 on success, 1 on error
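+#
+# Examples (illustrative):
+#
+#   wait_for_quorum 300 3    # wait up to 300 seconds for a quorum of 3 mons
+#   wait_for_quorum          # wait for any quorum with the default timeout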
+#
+function wait_for_quorum() {
+ local timeout=$1
+ local quorumsize=$2
+
+ if [[ -z "$timeout" ]]; then
+ timeout=300
+ fi
+
+ if [[ -z "$quorumsize" ]]; then
+ timeout $timeout ceph quorum_status --format=json >&/dev/null || return 1
+ return 0
+ fi
+
+ no_quorum=1
+ wait_until=$((`date +%s` + $timeout))
+ while [[ $(date +%s) -lt $wait_until ]]; do
+ jqfilter='.quorum | length == '$quorumsize
+ jqinput="$(timeout $timeout ceph quorum_status --format=json 2>/dev/null)"
+ res=$(echo $jqinput | jq "$jqfilter")
+ if [[ "$res" == "true" ]]; then
+ no_quorum=0
+ break
+ fi
+ done
+ return $no_quorum
+}
+
+#######################################################################
+
+##
+# Return the PG supporting the **objectname** stored in
+# **poolname**, as reported by ceph osd map.
+#
+# @param poolname an existing pool
+# @param objectname an objectname (may or may not exist)
+# @param STDOUT a PG
+# @return 0 on success, 1 on error
+#
+function get_pg() {
+ local poolname=$1
+ local objectname=$2
+
+ ceph --format json osd map $poolname $objectname 2>/dev/null | jq -r '.pgid'
+}
+
+function test_get_pg() {
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ create_rbd_pool || return 1
+ wait_for_clean || return 1
+ get_pg rbd GROUP | grep --quiet '^[0-9]\.[0-9a-f][0-9a-f]*$' || return 1
+ teardown $dir || return 1
+}
+
+#######################################################################
+
+##
+# Return the value of the **config**, obtained via the config get command
+# of the admin socket of **daemon**.**id**.
+#
+# @param daemon mon or osd
+# @param id mon or osd ID
+# @param config the configuration variable name as found in config_opts.h
+# @param STDOUT the config value
+# @return 0 on success, 1 on error
+#
+function get_config() {
+ local daemon=$1
+ local id=$2
+ local config=$3
+
+ CEPH_ARGS='' \
+ ceph --format json daemon $(get_asok_path $daemon.$id) \
+ config get $config 2> /dev/null | \
+ jq -r ".$config"
+}
+
+function test_get_config() {
+ local dir=$1
+
+ # override the default config using command line arg and check it
+ setup $dir || return 1
+ run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
+ test $(get_config mon a osd_pool_default_size) = 1 || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 --osd_max_scrubs=3 || return 1
+ test $(get_config osd 0 osd_max_scrubs) = 3 || return 1
+ teardown $dir || return 1
+}
+
+#######################################################################
+
+##
+# Set the **config** to specified **value**, via the config set command
+# of the admin socket of **daemon**.**id**
+#
+# @param daemon mon or osd
+# @param id mon or osd ID
+# @param config the configuration variable name as found in config_opts.h
+# @param value the config value
+# @return 0 on success, 1 on error
+#
+function set_config() {
+ local daemon=$1
+ local id=$2
+ local config=$3
+ local value=$4
+
+ test $(env CEPH_ARGS='' ceph --format json daemon $(get_asok_path $daemon.$id) \
+ config set $config $value 2> /dev/null | \
+ jq 'has("success")') == true
+}
+
+function test_set_config() {
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
+ test $(get_config mon a ms_crc_header) = true || return 1
+ set_config mon a ms_crc_header false || return 1
+ test $(get_config mon a ms_crc_header) = false || return 1
+ set_config mon a ms_crc_header true || return 1
+ test $(get_config mon a ms_crc_header) = true || return 1
+ teardown $dir || return 1
+}
+
+#######################################################################
+
+##
+# Return the OSD id of the primary OSD supporting the **objectname**
+# stored in **poolname**, as reported by ceph osd map.
+#
+# @param poolname an existing pool
+# @param objectname an objectname (may or may not exist)
+# @param STDOUT the primary OSD id
+# @return 0 on success, 1 on error
+#
+function get_primary() {
+ local poolname=$1
+ local objectname=$2
+
+ ceph --format json osd map $poolname $objectname 2>/dev/null | \
+ jq '.acting_primary'
+}
+
+function test_get_primary() {
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
+ local osd=0
+ run_mgr $dir x || return 1
+ run_osd $dir $osd || return 1
+ create_rbd_pool || return 1
+ wait_for_clean || return 1
+ test $(get_primary rbd GROUP) = $osd || return 1
+ teardown $dir || return 1
+}
+
+#######################################################################
+
+##
+# Return the id of any OSD supporting the **objectname** stored in
+# **poolname**, as reported by ceph osd map, except the primary.
+#
+# @param poolname an existing pool
+# @param objectname an objectname (may or may not exist)
+# @param STDOUT the OSD id
+# @return 0 on success, 1 on error
+#
+function get_not_primary() {
+ local poolname=$1
+ local objectname=$2
+
+ local primary=$(get_primary $poolname $objectname)
+ ceph --format json osd map $poolname $objectname 2>/dev/null | \
+ jq ".acting | map(select (. != $primary)) | .[0]"
+}
+
+function test_get_not_primary() {
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a --osd_pool_default_size=2 || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ create_rbd_pool || return 1
+ wait_for_clean || return 1
+ local primary=$(get_primary rbd GROUP)
+ local not_primary=$(get_not_primary rbd GROUP)
+ test $not_primary != $primary || return 1
+ test $not_primary = 0 -o $not_primary = 1 || return 1
+ teardown $dir || return 1
+}
+
+#######################################################################
+
+function _objectstore_tool_nodown() {
+ local dir=$1
+ shift
+ local id=$1
+ shift
+ local osd_data=$dir/$id
+
+ ceph-objectstore-tool \
+ --data-path $osd_data \
+ "$@" || return 1
+}
+
+function _objectstore_tool_nowait() {
+ local dir=$1
+ shift
+ local id=$1
+ shift
+
+ kill_daemons $dir TERM osd.$id >&2 < /dev/null || return 1
+
+ _objectstore_tool_nodown $dir $id "$@" || return 1
+ activate_osd $dir $id $ceph_osd_args >&2 || return 1
+}
+
+##
+# Run ceph-objectstore-tool against the OSD **id** using the data path
+# **dir**. The OSD is killed with TERM prior to running
+# ceph-objectstore-tool because access to the data path is
+# exclusive. The OSD is restarted after the command completes. The
+# objectstore_tool returns after all PGs are active+clean again.
+#
+# @param dir the data path of the OSD
+# @param id the OSD id
+# @param ... arguments to ceph-objectstore-tool
+# @param STDIN the input of ceph-objectstore-tool
+# @param STDOUT the output of ceph-objectstore-tool
+# @return 0 on success, 1 on error
+#
+# The value of $ceph_osd_args will be passed to restarted osds
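+#
+# Example (illustrative; mirrors test_objectstore_tool below):
+#
+#   objectstore_tool $dir 0 GROUP get-bytes > /tmp/GROUP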
+#
+function objectstore_tool() {
+ local dir=$1
+ shift
+ local id=$1
+ shift
+
+ _objectstore_tool_nowait $dir $id "$@" || return 1
+ wait_for_clean >&2
+}
+
+function test_objectstore_tool() {
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
+ local osd=0
+ run_mgr $dir x || return 1
+ run_osd $dir $osd || return 1
+ create_rbd_pool || return 1
+ wait_for_clean || return 1
+ rados --pool rbd put GROUP /etc/group || return 1
+ objectstore_tool $dir $osd GROUP get-bytes | \
+ diff - /etc/group
+ ! objectstore_tool $dir $osd NOTEXISTS get-bytes || return 1
+ teardown $dir || return 1
+}
+
+#######################################################################
+
+##
+# Predicate checking if there is an ongoing recovery in the
+# cluster. If any of the recovering_{keys,bytes,objects}_per_sec
+# counters are reported by ceph status, it means recovery is in
+# progress.
+#
+# @return 0 if recovery in progress, 1 otherwise
+#
+function get_is_making_recovery_progress() {
+ local recovery_progress
+ recovery_progress+=".recovering_keys_per_sec + "
+ recovery_progress+=".recovering_bytes_per_sec + "
+ recovery_progress+=".recovering_objects_per_sec"
+ local progress=$(ceph --format json status 2>/dev/null | \
+ jq -r ".pgmap | $recovery_progress")
+ test "$progress" != null
+}
+
+function test_get_is_making_recovery_progress() {
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ ! get_is_making_recovery_progress || return 1
+ teardown $dir || return 1
+}
+
+#######################################################################
+
+##
+# Return the number of active+clean PGs in the cluster. A PG counts if
+# ceph pg dump pgs reports it as both **active** and **clean** and
+# not **stale**.
+#
+# @param STDOUT the number of active PGs
+# @return 0 on success, 1 on error
+#
+function get_num_active_clean() {
+ local expression
+ expression+="select(contains(\"active\") and contains(\"clean\")) | "
+ expression+="select(contains(\"stale\") | not)"
+ ceph --format json pg dump pgs 2>/dev/null | \
+ jq ".pg_stats | [.[] | .state | $expression] | length"
+}
+
+function test_get_num_active_clean() {
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ create_rbd_pool || return 1
+ wait_for_clean || return 1
+ local num_active_clean=$(get_num_active_clean)
+ test "$num_active_clean" = $PG_NUM || return 1
+ teardown $dir || return 1
+}
+
+##
+# Return the number of active or peered PGs in the cluster. A PG matches if
+# ceph pg dump pgs reports it as either **active** or **peered** and
+# not **stale**.
+#
+# @param STDOUT the number of active PGs
+# @return 0 on success, 1 on error
+#
+function get_num_active_or_peered() {
+ local expression
+ expression+="select(contains(\"active\") or contains(\"peered\")) | "
+ expression+="select(contains(\"stale\") | not)"
+ ceph --format json pg dump pgs 2>/dev/null | \
+ jq ".pg_stats | [.[] | .state | $expression] | length"
+}
+
+function test_get_num_active_or_peered() {
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ create_rbd_pool || return 1
+ wait_for_clean || return 1
+ local num_peered=$(get_num_active_or_peered)
+ test "$num_peered" = $PG_NUM || return 1
+ teardown $dir || return 1
+}
+
+#######################################################################
+
+##
+# Return the number of PGs in the cluster, according to
+# ceph pg dump pgs.
+#
+# @param STDOUT the number of PGs
+# @return 0 on success, 1 on error
+#
+function get_num_pgs() {
+ ceph --format json status 2>/dev/null | jq '.pgmap.num_pgs'
+}
+
+function test_get_num_pgs() {
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ create_rbd_pool || return 1
+ wait_for_clean || return 1
+ local num_pgs=$(get_num_pgs)
+ test "$num_pgs" -gt 0 || return 1
+ teardown $dir || return 1
+}
+
+#######################################################################
+
+##
+# Return the OSD ids in use by at least one PG in the cluster (either
+# in the up or the acting set), according to ceph pg dump pgs. Every
+# OSD id appears as many times as it is used in up and acting sets.
+# If an OSD id is in both the up and acting set of a given PG, it will
+# show twice.
+#
+# @param STDOUT a sorted list of OSD ids
+# @return 0 on success, 1 on error
+#
+function get_osd_id_used_by_pgs() {
+ ceph --format json pg dump pgs 2>/dev/null | jq '.pg_stats | .[] | .up[], .acting[]' | sort
+}
+
+function test_get_osd_id_used_by_pgs() {
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ create_rbd_pool || return 1
+ wait_for_clean || return 1
+ local osd_ids=$(get_osd_id_used_by_pgs | uniq)
+ test "$osd_ids" = "0" || return 1
+ teardown $dir || return 1
+}
+
+#######################################################################
+
+##
+# Wait until the OSD **id** shows **count** times in the
+# PGs (see get_osd_id_used_by_pgs for more information about
+# how OSD ids are counted).
+#
+# @param id the OSD id
+# @param count the number of times it must appear in the PGs
+# @return 0 on success, 1 on error
+#
+function wait_osd_id_used_by_pgs() {
+ local id=$1
+ local count=$2
+
+ status=1
+ for ((i=0; i < $TIMEOUT / 5; i++)); do
+ echo $i
+ if ! test $(get_osd_id_used_by_pgs | grep -c $id) = $count ; then
+ sleep 5
+ else
+ status=0
+ break
+ fi
+ done
+ return $status
+}
+
+function test_wait_osd_id_used_by_pgs() {
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ create_rbd_pool || return 1
+ wait_for_clean || return 1
+ wait_osd_id_used_by_pgs 0 8 || return 1
+ ! TIMEOUT=1 wait_osd_id_used_by_pgs 123 5 || return 1
+ teardown $dir || return 1
+}
+
+#######################################################################
+
+##
+# Return the date and time of the last completed scrub for **pgid**,
+# as reported by ceph pg dump pgs. Note that a repair also sets this
+# date.
+#
+# @param pgid the id of the PG
+# @param STDOUT the date and time of the last scrub
+# @return 0 on success, 1 on error
+#
+function get_last_scrub_stamp() {
+ local pgid=$1
+ local sname=${2:-last_scrub_stamp}
+ ceph --format json pg dump pgs 2>/dev/null | \
+ jq -r ".pg_stats | .[] | select(.pgid==\"$pgid\") | .$sname"
+}
+
+function test_get_last_scrub_stamp() {
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ create_rbd_pool || return 1
+ wait_for_clean || return 1
+ stamp=$(get_last_scrub_stamp 1.0)
+ test -n "$stamp" || return 1
+ teardown $dir || return 1
+}
+
+#######################################################################
+
+##
+# Predicate checking if the cluster is clean, i.e. all of its PGs are
+# in a clean state (see get_num_active_clean for a definition).
+#
+# @return 0 if the cluster is clean, 1 otherwise
+#
+function is_clean() {
+ num_pgs=$(get_num_pgs)
+ test $num_pgs != 0 || return 1
+ test $(get_num_active_clean) = $num_pgs || return 1
+}
+
+function test_is_clean() {
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ create_rbd_pool || return 1
+ wait_for_clean || return 1
+ is_clean || return 1
+ teardown $dir || return 1
+}
+
+#######################################################################
+
+calc() { $AWK "BEGIN{print $*}"; }
+
+##
+# Return a list of increasingly larger numbers whose total is **timeout**
+# seconds. It can be used to have short sleep delays while waiting for
+# an event on a fast machine. But if the event is slow to come, the
+# larger delays avoid stressing the machine even further or spamming
+# the logs.
+#
+# @param timeout sum of all delays, in seconds
+# @return a list of sleep delays
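+#
+# Example (illustrative): the retry loop typically built on top of it,
+# as wait_for_clean does below; some_check is a placeholder.
+#
+#   local -a delays=($(get_timeout_delays $WAIT_FOR_CLEAN_TIMEOUT .1))
+#   local -i loop=0
+#   while ! some_check ; do
+#       (( loop >= ${#delays[*]} )) && return 1
+#       sleep ${delays[$loop]}
+#       loop+=1
+#   done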
+#
+function get_timeout_delays() {
+ local trace=$(shopt -q -o xtrace && echo true || echo false)
+ $trace && shopt -u -o xtrace
+ local timeout=$1
+ local first_step=${2:-1}
+ local max_timeout=${3:-$MAX_TIMEOUT}
+
+ local i
+ local total="0"
+ i=$first_step
+ while test "$(calc $total + $i \<= $timeout)" = "1"; do
+ echo -n "$(calc $i) "
+ total=$(calc $total + $i)
+ i=$(calc $i \* 2)
+ if [ $max_timeout -gt 0 ]; then
+ # Did we reach max timeout ?
+ if [ ${i%.*} -eq ${max_timeout%.*} ] && [ ${i#*.} \> ${max_timeout#*.} ] || [ ${i%.*} -gt ${max_timeout%.*} ]; then
+ # Yes, so let's cap the max wait time to max
+ i=$max_timeout
+ fi
+ fi
+ done
+ if test "$(calc $total \< $timeout)" = "1"; then
+ echo -n "$(calc $timeout - $total) "
+ fi
+ $trace && shopt -s -o xtrace
+}
+
+function test_get_timeout_delays() {
+ test "$(get_timeout_delays 1)" = "1 " || return 1
+ test "$(get_timeout_delays 5)" = "1 2 2 " || return 1
+ test "$(get_timeout_delays 6)" = "1 2 3 " || return 1
+ test "$(get_timeout_delays 7)" = "1 2 4 " || return 1
+ test "$(get_timeout_delays 8)" = "1 2 4 1 " || return 1
+ test "$(get_timeout_delays 1 .1)" = "0.1 0.2 0.4 0.3 " || return 1
+ test "$(get_timeout_delays 1.5 .1)" = "0.1 0.2 0.4 0.8 " || return 1
+ test "$(get_timeout_delays 5 .1)" = "0.1 0.2 0.4 0.8 1.6 1.9 " || return 1
+ test "$(get_timeout_delays 6 .1)" = "0.1 0.2 0.4 0.8 1.6 2.9 " || return 1
+ test "$(get_timeout_delays 6.3 .1)" = "0.1 0.2 0.4 0.8 1.6 3.2 " || return 1
+ test "$(get_timeout_delays 20 .1)" = "0.1 0.2 0.4 0.8 1.6 3.2 6.4 7.3 " || return 1
+ test "$(get_timeout_delays 300 .1 0)" = "0.1 0.2 0.4 0.8 1.6 3.2 6.4 12.8 25.6 51.2 102.4 95.3 " || return 1
+ test "$(get_timeout_delays 300 .1 10)" = "0.1 0.2 0.4 0.8 1.6 3.2 6.4 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 7.3 " || return 1
+}
+
+#######################################################################
+
+##
+# Wait until the cluster becomes clean, giving up if it makes no
+# progress for $WAIT_FOR_CLEAN_TIMEOUT seconds.
+# Progress is measured either via the **get_is_making_recovery_progress**
+# predicate or by a change in the number of clean PGs (as returned by
+# get_num_active_clean).
+#
+# @return 0 if the cluster is clean, 1 otherwise
+#
+function wait_for_clean() {
+ local cmd=$1
+ local num_active_clean=-1
+ local cur_active_clean
+ local -a delays=($(get_timeout_delays $WAIT_FOR_CLEAN_TIMEOUT .1))
+ local -i loop=0
+
+ flush_pg_stats || return 1
+ while test $(get_num_pgs) == 0 ; do
+ sleep 1
+ done
+
+ while true ; do
+ # Comparing get_num_active_clean & get_num_pgs is used to determine
+ # if the cluster is clean. That's almost an inline of is_clean() to
+ # get more performance by avoiding multiple calls of get_num_active_clean.
+ cur_active_clean=$(get_num_active_clean)
+ test $cur_active_clean = $(get_num_pgs) && break
+ if test $cur_active_clean != $num_active_clean ; then
+ loop=0
+ num_active_clean=$cur_active_clean
+ elif get_is_making_recovery_progress ; then
+ loop=0
+ elif (( $loop >= ${#delays[*]} )) ; then
+ ceph report
+ return 1
+ fi
+ # eval is a no-op if cmd is empty
+ eval $cmd
+ sleep ${delays[$loop]}
+ loop+=1
+ done
+ return 0
+}
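+
+# Illustrative usage (the command is just an example): the optional **cmd**
+# argument is eval'ed between polls, e.g. to dump extra diagnostics.
+#
+#   wait_for_clean || return 1
+#   wait_for_clean "ceph -s" || return 1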
+
+function test_wait_for_clean() {
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a --osd_pool_default_size=2 || return 1
+ run_osd $dir 0 || return 1
+ run_mgr $dir x || return 1
+ create_rbd_pool || return 1
+ ! WAIT_FOR_CLEAN_TIMEOUT=1 wait_for_clean || return 1
+ run_osd $dir 1 || return 1
+ wait_for_clean || return 1
+ teardown $dir || return 1
+}
+
+##
+# Wait until the cluster becomes peered, giving up if it makes no
+# progress for $WAIT_FOR_CLEAN_TIMEOUT seconds.
+# Progress is measured either via the **get_is_making_recovery_progress**
+# predicate or by a change in the number of peered PGs (as returned by
+# get_num_active_or_peered).
+#
+# @return 0 if the cluster is peered, 1 otherwise
+#
+function wait_for_peered() {
+ local cmd=$1
+ local num_peered=-1
+ local cur_peered
+ local -a delays=($(get_timeout_delays $WAIT_FOR_CLEAN_TIMEOUT .1))
+ local -i loop=0
+
+ flush_pg_stats || return 1
+ while test $(get_num_pgs) == 0 ; do
+ sleep 1
+ done
+
+ while true ; do
+        # Comparing get_num_active_or_peered & get_num_pgs is used to
+        # determine if the cluster is peered. This mirrors the inlined
+        # is_clean() logic in wait_for_clean, avoiding repeated calls of
+        # get_num_active_or_peered.
+ cur_peered=$(get_num_active_or_peered)
+ test $cur_peered = $(get_num_pgs) && break
+ if test $cur_peered != $num_peered ; then
+ loop=0
+ num_peered=$cur_peered
+ elif get_is_making_recovery_progress ; then
+ loop=0
+ elif (( $loop >= ${#delays[*]} )) ; then
+ ceph report
+ return 1
+ fi
+ # eval is a no-op if cmd is empty
+ eval $cmd
+ sleep ${delays[$loop]}
+ loop+=1
+ done
+ return 0
+}
+
+function test_wait_for_peered() {
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a --osd_pool_default_size=2 || return 1
+ run_osd $dir 0 || return 1
+ run_mgr $dir x || return 1
+ create_rbd_pool || return 1
+ ! WAIT_FOR_CLEAN_TIMEOUT=1 wait_for_clean || return 1
+ run_osd $dir 1 || return 1
+ wait_for_peered || return 1
+ teardown $dir || return 1
+}
+
+
+#######################################################################
+
+##
+# Wait until the given health condition disappears from the cluster,
+# waiting at most $TIMEOUT seconds.
+#
+# @param string to grep for in health detail
+# @return 0 if the health condition is gone,
+#        1 if it still remains after $TIMEOUT seconds.
+#
+function wait_for_health_gone() {
+ local grepstr=$1
+ local -a delays=($(get_timeout_delays $TIMEOUT .1))
+ local -i loop=0
+
+ while ceph health detail | grep "$grepstr" ; do
+ if (( $loop >= ${#delays[*]} )) ; then
+ ceph health detail
+ return 1
+ fi
+ sleep ${delays[$loop]}
+ loop+=1
+ done
+}
+
+##
+# Wait at most $TIMEOUT seconds until the cluster reports the health
+# condition passed as argument.
+#
+# @param string to grep for in health detail
+# @return 0 if the cluster health matches request, 1 otherwise
+#
+function wait_for_health() {
+ local grepstr=$1
+ local -a delays=($(get_timeout_delays $TIMEOUT .1))
+ local -i loop=0
+
+ while ! ceph health detail | grep "$grepstr" ; do
+ if (( $loop >= ${#delays[*]} )) ; then
+ ceph health detail
+ return 1
+ fi
+ sleep ${delays[$loop]}
+ loop+=1
+ done
+}
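+
+# Illustrative usage (the health code is only an example): wait for a
+# warning to show up, then wait for it to clear.
+#
+#   wait_for_health "OSD_DOWN" || return 1
+#   wait_for_health_gone "OSD_DOWN" || return 1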
+
+##
+# Wait until the cluster becomes HEALTH_OK again, giving up after
+# $TIMEOUT seconds.
+#
+# @return 0 if the cluster is HEALTHY, 1 otherwise
+#
+function wait_for_health_ok() {
+ wait_for_health "HEALTH_OK" || return 1
+}
+
+function test_wait_for_health_ok() {
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a --osd_failsafe_full_ratio=.99 --mon_pg_warn_min_per_osd=0 || return 1
+ run_mgr $dir x --mon_pg_warn_min_per_osd=0 || return 1
+ # start osd_pool_default_size OSDs
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+ kill_daemons $dir TERM osd || return 1
+ ceph osd down 0 || return 1
+ # expect TOO_FEW_OSDS warning
+ ! TIMEOUT=1 wait_for_health_ok || return 1
+ # resurrect all OSDs
+ activate_osd $dir 0 || return 1
+ activate_osd $dir 1 || return 1
+ activate_osd $dir 2 || return 1
+ wait_for_health_ok || return 1
+ teardown $dir || return 1
+}
+
+
+#######################################################################
+
+##
+# Run repair on **pgid** and wait until it completes. The repair
+# function will fail if repair does not complete within $TIMEOUT
+# seconds.
+#
+# @param pgid the id of the PG
+# @return 0 on success, 1 on error
+#
+function repair() {
+ local pgid=$1
+ local last_scrub=$(get_last_scrub_stamp $pgid)
+ ceph pg repair $pgid
+ wait_for_scrub $pgid "$last_scrub"
+}
+
+function test_repair() {
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ create_rbd_pool || return 1
+ wait_for_clean || return 1
+ repair 1.0 || return 1
+ kill_daemons $dir KILL osd || return 1
+ ! TIMEOUT=1 repair 1.0 || return 1
+ teardown $dir || return 1
+}
+#######################################################################
+
+##
+# Run scrub on **pgid** and wait until it completes. The pg_scrub
+# function will fail if the scrub does not complete within $TIMEOUT
+# seconds. The scrub is complete whenever the
+# **get_last_scrub_stamp** function reports a timestamp different from
+# the one stored before starting the scrub.
+#
+# @param pgid the id of the PG
+# @return 0 on success, 1 on error
+#
+function pg_scrub() {
+ local pgid=$1
+ local last_scrub=$(get_last_scrub_stamp $pgid)
+ ceph pg scrub $pgid
+ wait_for_scrub $pgid "$last_scrub"
+}
+
+function pg_deep_scrub() {
+ local pgid=$1
+ local last_scrub=$(get_last_scrub_stamp $pgid last_deep_scrub_stamp)
+ ceph pg deep-scrub $pgid
+ wait_for_scrub $pgid "$last_scrub" last_deep_scrub_stamp
+}
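+
+# Illustrative usage: scrub then deep-scrub the same PG; each call waits
+# on its own stamp (last_scrub_stamp vs last_deep_scrub_stamp).
+#
+#   pg_scrub 1.0 || return 1
+#   pg_deep_scrub 1.0 || return 1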
+
+function test_pg_scrub() {
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ create_rbd_pool || return 1
+ wait_for_clean || return 1
+ pg_scrub 1.0 || return 1
+ kill_daemons $dir KILL osd || return 1
+ ! TIMEOUT=1 pg_scrub 1.0 || return 1
+ teardown $dir || return 1
+}
+
+#######################################################################
+
+##
+# Run the *command* and expect it to fail (i.e. return a non-zero status).
+# The output (stderr and stdout) is stored in a temporary file in *dir*
+# and is expected to contain the string *expected*.
+#
+# Return 0 if the command failed and the string was found. Otherwise
+# return 1 and cat the full output of the command on stderr for debug.
+#
+# @param dir temporary directory to store the output
+# @param expected string to look for in the output
+# @param command ... the command and its arguments
+# @return 0 on success, 1 on error
+#
+
+function expect_failure() {
+ local dir=$1
+ shift
+ local expected="$1"
+ shift
+ local success
+
+ if "$@" > $dir/out 2>&1 ; then
+ success=true
+ else
+ success=false
+ fi
+
+ if $success || ! grep --quiet "$expected" $dir/out ; then
+ cat $dir/out >&2
+ return 1
+ else
+ return 0
+ fi
+}
+
+function test_expect_failure() {
+ local dir=$1
+
+ setup $dir || return 1
+ expect_failure $dir FAIL bash -c 'echo FAIL ; exit 1' || return 1
+ # the command did not fail
+ ! expect_failure $dir FAIL bash -c 'echo FAIL ; exit 0' > $dir/out || return 1
+ grep --quiet FAIL $dir/out || return 1
+ # the command failed but the output does not contain the expected string
+ ! expect_failure $dir FAIL bash -c 'echo UNEXPECTED ; exit 1' > $dir/out || return 1
+ ! grep --quiet FAIL $dir/out || return 1
+ teardown $dir || return 1
+}
+
+#######################################################################
+
+##
+# Given the *last_scrub*, wait for a scrub to happen on **pgid**. It
+# will fail if the scrub does not complete within $TIMEOUT seconds. The
+# scrub is complete whenever the **get_last_scrub_stamp** function
+# reports a timestamp different from the one given in argument.
+#
+# @param pgid the id of the PG
+# @param last_scrub timestamp of the last scrub for *pgid*
+# @return 0 on success, 1 on error
+#
+function wait_for_scrub() {
+ local pgid=$1
+ local last_scrub="$2"
+ local sname=${3:-last_scrub_stamp}
+
+ for ((i=0; i < $TIMEOUT; i++)); do
+ if test "$(get_last_scrub_stamp $pgid $sname)" '>' "$last_scrub" ; then
+ return 0
+ fi
+ sleep 1
+ done
+ return 1
+}
+
+function test_wait_for_scrub() {
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ create_rbd_pool || return 1
+ wait_for_clean || return 1
+ local pgid=1.0
+ ceph pg repair $pgid
+ local last_scrub=$(get_last_scrub_stamp $pgid)
+ wait_for_scrub $pgid "$last_scrub" || return 1
+ kill_daemons $dir KILL osd || return 1
+ last_scrub=$(get_last_scrub_stamp $pgid)
+ ! TIMEOUT=1 wait_for_scrub $pgid "$last_scrub" || return 1
+ teardown $dir || return 1
+}
+
+#######################################################################
+
+##
+# Return 0 if the erasure code *plugin* is available, 1 otherwise.
+#
+# @param plugin erasure code plugin
+# @return 0 on success, 1 on error
+#
+
+function erasure_code_plugin_exists() {
+ local plugin=$1
+ local status
+ local grepstr
+ local s
+ case `uname` in
+ FreeBSD) grepstr="Cannot open.*$plugin" ;;
+ *) grepstr="$plugin.*No such file" ;;
+ esac
+
+ s=$(ceph osd erasure-code-profile set TESTPROFILE plugin=$plugin 2>&1)
+ local status=$?
+ if [ $status -eq 0 ]; then
+ ceph osd erasure-code-profile rm TESTPROFILE
+ elif ! echo $s | grep --quiet "$grepstr" ; then
+ status=1
+ # display why the string was rejected.
+ echo $s
+ fi
+ return $status
+}
+
+function test_erasure_code_plugin_exists() {
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ erasure_code_plugin_exists jerasure || return 1
+ ! erasure_code_plugin_exists FAKE || return 1
+ teardown $dir || return 1
+}
+
+#######################################################################
+
+##
+# Display all log files from **dir** on stdout.
+#
+# @param dir directory in which all data is stored
+#
+
+function display_logs() {
+ local dir=$1
+
+ find $dir -maxdepth 1 -name '*.log' | \
+ while read file ; do
+ echo "======================= $file"
+ cat $file
+ done
+}
+
+function test_display_logs() {
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a || return 1
+ kill_daemons $dir || return 1
+ display_logs $dir > $dir/log.out
+ grep --quiet mon.a.log $dir/log.out || return 1
+ teardown $dir || return 1
+}
+
+#######################################################################
+##
+# Spawn a command in the background and append its pid to the variable
+# whose name is passed in argument. To make the output easier to read,
+# each output line is prepended with the process id.
+#
+# Example:
+# pids1=""
+# run_in_background pids1 bash -c 'sleep 1; exit 1'
+#
+# @param pid_variable the variable name (not value) where the pids will be stored
+# @param ... the command to execute
+# @return 0; the pids accumulated in pid_variable should be passed to **wait_background**
+#
+function run_in_background() {
+ local pid_variable=$1
+ shift
+    # Execute the command and prepend its output with its pid.
+    # Make sure to return the exit status of the command, not that of sed.
+ ("$@" |& sed 's/^/'$BASHPID': /'; return "${PIPESTATUS[0]}") >&2 &
+ eval "$pid_variable+=\" $!\""
+}
+
+function save_stdout {
+ local out="$1"
+ shift
+ "$@" > "$out"
+}
+
+function test_run_in_background() {
+ local pids
+ run_in_background pids sleep 1
+ run_in_background pids sleep 1
+ test $(echo $pids | wc -w) = 2 || return 1
+ wait $pids || return 1
+}
+
+#######################################################################
+##
+# Wait for pids running in background to complete.
+# This function is usually used after a **run_in_background** call
+# Example:
+# pids1=""
+# run_in_background pids1 bash -c 'sleep 1; exit 1'
+# wait_background pids1
+#
+# @param pids The variable name that contains the active PIDS. It is emptied at the end of the function.
+# @return 1 if at least one process exited in error, 0 otherwise
+#
+function wait_background() {
+ # We extract the PIDS from the variable name
+ pids=${!1}
+
+ return_code=0
+ for pid in $pids; do
+ if ! wait $pid; then
+ # If one process failed then return 1
+ return_code=1
+ fi
+ done
+
+    # We empty the variable, reporting that all processes have ended
+ eval "$1=''"
+
+ return $return_code
+}
+
+
+function test_wait_background() {
+ local pids=""
+ run_in_background pids bash -c "sleep 1; exit 1"
+ run_in_background pids bash -c "sleep 2; exit 0"
+ wait_background pids
+ if [ $? -ne 1 ]; then return 1; fi
+
+ run_in_background pids bash -c "sleep 1; exit 0"
+ run_in_background pids bash -c "sleep 2; exit 0"
+ wait_background pids
+ if [ $? -ne 0 ]; then return 1; fi
+
+ if [ ! -z "$pids" ]; then return 1; fi
+}
+
+function flush_pg_stats()
+{
+ local timeout=${1:-$TIMEOUT}
+
+ ids=`ceph osd ls`
+ seqs=''
+ for osd in $ids; do
+ seq=`ceph tell osd.$osd flush_pg_stats`
+ if test -z "$seq"
+ then
+ continue
+ fi
+ seqs="$seqs $osd-$seq"
+ done
+
+ for s in $seqs; do
+ osd=`echo $s | cut -d - -f 1`
+ seq=`echo $s | cut -d - -f 2`
+ echo "waiting osd.$osd seq $seq"
+ while test $(ceph osd last-stat-seq $osd) -lt $seq; do
+ sleep 1
+ if [ $((timeout--)) -eq 0 ]; then
+ return 1
+ fi
+ done
+ done
+}
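+
+# Illustrative usage: flush with the default $TIMEOUT, or pass an explicit
+# timeout in seconds for this call only.
+#
+#   flush_pg_stats || return 1
+#   flush_pg_stats 60 || return 1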
+
+function test_flush_pg_stats()
+{
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ create_rbd_pool || return 1
+ rados -p rbd put obj /etc/group
+ flush_pg_stats || return 1
+ local jq_filter='.pools | .[] | select(.name == "rbd") | .stats'
+ stored=`ceph df detail --format=json | jq "$jq_filter.stored"`
+ stored_raw=`ceph df detail --format=json | jq "$jq_filter.stored_raw"`
+ test $stored -gt 0 || return 1
+ test $stored == $stored_raw || return 1
+ teardown $dir
+}
+
+########################################################################
+##
+# Get the current op scheduler enabled on an osd by reading the
+# osd_op_queue config option
+#
+# Example:
+# get_op_scheduler $osdid
+#
+# @param id the id of the OSD
+# @return the name of the op scheduler enabled for the OSD
+#
+function get_op_scheduler() {
+ local id=$1
+
+ get_config osd $id osd_op_queue
+}
+
+function test_get_op_scheduler() {
+ local dir=$1
+
+ setup $dir || return 1
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+
+ run_osd $dir 0 --osd_op_queue=wpq || return 1
+ test $(get_op_scheduler 0) = "wpq" || return 1
+
+ run_osd $dir 1 --osd_op_queue=mclock_scheduler || return 1
+ test $(get_op_scheduler 1) = "mclock_scheduler" || return 1
+ teardown $dir || return 1
+}
+
+#######################################################################
+
+##
+# Call the **run** function (which must be defined by the caller) with
+# the **dir** argument followed by the caller argument list.
+#
+# If the **run** function returns on error, all logs found in **dir**
+# are displayed for diagnostic purposes.
+#
+# The **teardown** function is called when the **run** function returns
+# (on success or on error) to clean up leftovers. CEPH_CONF is set
+# to /dev/null and CEPH_ARGS is unset so that the tests are protected from
+# external interference.
+#
+# It is the responsibility of the **run** function to call the
+# **setup** function to prepare the test environment (create a temporary
+# directory etc.).
+#
+# The shell is configured (via PS4) to display the function and line
+# number whenever a statement is executed, which helps debugging.
+#
+# @param dir directory in which all data is stored
+# @param ... arguments passed transparently to **run**
+# @return 0 on success, 1 on error
+#
+function main() {
+ local dir=td/$1
+ shift
+
+ shopt -s -o xtrace
+ PS4='${BASH_SOURCE[0]}:$LINENO: ${FUNCNAME[0]}: '
+
+    export PATH=.:$PATH # make sure programs from the source tree are preferred
+ export PYTHONWARNINGS=ignore
+ export CEPH_CONF=/dev/null
+ unset CEPH_ARGS
+
+ local code
+ if run $dir "$@" ; then
+ code=0
+ else
+ code=1
+ fi
+ teardown $dir $code || return 1
+ return $code
+}
+
+#######################################################################
+
+function run_tests() {
+ shopt -s -o xtrace
+ PS4='${BASH_SOURCE[0]}:$LINENO: ${FUNCNAME[0]}: '
+
+    export PATH=.:$PATH # make sure programs from the source tree are preferred
+
+ export CEPH_MON="127.0.0.1:7109" # git grep '\<7109\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+=" --fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+ export CEPH_CONF=/dev/null
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(test_[0-9a-z_]*\) .*/\1/p')}
+ local dir=td/ceph-helpers
+
+ for func in $funcs ; do
+ if ! $func $dir; then
+ teardown $dir 1
+ return 1
+ fi
+ done
+}
+
+if test "$1" = TESTS ; then
+ shift
+ run_tests "$@"
+ exit $?
+fi
+
+# NOTE:
+# jq only supports --exit-status|-e from version 1.4 onwards, which makes
+# returning on error much prettier and more straightforward.
+# However, the current automated upstream build is running with v1.3,
+# which has no idea what -e is. Hence the convoluted error checking we
+# need. Sad.
+# The next time someone changes this code, please check if v1.4 is now
+# a thing, and, if so, please change these to use -e. Thanks.
+
+# jq '.all.supported | select([.[] == "foo"] | any)'
+function jq_success() {
+ input="$1"
+ filter="$2"
+ expects="\"$3\""
+
+ in_escaped=$(printf %s "$input" | sed "s/'/'\\\\''/g")
+ filter_escaped=$(printf %s "$filter" | sed "s/'/'\\\\''/g")
+
+ ret=$(echo "$in_escaped" | jq "$filter_escaped")
+ if [[ "$ret" == "true" ]]; then
+ return 0
+ elif [[ -n "$expects" ]]; then
+ if [[ "$ret" == "$expects" ]]; then
+ return 0
+ fi
+ fi
+ return 1
+}
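+
+# Illustrative usage (the JSON layout is hypothetical, matching the filter
+# quoted above): succeed when the filter evaluates to true, or when its
+# output equals the expected string.
+#
+#   jq_success "$json" '.all.supported | [.[] == "foo"] | any' || return 1
+#   jq_success "$json" '.all.default' foo || return 1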
+
+function inject_eio() {
+ local pooltype=$1
+ shift
+ local which=$1
+ shift
+ local poolname=$1
+ shift
+ local objname=$1
+ shift
+ local dir=$1
+ shift
+ local shard_id=$1
+ shift
+
+ local -a initial_osds=($(get_osds $poolname $objname))
+ local osd_id=${initial_osds[$shard_id]}
+ if [ "$pooltype" != "ec" ]; then
+ shard_id=""
+ fi
+ type=$(cat $dir/$osd_id/type)
+ set_config osd $osd_id ${type}_debug_inject_read_err true || return 1
+ local loop=0
+ while ( CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.$osd_id) \
+ inject${which}err $poolname $objname $shard_id | grep -q Invalid ); do
+ loop=$(expr $loop + 1)
+ if [ $loop = "10" ]; then
+ return 1
+ fi
+ sleep 1
+ done
+}
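+
+# Illustrative usage (pool, object and dir are placeholders): inject a read
+# error into shard 1 of an object stored in an EC pool.
+#
+#   inject_eio ec data $poolname $objname $dir 1 || return 1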
+
+function multidiff() {
+ if ! diff $@ ; then
+ if [ "$DIFFCOLOPTS" = "" ]; then
+ return 1
+ fi
+ diff $DIFFCOLOPTS $@
+ fi
+}
+
+function create_ec_pool() {
+ local pool_name=$1
+ shift
+ local allow_overwrites=$1
+ shift
+
+ ceph osd erasure-code-profile set myprofile crush-failure-domain=osd "$@" || return 1
+
+    create_pool "$pool_name" 1 1 erasure myprofile || return 1
+
+ if [ "$allow_overwrites" = "true" ]; then
+        ceph osd pool set "$pool_name" allow_ec_overwrites true || return 1
+ fi
+
+ wait_for_clean || return 1
+ return 0
+}
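+
+# Illustrative usage: create an EC pool that allows overwrites; the trailing
+# arguments (here k=2 m=1) are passed straight to the erasure code profile.
+#
+#   create_ec_pool ecpool true k=2 m=1 || return 1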
+
+# Local Variables:
+# compile-command: "cd ../../src ; make -j4 && ../qa/standalone/ceph-helpers.sh TESTS # test_get_config"
+# End:
diff --git a/qa/standalone/crush/crush-choose-args.sh b/qa/standalone/crush/crush-choose-args.sh
new file mode 100755
index 000000000..ee548db12
--- /dev/null
+++ b/qa/standalone/crush/crush-choose-args.sh
@@ -0,0 +1,243 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2017 Red Hat <contact@redhat.com>
+#
+# Author: Loic Dachary <loic@dachary.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7131" # git grep '\<7131\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+ CEPH_ARGS+="--crush-location=root=default,host=HOST "
+ CEPH_ARGS+="--osd-crush-initial-weight=3 "
+ #
+    # Disable the device auto class feature for now.
+    # The device class is non-deterministic and would
+    # break the crushmap comparison below.
+ #
+ CEPH_ARGS+="--osd-class-update-on-start=false "
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+function TEST_choose_args_update() {
+ #
+ # adding a weighted OSD updates the weight up to the top
+ #
+ local dir=$1
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+
+ ceph osd set-require-min-compat-client luminous
+ ceph osd getcrushmap > $dir/map || return 1
+ crushtool -d $dir/map -o $dir/map.txt || return 1
+ sed -i -e '/end crush map/d' $dir/map.txt
+ cat >> $dir/map.txt <<EOF
+# choose_args
+choose_args 0 {
+ {
+ bucket_id -1
+ weight_set [
+ [ 2.00000 ]
+ [ 2.00000 ]
+ ]
+ ids [ -10 ]
+ }
+ {
+ bucket_id -2
+ weight_set [
+ [ 2.00000 ]
+ [ 2.00000 ]
+ ]
+ ids [ -20 ]
+ }
+}
+
+# end crush map
+EOF
+ crushtool -c $dir/map.txt -o $dir/map-new || return 1
+ ceph osd setcrushmap -i $dir/map-new || return 1
+ ceph osd crush tree
+
+ run_osd $dir 1 || return 1
+ ceph osd crush tree
+ ceph osd getcrushmap > $dir/map-one-more || return 1
+ crushtool -d $dir/map-one-more -o $dir/map-one-more.txt || return 1
+ cat $dir/map-one-more.txt
+ diff -u $dir/map-one-more.txt $CEPH_ROOT/src/test/crush/crush-choose-args-expected-one-more-3.txt || return 1
+
+ destroy_osd $dir 1 || return 1
+ ceph osd crush tree
+ ceph osd getcrushmap > $dir/map-one-less || return 1
+ crushtool -d $dir/map-one-less -o $dir/map-one-less.txt || return 1
+ diff -u $dir/map-one-less.txt $dir/map.txt || return 1
+}
+
+function TEST_no_update_weight_set() {
+ #
+ # adding a zero weight OSD does not update the weight set at all
+ #
+ local dir=$1
+
+ ORIG_CEPH_ARGS="$CEPH_ARGS"
+ CEPH_ARGS+="--osd-crush-update-weight-set=false "
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+
+ ceph osd set-require-min-compat-client luminous
+ ceph osd crush tree
+ ceph osd getcrushmap > $dir/map || return 1
+ crushtool -d $dir/map -o $dir/map.txt || return 1
+ sed -i -e '/end crush map/d' $dir/map.txt
+ cat >> $dir/map.txt <<EOF
+# choose_args
+choose_args 0 {
+ {
+ bucket_id -1
+ weight_set [
+ [ 2.00000 ]
+ [ 1.00000 ]
+ ]
+ ids [ -10 ]
+ }
+ {
+ bucket_id -2
+ weight_set [
+ [ 2.00000 ]
+ [ 1.00000 ]
+ ]
+ ids [ -20 ]
+ }
+}
+
+# end crush map
+EOF
+ crushtool -c $dir/map.txt -o $dir/map-new || return 1
+ ceph osd setcrushmap -i $dir/map-new || return 1
+ ceph osd crush tree
+
+
+ run_osd $dir 1 || return 1
+ ceph osd crush tree
+ ceph osd getcrushmap > $dir/map-one-more || return 1
+ crushtool -d $dir/map-one-more -o $dir/map-one-more.txt || return 1
+ cat $dir/map-one-more.txt
+ diff -u $dir/map-one-more.txt $CEPH_ROOT/src/test/crush/crush-choose-args-expected-one-more-0.txt || return 1
+
+ destroy_osd $dir 1 || return 1
+ ceph osd crush tree
+ ceph osd getcrushmap > $dir/map-one-less || return 1
+ crushtool -d $dir/map-one-less -o $dir/map-one-less.txt || return 1
+ diff -u $dir/map-one-less.txt $dir/map.txt || return 1
+
+ CEPH_ARGS="$ORIG_CEPH_ARGS"
+}
+
+function TEST_reweight() {
+ # reweight and reweight-compat behave appropriately
+ local dir=$1
+
+ ORIG_CEPH_ARGS="$CEPH_ARGS"
+ CEPH_ARGS+="--osd-crush-update-weight-set=false "
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+
+ ceph osd crush weight-set create-compat || return 1
+ ceph osd crush tree
+
+ ceph osd crush weight-set reweight-compat osd.0 2 || return 1
+ ceph osd crush tree
+ ceph osd crush tree | grep host | grep '6.00000 5.00000' || return 1
+
+ run_osd $dir 2 || return 1
+ ceph osd crush tree
+ ceph osd crush tree | grep host | grep '9.00000 5.00000' || return 1
+
+ ceph osd crush reweight osd.2 4
+ ceph osd crush tree
+ ceph osd crush tree | grep host | grep '10.00000 5.00000' || return 1
+
+ ceph osd crush weight-set reweight-compat osd.2 4
+ ceph osd crush tree
+ ceph osd crush tree | grep host | grep '10.00000 9.00000' || return 1
+}
+
+function TEST_move_bucket() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+
+ ceph osd crush weight-set create-compat || return 1
+ ceph osd crush weight-set reweight-compat osd.0 2 || return 1
+ ceph osd crush weight-set reweight-compat osd.1 2 || return 1
+ ceph osd crush tree
+ ceph osd crush tree | grep HOST | grep '6.00000 4.00000' || return 1
+
+ # moving a bucket adjusts the weights
+ ceph osd crush add-bucket RACK rack root=default || return 1
+ ceph osd crush move HOST rack=RACK || return 1
+ ceph osd crush tree
+ ceph osd crush tree | grep HOST | grep '6.00000 4.00000' || return 1
+ ceph osd crush tree | grep RACK | grep '6.00000 4.00000' || return 1
+
+ # weight-set reweight adjusts containing buckets
+ ceph osd crush weight-set reweight-compat osd.0 1 || return 1
+ ceph osd crush tree
+ ceph osd crush tree | grep HOST | grep '6.00000 3.00000' || return 1
+ ceph osd crush tree | grep RACK | grep '6.00000 3.00000' || return 1
+
+ # moving a leaf resets its weight-set to the canonical weight...
+ ceph config set mon osd_crush_update_weight_set true || return 1
+ ceph osd crush add-bucket FOO host root=default || return 1
+ ceph osd crush move osd.0 host=FOO || return 1
+ ceph osd crush tree
+ ceph osd crush tree | grep osd.0 | grep '3.00000 3.00000' || return 1
+ ceph osd crush tree | grep HOST | grep '3.00000 2.00000' || return 1
+ ceph osd crush tree | grep RACK | grep '3.00000 2.00000' || return 1
+
+ # ...or to zero.
+ ceph config set mon osd_crush_update_weight_set false || return 1
+ ceph osd crush move osd.1 host=FOO || return 1
+ ceph osd crush tree
+ ceph osd crush tree | grep osd.0 | grep '3.00000 3.00000' || return 1
+ ceph osd crush tree | grep osd.1 | grep '3.00000 0' || return 1
+ ceph osd crush tree | grep FOO | grep '6.00000 3.00000' || return 1
+}
+
+main crush-choose-args "$@"
+
+# Local Variables:
+# compile-command: "cd ../../../build ; ln -sf ../src/ceph-disk/ceph_disk/main.py bin/ceph-disk && make -j4 && ../src/test/crush/crush-choose-args.sh"
+# End:
diff --git a/qa/standalone/crush/crush-classes.sh b/qa/standalone/crush/crush-classes.sh
new file mode 100755
index 000000000..558aabe6d
--- /dev/null
+++ b/qa/standalone/crush/crush-classes.sh
@@ -0,0 +1,265 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2017 Red Hat <contact@redhat.com>
+#
+# Author: Loic Dachary <loic@dachary.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7130" # git grep '\<7130\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+ #
+ # Disable auto-class, so we can inject device class manually below
+ #
+ CEPH_ARGS+="--osd-class-update-on-start=false "
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+function add_something() {
+ local dir=$1
+ local obj=${2:-SOMETHING}
+
+ local payload=ABCDEF
+ echo $payload > $dir/ORIGINAL
+ rados --pool rbd put $obj $dir/ORIGINAL || return 1
+}
+
+function get_osds_up() {
+ local poolname=$1
+ local objectname=$2
+
+ local osds=$(ceph --format xml osd map $poolname $objectname 2>/dev/null | \
+ $XMLSTARLET sel -t -m "//up/osd" -v . -o ' ')
+ # get rid of the trailing space
+ echo $osds
+}
+
+function TEST_reweight_vs_classes() {
+ local dir=$1
+
+ # CrushWrapper::update_item (and ceph osd crush set) must rebuild the shadow
+ # tree too. https://tracker.ceph.com/issues/48065
+
+ run_mon $dir a || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+
+ ceph osd crush set-device-class ssd osd.0 || return 1
+ ceph osd crush class ls-osd ssd | grep 0 || return 1
+ ceph osd crush set-device-class ssd osd.1 || return 1
+ ceph osd crush class ls-osd ssd | grep 1 || return 1
+
+ ceph osd crush reweight osd.0 1
+
+ h=`hostname -s`
+ ceph osd crush dump | jq ".buckets[] | select(.name==\"$h\") | .items[0].weight" | grep 65536
+ ceph osd crush dump | jq ".buckets[] | select(.name==\"$h~ssd\") | .items[0].weight" | grep 65536
+
+ ceph osd crush set 0 2 host=$h
+
+ ceph osd crush dump | jq ".buckets[] | select(.name==\"$h\") | .items[0].weight" | grep 131072
+ ceph osd crush dump | jq ".buckets[] | select(.name==\"$h~ssd\") | .items[0].weight" | grep 131072
+}
+
+function TEST_classes() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+ create_rbd_pool || return 1
+
+ test "$(get_osds_up rbd SOMETHING)" == "1 2 0" || return 1
+ add_something $dir SOMETHING || return 1
+
+ #
+ # osd.0 has class ssd and the rule is modified
+ # to only take ssd devices.
+ #
+ ceph osd getcrushmap > $dir/map || return 1
+ crushtool -d $dir/map -o $dir/map.txt || return 1
+ ${SED} -i \
+ -e '/device 0 osd.0/s/$/ class ssd/' \
+ -e '/step take default/s/$/ class ssd/' \
+ $dir/map.txt || return 1
+ crushtool -c $dir/map.txt -o $dir/map-new || return 1
+ ceph osd setcrushmap -i $dir/map-new || return 1
+
+ #
+    # There can only be one mapping since there is only
+    # one device with the ssd class.
+ #
+ ok=false
+ for delay in 2 4 8 16 32 64 128 256 ; do
+ if test "$(get_osds_up rbd SOMETHING_ELSE)" == "0" ; then
+ ok=true
+ break
+ fi
+ sleep $delay
+ ceph osd dump # for debugging purposes
+ ceph pg dump # for debugging purposes
+ done
+ $ok || return 1
+ #
+ # Writing keeps working because the pool is min_size 1 by
+ # default.
+ #
+ add_something $dir SOMETHING_ELSE || return 1
+
+ #
+    # Sanity check that the crush map indeed has an ssd
+    # shadow bucket with a name including ~ssd.
+ #
+ ceph osd crush dump | grep -q '~ssd' || return 1
+}
+
+function TEST_set_device_class() {
+ local dir=$1
+
+ TEST_classes $dir || return 1
+
+ ceph osd crush set-device-class ssd osd.0 || return 1
+ ceph osd crush class ls-osd ssd | grep 0 || return 1
+ ceph osd crush set-device-class ssd osd.1 || return 1
+ ceph osd crush class ls-osd ssd | grep 1 || return 1
+ ceph osd crush set-device-class ssd 0 1 || return 1 # should be idempotent
+
+ ok=false
+ for delay in 2 4 8 16 32 64 128 256 ; do
+ if test "$(get_osds_up rbd SOMETHING_ELSE)" == "0 1" ; then
+ ok=true
+ break
+ fi
+ sleep $delay
+ ceph osd crush dump
+ ceph osd dump # for debugging purposes
+ ceph pg dump # for debugging purposes
+ done
+ $ok || return 1
+}
+
+function TEST_mon_classes() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+ create_rbd_pool || return 1
+
+ test "$(get_osds_up rbd SOMETHING)" == "1 2 0" || return 1
+ add_something $dir SOMETHING || return 1
+
+ # test create and remove class
+ ceph osd crush class create CLASS || return 1
+ ceph osd crush class create CLASS || return 1 # idempotent
+ ceph osd crush class ls | grep CLASS || return 1
+ ceph osd crush class rename CLASS TEMP || return 1
+ ceph osd crush class ls | grep TEMP || return 1
+ ceph osd crush class rename TEMP CLASS || return 1
+ ceph osd crush class ls | grep CLASS || return 1
+ ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd crush-device-class=CLASS || return 1
+ expect_failure $dir EBUSY ceph osd crush class rm CLASS || return 1
+ ceph osd erasure-code-profile rm myprofile || return 1
+ ceph osd crush class rm CLASS || return 1
+ ceph osd crush class rm CLASS || return 1 # test idempotence
+
+ # test rm-device-class
+ ceph osd crush set-device-class aaa osd.0 || return 1
+ ceph osd tree | grep -q 'aaa' || return 1
+ ceph osd crush dump | grep -q '~aaa' || return 1
+ ceph osd crush tree --show-shadow | grep -q '~aaa' || return 1
+ ceph osd crush set-device-class bbb osd.1 || return 1
+ ceph osd tree | grep -q 'bbb' || return 1
+ ceph osd crush dump | grep -q '~bbb' || return 1
+ ceph osd crush tree --show-shadow | grep -q '~bbb' || return 1
+ ceph osd crush set-device-class ccc osd.2 || return 1
+ ceph osd tree | grep -q 'ccc' || return 1
+ ceph osd crush dump | grep -q '~ccc' || return 1
+ ceph osd crush tree --show-shadow | grep -q '~ccc' || return 1
+ ceph osd crush rm-device-class 0 || return 1
+ ceph osd tree | grep -q 'aaa' && return 1
+    ceph osd crush class ls | grep -q 'aaa' && return 1 # class 'aaa' should be gone
+ ceph osd crush rm-device-class 1 || return 1
+ ceph osd tree | grep -q 'bbb' && return 1
+    ceph osd crush class ls | grep -q 'bbb' && return 1 # class 'bbb' should be gone
+ ceph osd crush rm-device-class 2 || return 1
+ ceph osd tree | grep -q 'ccc' && return 1
+    ceph osd crush class ls | grep -q 'ccc' && return 1 # class 'ccc' should be gone
+ ceph osd crush set-device-class asdf all || return 1
+ ceph osd tree | grep -q 'asdf' || return 1
+ ceph osd crush dump | grep -q '~asdf' || return 1
+ ceph osd crush tree --show-shadow | grep -q '~asdf' || return 1
+ ceph osd crush rule create-replicated asdf-rule default host asdf || return 1
+ ceph osd crush rm-device-class all || return 1
+ ceph osd tree | grep -q 'asdf' && return 1
+ ceph osd crush class ls | grep -q 'asdf' || return 1 # still referenced by asdf-rule
+
+ ceph osd crush set-device-class abc osd.2 || return 1
+ ceph osd crush move osd.2 root=foo rack=foo-rack host=foo-host || return 1
+ out=`ceph osd tree |awk '$1 == 2 && $2 == "abc" {print $0}'`
+ if [ "$out" == "" ]; then
+ return 1
+ fi
+
+ # verify 'crush move' too
+ ceph osd crush dump | grep -q 'foo~abc' || return 1
+ ceph osd crush tree --show-shadow | grep -q 'foo~abc' || return 1
+ ceph osd crush dump | grep -q 'foo-rack~abc' || return 1
+ ceph osd crush tree --show-shadow | grep -q 'foo-rack~abc' || return 1
+ ceph osd crush dump | grep -q 'foo-host~abc' || return 1
+ ceph osd crush tree --show-shadow | grep -q 'foo-host~abc' || return 1
+ ceph osd crush rm-device-class osd.2 || return 1
+ # restore class, so we can continue to test create-replicated
+ ceph osd crush set-device-class abc osd.2 || return 1
+
+ ceph osd crush rule create-replicated foo-rule foo host abc || return 1
+
+ # test set-device-class implicitly change class
+ ceph osd crush set-device-class hdd osd.0 || return 1
+ expect_failure $dir EBUSY ceph osd crush set-device-class nvme osd.0 || return 1
+
+ # test class rename
+ ceph osd crush rm-device-class all || return 1
+ ceph osd crush set-device-class class_1 all || return 1
+ ceph osd crush class ls | grep 'class_1' || return 1
+ ceph osd crush tree --show-shadow | grep 'class_1' || return 1
+ ceph osd crush rule create-replicated class_1_rule default host class_1 || return 1
+ ceph osd crush class rename class_1 class_2
+ ceph osd crush class rename class_1 class_2 # idempotent
+ ceph osd crush class ls | grep 'class_1' && return 1
+ ceph osd crush tree --show-shadow | grep 'class_1' && return 1
+ ceph osd crush class ls | grep 'class_2' || return 1
+ ceph osd crush tree --show-shadow | grep 'class_2' || return 1
+}
+
+main crush-classes "$@"
+
+# Local Variables:
+# compile-command: "cd ../../../build ; ln -sf ../src/ceph-disk/ceph_disk/main.py bin/ceph-disk && make -j4 && ../src/test/crush/crush-classes.sh"
+# End:
diff --git a/qa/standalone/erasure-code/test-erasure-code-plugins.sh b/qa/standalone/erasure-code/test-erasure-code-plugins.sh
new file mode 100755
index 000000000..b5648d472
--- /dev/null
+++ b/qa/standalone/erasure-code/test-erasure-code-plugins.sh
@@ -0,0 +1,118 @@
+#!/usr/bin/env bash
+set -x
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+arch=$(uname -m)
+
+case $arch in
+  i[3456]86*|x86_64*|amd64*)
+ legacy_jerasure_plugins=(jerasure_generic jerasure_sse3 jerasure_sse4)
+ legacy_shec_plugins=(shec_generic shec_sse3 shec_sse4)
+ plugins=(jerasure shec lrc isa)
+ ;;
+ aarch64*|arm*)
+ legacy_jerasure_plugins=(jerasure_generic jerasure_neon)
+ legacy_shec_plugins=(shec_generic shec_neon)
+ plugins=(jerasure shec lrc)
+ ;;
+ *)
+ echo "unsupported platform ${arch}."
+    exit 1
+ ;;
+esac
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:17110" # git grep '\<17110\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ $func $dir || return 1
+ done
+}
+
+function TEST_preload_warning() {
+ local dir=$1
+
+ for plugin in ${legacy_jerasure_plugins[*]} ${legacy_shec_plugins[*]}; do
+ setup $dir || return 1
+ run_mon $dir a --osd_erasure_code_plugins="${plugin}" || return 1
+ run_mgr $dir x || return 1
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1
+ run_osd $dir 0 --osd_erasure_code_plugins="${plugin}" || return 1
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log flush || return 1
+ grep "WARNING: osd_erasure_code_plugins contains plugin ${plugin}" $dir/mon.a.log || return 1
+ grep "WARNING: osd_erasure_code_plugins contains plugin ${plugin}" $dir/osd.0.log || return 1
+ teardown $dir || return 1
+ done
+ return 0
+}
+
+function TEST_preload_no_warning() {
+ local dir=$1
+
+ for plugin in ${plugins[*]}; do
+ setup $dir || return 1
+ run_mon $dir a --osd_erasure_code_plugins="${plugin}" || return 1
+ run_mgr $dir x || return 1
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1
+ run_osd $dir 0 --osd_erasure_code_plugins="${plugin}" || return 1
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log flush || return 1
+ ! grep "WARNING: osd_erasure_code_plugins contains plugin" $dir/mon.a.log || return 1
+ ! grep "WARNING: osd_erasure_code_plugins contains plugin" $dir/osd.0.log || return 1
+ teardown $dir || return 1
+ done
+
+ return 0
+}
+
+function TEST_preload_no_warning_default() {
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a || return 1
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log flush || return 1
+ ! grep "WARNING: osd_erasure_code_plugins" $dir/mon.a.log || return 1
+ ! grep "WARNING: osd_erasure_code_plugins" $dir/osd.0.log || return 1
+ teardown $dir || return 1
+
+ return 0
+}
+
+function TEST_ec_profile_warning() {
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ for id in $(seq 0 2) ; do
+ run_osd $dir $id || return 1
+ done
+ create_rbd_pool || return 1
+ wait_for_clean || return 1
+
+ for plugin in ${legacy_jerasure_plugins[*]}; do
+ ceph osd erasure-code-profile set prof-${plugin} crush-failure-domain=osd technique=reed_sol_van plugin=${plugin} || return 1
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1
+ grep "WARNING: erasure coding profile prof-${plugin} uses plugin ${plugin}" $dir/mon.a.log || return 1
+ done
+
+ for plugin in ${legacy_shec_plugins[*]}; do
+ ceph osd erasure-code-profile set prof-${plugin} crush-failure-domain=osd plugin=${plugin} || return 1
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1
+ grep "WARNING: erasure coding profile prof-${plugin} uses plugin ${plugin}" $dir/mon.a.log || return 1
+ done
+
+ teardown $dir || return 1
+}
+
+main test-erasure-code-plugins "$@"
diff --git a/qa/standalone/erasure-code/test-erasure-code.sh b/qa/standalone/erasure-code/test-erasure-code.sh
new file mode 100755
index 000000000..b93151233
--- /dev/null
+++ b/qa/standalone/erasure-code/test-erasure-code.sh
@@ -0,0 +1,337 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
+# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com>
+#
+# Author: Loic Dachary <loic@dachary.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7101" # git grep '\<7101\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON --mon-osd-prime-pg-temp=false"
+
+ setup $dir || return 1
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ # check that erasure code plugins are preloaded
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1
+ grep 'load: jerasure.*lrc' $dir/mon.a.log || return 1
+ for id in $(seq 0 10) ; do
+ run_osd $dir $id || return 1
+ done
+ create_rbd_pool || return 1
+ wait_for_clean || return 1
+ # check that erasure code plugins are preloaded
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log flush || return 1
+ grep 'load: jerasure.*lrc' $dir/osd.0.log || return 1
+ create_erasure_coded_pool ecpool || return 1
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ $func $dir || return 1
+ done
+
+ delete_pool ecpool || return 1
+ teardown $dir || return 1
+}
+
+function create_erasure_coded_pool() {
+ local poolname=$1
+
+ ceph osd erasure-code-profile set myprofile \
+ crush-failure-domain=osd || return 1
+ create_pool $poolname 12 12 erasure myprofile \
+ || return 1
+ wait_for_clean || return 1
+}
+
+function rados_put_get() {
+ local dir=$1
+ local poolname=$2
+ local objname=${3:-SOMETHING}
+
+
+ for marker in AAA BBB CCCC DDDD ; do
+ printf "%*s" 1024 $marker
+ done > $dir/ORIGINAL
+
+ #
+    # put and get an object, then check that the two copies are equal
+ #
+ rados --pool $poolname put $objname $dir/ORIGINAL || return 1
+ rados --pool $poolname get $objname $dir/COPY || return 1
+ diff $dir/ORIGINAL $dir/COPY || return 1
+ rm $dir/COPY
+
+ #
+ # take out an OSD used to store the object and
+ # check the object can still be retrieved, which implies
+ # recovery
+ #
+ local -a initial_osds=($(get_osds $poolname $objname))
+ local last=$((${#initial_osds[@]} - 1))
+ ceph osd out ${initial_osds[$last]} || return 1
+
+ # give the osdmap up to 5 seconds to refresh
+ sleep 5
+ ! get_osds $poolname $objname | grep '\<'${initial_osds[$last]}'\>' || return 1
+
+ rados --pool $poolname get $objname $dir/COPY || return 1
+ diff $dir/ORIGINAL $dir/COPY || return 1
+ ceph osd in ${initial_osds[$last]} || return 1
+
+ rm $dir/ORIGINAL
+}
+
+function rados_osds_out_in() {
+ local dir=$1
+ local poolname=$2
+ local objname=${3:-SOMETHING}
+
+
+ for marker in FFFF GGGG HHHH IIII ; do
+ printf "%*s" 1024 $marker
+ done > $dir/ORIGINAL
+
+ #
+    # put and get an object, then check that the two copies are equal
+ #
+ rados --pool $poolname put $objname $dir/ORIGINAL || return 1
+ rados --pool $poolname get $objname $dir/COPY || return 1
+ diff $dir/ORIGINAL $dir/COPY || return 1
+ rm $dir/COPY
+
+ #
+    # take out two OSDs used to store the object, then wait for the
+    # cluster to be clean (i.e. all PGs are active and clean) again,
+    # which implies the PGs have been moved to the remaining OSDs.
+    # Check that the object can still be retrieved.
+ #
+ wait_for_clean || return 1
+ local osds_list=$(get_osds $poolname $objname)
+ local -a osds=($osds_list)
+ for osd in 0 1 ; do
+ ceph osd out ${osds[$osd]} || return 1
+ done
+ wait_for_clean || return 1
+ #
+ # verify the object is no longer mapped to the osds that are out
+ #
+ for osd in 0 1 ; do
+ ! get_osds $poolname $objname | grep '\<'${osds[$osd]}'\>' || return 1
+ done
+ rados --pool $poolname get $objname $dir/COPY || return 1
+ diff $dir/ORIGINAL $dir/COPY || return 1
+ #
+    # bring the OSDs back in, then wait for the cluster to be clean
+    # (i.e. all PGs are active and clean) again, which implies the PGs
+    # go back to using the same OSDs as before
+ #
+ for osd in 0 1 ; do
+ ceph osd in ${osds[$osd]} || return 1
+ done
+ wait_for_clean || return 1
+ test "$osds_list" = "$(get_osds $poolname $objname)" || return 1
+ rm $dir/ORIGINAL
+}
+
+function TEST_rados_put_get_lrc_advanced() {
+ local dir=$1
+ local poolname=pool-lrc-a
+ local profile=profile-lrc-a
+
+ ceph osd erasure-code-profile set $profile \
+ plugin=lrc \
+ mapping=DD_ \
+ crush-steps='[ [ "chooseleaf", "osd", 0 ] ]' \
+ layers='[ [ "DDc", "" ] ]' || return 1
+ create_pool $poolname 12 12 erasure $profile \
+ || return 1
+
+ rados_put_get $dir $poolname || return 1
+
+ delete_pool $poolname
+ ceph osd erasure-code-profile rm $profile
+}
+
+function TEST_rados_put_get_lrc_kml() {
+ local dir=$1
+ local poolname=pool-lrc
+ local profile=profile-lrc
+
+ ceph osd erasure-code-profile set $profile \
+ plugin=lrc \
+ k=4 m=2 l=3 \
+ crush-failure-domain=osd || return 1
+ create_pool $poolname 12 12 erasure $profile \
+ || return 1
+
+ rados_put_get $dir $poolname || return 1
+
+ delete_pool $poolname
+ ceph osd erasure-code-profile rm $profile
+}
+
+function TEST_rados_put_get_isa() {
+ if ! erasure_code_plugin_exists isa ; then
+ echo "SKIP because plugin isa has not been built"
+ return 0
+ fi
+ local dir=$1
+ local poolname=pool-isa
+
+ ceph osd erasure-code-profile set profile-isa \
+ plugin=isa \
+ crush-failure-domain=osd || return 1
+ create_pool $poolname 1 1 erasure profile-isa \
+ || return 1
+
+ rados_put_get $dir $poolname || return 1
+
+ delete_pool $poolname
+}
+
+function TEST_rados_put_get_jerasure() {
+ local dir=$1
+
+ rados_put_get $dir ecpool || return 1
+
+ local poolname=pool-jerasure
+ local profile=profile-jerasure
+
+ ceph osd erasure-code-profile set $profile \
+ plugin=jerasure \
+ k=4 m=2 \
+ crush-failure-domain=osd || return 1
+ create_pool $poolname 12 12 erasure $profile \
+ || return 1
+
+ rados_put_get $dir $poolname || return 1
+ rados_osds_out_in $dir $poolname || return 1
+
+ delete_pool $poolname
+ ceph osd erasure-code-profile rm $profile
+}
+
+function TEST_rados_put_get_shec() {
+ local dir=$1
+
+ local poolname=pool-shec
+ local profile=profile-shec
+
+ ceph osd erasure-code-profile set $profile \
+ plugin=shec \
+ k=2 m=1 c=1 \
+ crush-failure-domain=osd || return 1
+ create_pool $poolname 12 12 erasure $profile \
+ || return 1
+
+ rados_put_get $dir $poolname || return 1
+
+ delete_pool $poolname
+ ceph osd erasure-code-profile rm $profile
+}
+
+function TEST_alignment_constraints() {
+ local payload=ABC
+ echo "$payload" > $dir/ORIGINAL
+ #
+ # Verify that the rados command enforces alignment constraints
+ # imposed by the stripe width
+ # See http://tracker.ceph.com/issues/8622
+ #
+ local stripe_unit=$(ceph-conf --show-config-value osd_pool_erasure_code_stripe_unit)
+ eval local $(ceph osd erasure-code-profile get myprofile | grep k=)
+ local block_size=$((stripe_unit * k - 1))
+ dd if=/dev/zero of=$dir/ORIGINAL bs=$block_size count=2
+ rados --block-size=$block_size \
+ --pool ecpool put UNALIGNED $dir/ORIGINAL || return 1
+ rm $dir/ORIGINAL
+}
+
+function chunk_size() {
+ echo $(ceph-conf --show-config-value osd_pool_erasure_code_stripe_unit)
+}
+
+#
+# By default an object will be split in two (k=2) with the first part
+# of the object in the first OSD of the up set and the second part in
+# the next OSD in the up set. This layout is defined by the mapping
+# parameter and this function helps verify that the first and second
+# part of the object are located in the OSD where they should be.
+#
+function verify_chunk_mapping() {
+ local dir=$1
+ local poolname=$2
+ local first=$3
+ local second=$4
+
+ local payload=$(printf '%*s' $(chunk_size) FIRST$poolname ; printf '%*s' $(chunk_size) SECOND$poolname)
+ echo -n "$payload" > $dir/ORIGINAL
+
+ rados --pool $poolname put SOMETHING$poolname $dir/ORIGINAL || return 1
+ rados --pool $poolname get SOMETHING$poolname $dir/COPY || return 1
+ local -a osds=($(get_osds $poolname SOMETHING$poolname))
+ for (( i = 0; i < ${#osds[@]}; i++ )) ; do
+ ceph daemon osd.${osds[$i]} flush_journal
+ done
+ diff $dir/ORIGINAL $dir/COPY || return 1
+ rm $dir/COPY
+
+ local -a osds=($(get_osds $poolname SOMETHING$poolname))
+ objectstore_tool $dir ${osds[$first]} SOMETHING$poolname get-bytes | grep --quiet FIRST$poolname || return 1
+ objectstore_tool $dir ${osds[$second]} SOMETHING$poolname get-bytes | grep --quiet SECOND$poolname || return 1
+}
+
+function TEST_chunk_mapping() {
+ local dir=$1
+
+ #
+ # mapping=DD_ is the default:
+ # first OSD (i.e. 0) in the up set has the first part of the object
+ # second OSD (i.e. 1) in the up set has the second part of the object
+ #
+ verify_chunk_mapping $dir ecpool 0 1 || return 1
+
+ ceph osd erasure-code-profile set remap-profile \
+ plugin=lrc \
+ layers='[ [ "cDD", "" ] ]' \
+ mapping='_DD' \
+ crush-steps='[ [ "choose", "osd", 0 ] ]' || return 1
+ ceph osd erasure-code-profile get remap-profile
+ create_pool remap-pool 12 12 erasure remap-profile \
+ || return 1
+
+ #
+ # mapping=_DD
+ # second OSD (i.e. 1) in the up set has the first part of the object
+ # third OSD (i.e. 2) in the up set has the second part of the object
+ #
+ verify_chunk_mapping $dir remap-pool 1 2 || return 1
+
+ delete_pool remap-pool
+ ceph osd erasure-code-profile rm remap-profile
+}
+
+main test-erasure-code "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && test/erasure-code/test-erasure-code.sh"
+# End:
diff --git a/qa/standalone/erasure-code/test-erasure-eio.sh b/qa/standalone/erasure-code/test-erasure-eio.sh
new file mode 100755
index 000000000..42c538eb9
--- /dev/null
+++ b/qa/standalone/erasure-code/test-erasure-eio.sh
@@ -0,0 +1,700 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2015 Red Hat <contact@redhat.com>
+#
+#
+# Author: Kefu Chai <kchai@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7112" # git grep '\<7112\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+ CEPH_ARGS+="--osd_mclock_override_recovery_settings=true "
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ create_pool rbd 4 || return 1
+
+ # check that erasure code plugins are preloaded
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1
+ grep 'load: jerasure.*lrc' $dir/mon.a.log || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+function setup_osds() {
+ local count=$1
+ shift
+
+ for id in $(seq 0 $(expr $count - 1)) ; do
+ run_osd $dir $id || return 1
+ done
+
+ # check that erasure code plugins are preloaded
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log flush || return 1
+ grep 'load: jerasure.*lrc' $dir/osd.0.log || return 1
+}
+
+function get_state() {
+ local pgid=$1
+ local sname=state
+ ceph --format json pg dump pgs 2>/dev/null | \
+ jq -r ".pg_stats | .[] | select(.pgid==\"$pgid\") | .$sname"
+}
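+
+# Illustrative usage (the pgid is a placeholder): assert that a PG reports
+# the active+clean state.
+#
+#   test "$(get_state 1.0)" = "active+clean" || return 1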
+
+function create_erasure_coded_pool() {
+ local poolname=$1
+ shift
+ local k=$1
+ shift
+ local m=$1
+ shift
+
+ ceph osd erasure-code-profile set myprofile \
+ plugin=jerasure \
+ k=$k m=$m \
+ crush-failure-domain=osd || return 1
+ create_pool $poolname 1 1 erasure myprofile \
+ || return 1
+ wait_for_clean || return 1
+}
+
+function delete_erasure_coded_pool() {
+ local poolname=$1
+ ceph osd pool delete $poolname $poolname --yes-i-really-really-mean-it
+ ceph osd erasure-code-profile rm myprofile
+}
+
+function rados_put() {
+ local dir=$1
+ local poolname=$2
+ local objname=${3:-SOMETHING}
+
+ for marker in AAA BBB CCCC DDDD ; do
+ printf "%*s" 1024 $marker
+ done > $dir/ORIGINAL
+ #
+    # put an object; rados_get later compares it with $dir/ORIGINAL
+ #
+ rados --pool $poolname put $objname $dir/ORIGINAL || return 1
+}
+
+function rados_get() {
+ local dir=$1
+ local poolname=$2
+ local objname=${3:-SOMETHING}
+ local expect=${4:-ok}
+
+ #
+ # Expect a failure to get object
+ #
+ if [ $expect = "fail" ];
+ then
+ ! rados --pool $poolname get $objname $dir/COPY
+ return
+ fi
+ #
+ # get an object, compare with $dir/ORIGINAL
+ #
+ rados --pool $poolname get $objname $dir/COPY || return 1
+ diff $dir/ORIGINAL $dir/COPY || return 1
+ rm $dir/COPY
+}
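+
+# Illustrative usage: read back and compare against $dir/ORIGINAL, or expect
+# the read to fail after too many shards have been damaged.
+#
+#   rados_get $dir $poolname $objname || return 1
+#   rados_get $dir $poolname $objname fail || return 1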
+
+
+function inject_remove() {
+ local pooltype=$1
+ shift
+ local which=$1
+ shift
+ local poolname=$1
+ shift
+ local objname=$1
+ shift
+ local dir=$1
+ shift
+ local shard_id=$1
+ shift
+
+ local -a initial_osds=($(get_osds $poolname $objname))
+ local osd_id=${initial_osds[$shard_id]}
+ objectstore_tool $dir $osd_id $objname remove || return 1
+}
+
+# Test with an injected error
+function rados_put_get_data() {
+ local inject=$1
+ shift
+ local dir=$1
+ shift
+ local shard_id=$1
+ shift
+ local arg=$1
+
+    # inject an error into the specified shard
+ #
+ local poolname=pool-jerasure
+ local objname=obj-$inject-$$-$shard_id
+ rados_put $dir $poolname $objname || return 1
+ inject_$inject ec data $poolname $objname $dir $shard_id || return 1
+ rados_get $dir $poolname $objname || return 1
+
+ if [ "$arg" = "recovery" ];
+ then
+ #
+ # take out the last OSD used to store the object,
+ # bring it back, and check for clean PGs which means
+ # recovery didn't crash the primary.
+ #
+ local -a initial_osds=($(get_osds $poolname $objname))
+ local last_osd=${initial_osds[-1]}
+ # Kill OSD
+ kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
+ ceph osd out ${last_osd} || return 1
+ ! get_osds $poolname $objname | grep '\<'${last_osd}'\>' || return 1
+ ceph osd in ${last_osd} || return 1
+ activate_osd $dir ${last_osd} || return 1
+ wait_for_clean || return 1
+ # Won't check for eio on get here -- recovery above might have fixed it
+ else
+ shard_id=$(expr $shard_id + 1)
+ inject_$inject ec data $poolname $objname $dir $shard_id || return 1
+ rados_get $dir $poolname $objname fail || return 1
+ rm $dir/ORIGINAL
+ fi
+
+}
+
+# Change the size of the specified shard
+#
+function set_size() {
+ local objname=$1
+ shift
+ local dir=$1
+ shift
+ local shard_id=$1
+ shift
+ local bytes=$1
+ shift
+ local mode=${1}
+
+ local poolname=pool-jerasure
+ local -a initial_osds=($(get_osds $poolname $objname))
+ local osd_id=${initial_osds[$shard_id]}
+ ceph osd set noout
+ if [ "$mode" = "add" ];
+ then
+ objectstore_tool $dir $osd_id $objname get-bytes $dir/CORRUPT || return 1
+ dd if=/dev/urandom bs=$bytes count=1 >> $dir/CORRUPT
+ elif [ "$bytes" = "0" ];
+ then
+ touch $dir/CORRUPT
+ else
+ dd if=/dev/urandom bs=$bytes count=1 of=$dir/CORRUPT
+ fi
+ objectstore_tool $dir $osd_id $objname set-bytes $dir/CORRUPT || return 1
+ rm -f $dir/CORRUPT
+ ceph osd unset noout
+}
+
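+# Put an object, corrupt the size of one shard (then a second one) and
+# verify reads succeed with a single bad shard but fail with two.
+# Usage: rados_get_data_bad_size <dir> <shard_id> <bytes> [set|add]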
+function rados_get_data_bad_size() {
+ local dir=$1
+ shift
+ local shard_id=$1
+ shift
+ local bytes=$1
+ shift
+ local mode=${1:-set}
+
+ local poolname=pool-jerasure
+ local objname=obj-size-$$-$shard_id-$bytes
+ rados_put $dir $poolname $objname || return 1
+
+ # Change the size of the specified shard
+ #
+ set_size $objname $dir $shard_id $bytes $mode || return 1
+
+ rados_get $dir $poolname $objname || return 1
+
+ # Leave objname and modify another shard
+ shard_id=$(expr $shard_id + 1)
+ set_size $objname $dir $shard_id $bytes $mode || return 1
+ rados_get $dir $poolname $objname fail || return 1
+ rm $dir/ORIGINAL
+}
+
+#
+# These two test cases validate the following behavior:
+# For an object on an EC pool, if one shard has a read error
+# (either primary or replica), the client can still read the object.
+#
+# If 2 shards have read errors, the client gets an error.
+#
+function TEST_rados_get_subread_eio_shard_0() {
+ local dir=$1
+ setup_osds 4 || return 1
+
+ local poolname=pool-jerasure
+ create_erasure_coded_pool $poolname 2 1 || return 1
+ # inject eio on primary OSD (0) and replica OSD (1)
+ local shard_id=0
+ rados_put_get_data eio $dir $shard_id || return 1
+ delete_erasure_coded_pool $poolname
+}
+
+function TEST_rados_get_subread_eio_shard_1() {
+ local dir=$1
+ setup_osds 4 || return 1
+
+ local poolname=pool-jerasure
+ create_erasure_coded_pool $poolname 2 1 || return 1
+ # inject eio into replicas OSD (1) and OSD (2)
+ local shard_id=1
+ rados_put_get_data eio $dir $shard_id || return 1
+ delete_erasure_coded_pool $poolname
+}
+
+# We don't remove the object from the primary because
+# that would just make it appear to be missing
+
+function TEST_rados_get_subread_missing() {
+ local dir=$1
+ setup_osds 4 || return 1
+
+ local poolname=pool-jerasure
+ create_erasure_coded_pool $poolname 2 1 || return 1
+ # inject remove into replicas OSD (1) and OSD (2)
+ local shard_id=1
+ rados_put_get_data remove $dir $shard_id || return 1
+ delete_erasure_coded_pool $poolname
+}
+
+#
+# These two test cases validate the following behavior:
+# For an object on an EC pool, a shard with an incorrect size causes
+# an internal read error, but the client can still read the object.
+#
+# If 2 shards have incorrect sizes, the client gets an error.
+#
+function TEST_rados_get_bad_size_shard_0() {
+ local dir=$1
+ setup_osds 4 || return 1
+
+ local poolname=pool-jerasure
+ create_erasure_coded_pool $poolname 2 1 || return 1
+ # Set incorrect size into primary OSD (0) and replica OSD (1)
+ local shard_id=0
+ rados_get_data_bad_size $dir $shard_id 10 || return 1
+ rados_get_data_bad_size $dir $shard_id 0 || return 1
+ rados_get_data_bad_size $dir $shard_id 256 add || return 1
+ delete_erasure_coded_pool $poolname
+}
+
+function TEST_rados_get_bad_size_shard_1() {
+ local dir=$1
+ setup_osds 4 || return 1
+
+ local poolname=pool-jerasure
+ create_erasure_coded_pool $poolname 2 1 || return 1
+ # Set incorrect size into replicas OSD (1) and OSD (2)
+ local shard_id=1
+ rados_get_data_bad_size $dir $shard_id 10 || return 1
+ rados_get_data_bad_size $dir $shard_id 0 || return 1
+ rados_get_data_bad_size $dir $shard_id 256 add || return 1
+ delete_erasure_coded_pool $poolname
+}
+
+function TEST_rados_get_with_subreadall_eio_shard_0() {
+ local dir=$1
+ local shard_id=0
+
+ setup_osds 4 || return 1
+
+ local poolname=pool-jerasure
+ create_erasure_coded_pool $poolname 2 1 || return 1
+ # inject eio on primary OSD (0)
+ rados_put_get_data eio $dir $shard_id recovery || return 1
+
+ delete_erasure_coded_pool $poolname
+}
+
+function TEST_rados_get_with_subreadall_eio_shard_1() {
+ local dir=$1
+ local shard_id=1
+
+ setup_osds 4 || return 1
+
+ local poolname=pool-jerasure
+ create_erasure_coded_pool $poolname 2 1 || return 1
+ # inject eio on replica OSD (1)
+ rados_put_get_data eio $dir $shard_id recovery || return 1
+
+ delete_erasure_coded_pool $poolname
+}
+
+# Test recovery when reading the object attr returns an error
+function TEST_ec_object_attr_read_error() {
+ local dir=$1
+ local objname=myobject
+
+ setup_osds 7 || return 1
+
+ local poolname=pool-jerasure
+ create_erasure_coded_pool $poolname 3 2 || return 1
+
+ local primary_osd=$(get_primary $poolname $objname)
+ # Kill primary OSD
+ kill_daemons $dir TERM osd.${primary_osd} >&2 < /dev/null || return 1
+
+ # Write data
+ rados_put $dir $poolname $objname || return 1
+
+ # Inject eio, shard 1 is the one read attr
+ inject_eio ec mdata $poolname $objname $dir 1 || return 1
+
+ # Restart OSD
+ activate_osd $dir ${primary_osd} || return 1
+
+ # Cluster should recover this object
+ wait_for_clean || return 1
+
+ rados_get $dir $poolname myobject || return 1
+
+ delete_erasure_coded_pool $poolname
+}
+
+# Test recovery when the first k shards aren't all available
+function TEST_ec_single_recovery_error() {
+ local dir=$1
+ local objname=myobject
+
+ setup_osds 7 || return 1
+
+ local poolname=pool-jerasure
+ create_erasure_coded_pool $poolname 3 2 || return 1
+
+ rados_put $dir $poolname $objname || return 1
+ inject_eio ec data $poolname $objname $dir 0 || return 1
+
+ local -a initial_osds=($(get_osds $poolname $objname))
+ local last_osd=${initial_osds[-1]}
+ # Kill OSD
+ kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
+ ceph osd down ${last_osd} || return 1
+ ceph osd out ${last_osd} || return 1
+
+ # Cluster should recover this object
+ wait_for_clean || return 1
+
+ rados_get $dir $poolname myobject || return 1
+
+ delete_erasure_coded_pool $poolname
+}
+
+# Test recovery when repeated reads are needed due to EIO
+function TEST_ec_recovery_multiple_errors() {
+ local dir=$1
+ local objname=myobject
+
+ setup_osds 9 || return 1
+
+ local poolname=pool-jerasure
+ create_erasure_coded_pool $poolname 4 4 || return 1
+
+ rados_put $dir $poolname $objname || return 1
+ inject_eio ec data $poolname $objname $dir 0 || return 1
+ # the first read will try shards 0,1,2; when 0 gets EIO, shard 3 gets
+ # tried as well. Make that fail too, to test multiple-EIO handling.
+ inject_eio ec data $poolname $objname $dir 3 || return 1
+ inject_eio ec data $poolname $objname $dir 4 || return 1
+
+ local -a initial_osds=($(get_osds $poolname $objname))
+ local last_osd=${initial_osds[-1]}
+ # Kill OSD
+ kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
+ ceph osd down ${last_osd} || return 1
+ ceph osd out ${last_osd} || return 1
+
+ # Cluster should recover this object
+ wait_for_clean || return 1
+
+ rados_get $dir $poolname myobject || return 1
+
+ delete_erasure_coded_pool $poolname
+}
+
+# Test recovery when there's only one shard to recover, but multiple
+# objects recovering in one RecoveryOp
+function TEST_ec_recovery_multiple_objects() {
+ local dir=$1
+ local objname=myobject
+
+ ORIG_ARGS=$CEPH_ARGS
+ CEPH_ARGS+=' --osd-recovery-max-single-start 3 --osd-recovery-max-active 3 '
+ setup_osds 7 || return 1
+ CEPH_ARGS=$ORIG_ARGS
+
+ local poolname=pool-jerasure
+ create_erasure_coded_pool $poolname 3 2 || return 1
+
+ rados_put $dir $poolname test1
+ rados_put $dir $poolname test2
+ rados_put $dir $poolname test3
+
+ ceph osd out 0 || return 1
+
+ # Cluster should recover these objects all at once
+ wait_for_clean || return 1
+
+ rados_get $dir $poolname test1
+ rados_get $dir $poolname test2
+ rados_get $dir $poolname test3
+
+ delete_erasure_coded_pool $poolname
+}
+
+# Test multi-object recovery when the one missing shard gets EIO
+function TEST_ec_recovery_multiple_objects_eio() {
+ local dir=$1
+ local objname=myobject
+
+ ORIG_ARGS=$CEPH_ARGS
+ CEPH_ARGS+=' --osd-recovery-max-single-start 3 --osd-recovery-max-active 3 '
+ setup_osds 7 || return 1
+ CEPH_ARGS=$ORIG_ARGS
+
+ local poolname=pool-jerasure
+ create_erasure_coded_pool $poolname 3 2 || return 1
+
+ rados_put $dir $poolname test1
+ rados_put $dir $poolname test2
+ rados_put $dir $poolname test3
+
+ # can't read from this shard anymore
+ inject_eio ec data $poolname $objname $dir 0 || return 1
+ ceph osd out 0 || return 1
+
+ # Cluster should recover these objects all at once
+ wait_for_clean || return 1
+
+ rados_get $dir $poolname test1
+ rados_get $dir $poolname test2
+ rados_get $dir $poolname test3
+
+ delete_erasure_coded_pool $poolname
+}
+
+# Test backfill with unfound object
+function TEST_ec_backfill_unfound() {
+ local dir=$1
+ local objname=myobject
+ local lastobj=300
+ # Must be between 1 and $lastobj
+ local testobj=obj250
+
+ ORIG_ARGS=$CEPH_ARGS
+ CEPH_ARGS+=' --osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10'
+ setup_osds 5 || return 1
+ CEPH_ARGS=$ORIG_ARGS
+
+ local poolname=pool-jerasure
+ create_erasure_coded_pool $poolname 3 2 || return 1
+
+ ceph pg dump pgs
+
+ rados_put $dir $poolname $objname || return 1
+ local primary=$(get_primary $poolname $objname)
+
+ local -a initial_osds=($(get_osds $poolname $objname))
+ local last_osd=${initial_osds[-1]}
+ kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
+ ceph osd down ${last_osd} || return 1
+ ceph osd out ${last_osd} || return 1
+
+ ceph pg dump pgs
+
+ dd if=/dev/urandom of=${dir}/ORIGINAL bs=1024 count=4
+ for i in $(seq 1 $lastobj)
+ do
+ rados --pool $poolname put obj${i} $dir/ORIGINAL || return 1
+ done
+
+ inject_eio ec data $poolname $testobj $dir 0 || return 1
+ inject_eio ec data $poolname $testobj $dir 1 || return 1
+
+ activate_osd $dir ${last_osd} || return 1
+ ceph osd in ${last_osd} || return 1
+
+ sleep 15
+
+ for tmp in $(seq 1 240); do
+ state=$(get_state 2.0)
+ echo $state | grep backfill_unfound
+ if [ "$?" = "0" ]; then
+ break
+ fi
+ echo $state
+ sleep 1
+ done
+
+ ceph pg dump pgs
+ kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
+ sleep 5
+
+ ceph pg dump pgs
+ ceph pg 2.0 list_unfound
+ ceph pg 2.0 query
+
+ ceph pg 2.0 list_unfound | grep -q $testobj || return 1
+
+ check=$(ceph pg 2.0 list_unfound | jq ".available_might_have_unfound")
+ test "$check" == "true" || return 1
+
+ eval check=$(ceph pg 2.0 list_unfound | jq .might_have_unfound[0].status)
+ test "$check" == "osd is down" || return 1
+
+ eval check=$(ceph pg 2.0 list_unfound | jq .might_have_unfound[0].osd)
+ test "$check" == "2(4)" || return 1
+
+ activate_osd $dir ${last_osd} || return 1
+
+ # Command should hang because object is unfound
+ timeout 5 rados -p $poolname get $testobj $dir/CHECK
+ test $? = "124" || return 1
+
+ ceph pg 2.0 mark_unfound_lost delete
+
+ wait_for_clean || return 1
+
+ for i in $(seq 1 $lastobj)
+ do
+ if [ obj${i} = "$testobj" ]; then
+ # Doesn't exist anymore
+ ! rados -p $poolname get $testobj $dir/CHECK || return 1
+ else
+ rados --pool $poolname get obj${i} $dir/CHECK || return 1
+ diff -q $dir/ORIGINAL $dir/CHECK || return 1
+ fi
+ done
+
+ rm -f ${dir}/ORIGINAL ${dir}/CHECK
+
+ delete_erasure_coded_pool $poolname
+}
+
+# Test recovery with unfound object
+function TEST_ec_recovery_unfound() {
+ local dir=$1
+ local objname=myobject
+ local lastobj=100
+ # Must be between 1 and $lastobj
+ local testobj=obj75
+
+ ORIG_ARGS=$CEPH_ARGS
+ CEPH_ARGS+=' --osd-recovery-max-single-start 3 --osd-recovery-max-active 3 '
+ CEPH_ARGS+=' --osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10'
+ setup_osds 5 || return 1
+ CEPH_ARGS=$ORIG_ARGS
+
+ local poolname=pool-jerasure
+ create_erasure_coded_pool $poolname 3 2 || return 1
+
+ ceph pg dump pgs
+
+ rados_put $dir $poolname $objname || return 1
+
+ local -a initial_osds=($(get_osds $poolname $objname))
+ local last_osd=${initial_osds[-1]}
+ kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
+ ceph osd down ${last_osd} || return 1
+ ceph osd out ${last_osd} || return 1
+
+ ceph pg dump pgs
+
+ dd if=/dev/urandom of=${dir}/ORIGINAL bs=1024 count=4
+ for i in $(seq 1 $lastobj)
+ do
+ rados --pool $poolname put obj${i} $dir/ORIGINAL || return 1
+ done
+
+ inject_eio ec data $poolname $testobj $dir 0 || return 1
+ inject_eio ec data $poolname $testobj $dir 1 || return 1
+
+ activate_osd $dir ${last_osd} || return 1
+ ceph osd in ${last_osd} || return 1
+
+ sleep 15
+
+ for tmp in $(seq 1 100); do
+ state=$(get_state 2.0)
+ echo $state | grep recovery_unfound
+ if [ "$?" = "0" ]; then
+ break
+ fi
+ echo "$state "
+ sleep 1
+ done
+
+ ceph pg dump pgs
+ ceph pg 2.0 list_unfound
+ ceph pg 2.0 query
+
+ ceph pg 2.0 list_unfound | grep -q $testobj || return 1
+
+ check=$(ceph pg 2.0 list_unfound | jq ".available_might_have_unfound")
+ test "$check" == "true" || return 1
+
+ check=$(ceph pg 2.0 list_unfound | jq ".might_have_unfound | length")
+ test $check == 0 || return 1
+
+ # Command should hang because object is unfound
+ timeout 5 rados -p $poolname get $testobj $dir/CHECK
+ test $? = "124" || return 1
+
+ ceph pg 2.0 mark_unfound_lost delete
+
+ wait_for_clean || return 1
+
+ for i in $(seq 1 $lastobj)
+ do
+ if [ obj${i} = "$testobj" ]; then
+ # Doesn't exist anymore
+ ! rados -p $poolname get $testobj $dir/CHECK || return 1
+ else
+ rados --pool $poolname get obj${i} $dir/CHECK || return 1
+ diff -q $dir/ORIGINAL $dir/CHECK || return 1
+ fi
+ done
+
+ rm -f ${dir}/ORIGINAL ${dir}/CHECK
+
+ delete_erasure_coded_pool $poolname
+}
+
+main test-erasure-eio "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && test/erasure-code/test-erasure-eio.sh"
+# End:
diff --git a/qa/standalone/mgr/balancer.sh b/qa/standalone/mgr/balancer.sh
new file mode 100755
index 000000000..2d7b2f35d
--- /dev/null
+++ b/qa/standalone/mgr/balancer.sh
@@ -0,0 +1,223 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2019 Red Hat <contact@redhat.com>
+#
+# Author: David Zafman <dzafman@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7102" # git grep '\<7102\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ $func $dir || return 1
+ done
+}
+
+TEST_POOL1=test1
+TEST_POOL2=test2
+
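+# Exercise the balancer command surface: status, eval, pool add/ls/rm,
+# switching between upmap and crush-compat modes, optimize and on/off.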
+function TEST_balancer() {
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+ create_pool $TEST_POOL1 8
+ create_pool $TEST_POOL2 8
+
+ wait_for_clean || return 1
+
+ ceph pg dump pgs
+ ceph balancer status || return 1
+ eval MODE=$(ceph balancer status | jq '.mode')
+ test $MODE = "upmap" || return 1
+ ACTIVE=$(ceph balancer status | jq '.active')
+ test $ACTIVE = "true" || return 1
+
+ ceph balancer ls || return 1
+ PLANS=$(ceph balancer ls)
+ test "$PLANS" = "[]" || return 1
+ ceph balancer eval || return 1
+ EVAL="$(ceph balancer eval)"
+ test "$EVAL" = "current cluster score 0.000000 (lower is better)"
+ ceph balancer eval-verbose || return 1
+
+ ceph balancer pool add $TEST_POOL1 || return 1
+ ceph balancer pool add $TEST_POOL2 || return 1
+ ceph balancer pool ls || return 1
+ eval POOL=$(ceph balancer pool ls | jq 'sort | .[0]')
+ test "$POOL" = "$TEST_POOL1" || return 1
+ eval POOL=$(ceph balancer pool ls | jq 'sort | .[1]')
+ test "$POOL" = "$TEST_POOL2" || return 1
+ ceph balancer pool rm $TEST_POOL1 || return 1
+ ceph balancer pool rm $TEST_POOL2 || return 1
+ ceph balancer pool ls || return 1
+ ceph balancer pool add $TEST_POOL1 || return 1
+
+ ceph balancer mode crush-compat || return 1
+ ceph balancer status || return 1
+ eval MODE=$(ceph balancer status | jq '.mode')
+ test $MODE = "crush-compat" || return 1
+ ceph balancer off || return 1
+ ! ceph balancer optimize plan_crush $TEST_POOL1 || return 1
+ ceph balancer status || return 1
+ eval RESULT=$(ceph balancer status | jq '.optimize_result')
+ test "$RESULT" = "Distribution is already perfect" || return 1
+
+ ceph balancer on || return 1
+ ACTIVE=$(ceph balancer status | jq '.active')
+ test $ACTIVE = "true" || return 1
+ sleep 2
+ ceph balancer status || return 1
+ ceph balancer off || return 1
+ ACTIVE=$(ceph balancer status | jq '.active')
+ test $ACTIVE = "false" || return 1
+ sleep 2
+
+ ceph balancer reset || return 1
+
+ ceph balancer mode upmap || return 1
+ ceph balancer status || return 1
+ eval MODE=$(ceph balancer status | jq '.mode')
+ test $MODE = "upmap" || return 1
+ ! ceph balancer optimize plan_upmap $TEST_POOL || return 1
+ ceph balancer status || return 1
+ eval RESULT=$(ceph balancer status | jq '.optimize_result')
+ test "$RESULT" = "Unable to find further optimization, or pool(s) pg_num is decreasing, or distribution is already perfect" || return 1
+
+ ceph balancer on || return 1
+ ACTIVE=$(ceph balancer status | jq '.active')
+ test $ACTIVE = "true" || return 1
+ sleep 2
+ ceph balancer status || return 1
+ ceph balancer off || return 1
+ ACTIVE=$(ceph balancer status | jq '.active')
+ test $ACTIVE = "false" || return 1
+
+ teardown $dir || return 1
+}
+
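+# Verify that the upmap balancer evens out PG counts across the OSDs,
+# first for a single pool and again after a second pool is created.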
+function TEST_balancer2() {
+ local dir=$1
+ TEST_PGS1=118
+ TEST_PGS2=132
+ TOTAL_PGS=$(expr $TEST_PGS1 + $TEST_PGS2)
+ OSDS=5
+ DEFAULT_REPLICAS=3
+ # Integer average of PGS per OSD (70.8), so each OSD >= this
+ FINAL_PER_OSD1=$(expr \( $TEST_PGS1 \* $DEFAULT_REPLICAS \) / $OSDS)
+ # Integer average of PGS per OSD (150)
+ FINAL_PER_OSD2=$(expr \( \( $TEST_PGS1 + $TEST_PGS2 \) \* $DEFAULT_REPLICAS \) / $OSDS)
+
+ CEPH_ARGS+="--osd_pool_default_pg_autoscale_mode=off "
+ CEPH_ARGS+="--debug_osd=20 "
+ setup $dir || return 1
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ for i in $(seq 0 $(expr $OSDS - 1))
+ do
+ run_osd $dir $i || return 1
+ done
+
+ ceph osd set-require-min-compat-client luminous
+ ceph config set mgr mgr/balancer/upmap_max_deviation 1
+ ceph balancer mode upmap || return 1
+ ceph balancer on || return 1
+ ceph config set mgr mgr/balancer/sleep_interval 5
+
+ create_pool $TEST_POOL1 $TEST_PGS1
+
+ wait_for_clean || return 1
+
+ # Wait up to 2 minutes
+ OK=no
+ for i in $(seq 1 25)
+ do
+ sleep 5
+ if grep -q "Optimization plan is almost perfect" $dir/mgr.x.log
+ then
+ OK=yes
+ break
+ fi
+ done
+ test $OK = "yes" || return 1
+ # Plan is found, but PGs still need to move
+ sleep 10
+ wait_for_clean || return 1
+ ceph osd df
+
+ PGS=$(ceph osd df --format=json-pretty | jq '.nodes[0].pgs')
+ test $PGS -ge $FINAL_PER_OSD1 || return 1
+ PGS=$(ceph osd df --format=json-pretty | jq '.nodes[1].pgs')
+ test $PGS -ge $FINAL_PER_OSD1 || return 1
+ PGS=$(ceph osd df --format=json-pretty | jq '.nodes[2].pgs')
+ test $PGS -ge $FINAL_PER_OSD1 || return 1
+ PGS=$(ceph osd df --format=json-pretty | jq '.nodes[3].pgs')
+ test $PGS -ge $FINAL_PER_OSD1 || return 1
+ PGS=$(ceph osd df --format=json-pretty | jq '.nodes[4].pgs')
+ test $PGS -ge $FINAL_PER_OSD1 || return 1
+
+ create_pool $TEST_POOL2 $TEST_PGS2
+
+ # Wait up to 2 minutes
+ OK=no
+ for i in $(seq 1 25)
+ do
+ sleep 5
+ COUNT=$(grep "Optimization plan is almost perfect" $dir/mgr.x.log | wc -l)
+ if test $COUNT = "2"
+ then
+ OK=yes
+ break
+ fi
+ done
+ test $OK = "yes" || return 1
+ # Plan is found, but PGs still need to move
+ sleep 10
+ wait_for_clean || return 1
+ ceph osd df
+
+ # We should be within plus or minus 2 of FINAL_PER_OSD2,
+ # because each pool is balanced independently here
+ MIN=$(expr $FINAL_PER_OSD2 - 2)
+ MAX=$(expr $FINAL_PER_OSD2 + 2)
+ PGS=$(ceph osd df --format=json-pretty | jq '.nodes[0].pgs')
+ test $PGS -ge $MIN -a $PGS -le $MAX || return 1
+ PGS=$(ceph osd df --format=json-pretty | jq '.nodes[1].pgs')
+ test $PGS -ge $MIN -a $PGS -le $MAX || return 1
+ PGS=$(ceph osd df --format=json-pretty | jq '.nodes[2].pgs')
+ test $PGS -ge $MIN -a $PGS -le $MAX || return 1
+ PGS=$(ceph osd df --format=json-pretty | jq '.nodes[3].pgs')
+ test $PGS -ge $MIN -a $PGS -le $MAX || return 1
+ PGS=$(ceph osd df --format=json-pretty | jq '.nodes[4].pgs')
+ test $PGS -ge $MIN -a $PGS -le $MAX || return 1
+
+ teardown $dir || return 1
+}
+
+main balancer "$@"
+
+# Local Variables:
+# compile-command: "make -j4 && ../qa/run-standalone.sh balancer.sh"
+# End:
diff --git a/qa/standalone/misc/mclock-config.sh b/qa/standalone/misc/mclock-config.sh
new file mode 100755
index 000000000..59f002584
--- /dev/null
+++ b/qa/standalone/misc/mclock-config.sh
@@ -0,0 +1,467 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2022 Red Hat <contact@redhat.com>
+#
+# Author: Sridhar Seshasayee <sseshasa@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7124" # git grep '\<7124\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+ CEPH_ARGS+="--debug-mclock 20 "
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
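+# Switch an OSD running the mClock scheduler to the "custom" profile and
+# verify that a subsequent QoS change (osd_mclock_scheduler_client_res)
+# shows up both in the monitor config db and in the OSD's in-memory values.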
+function TEST_profile_builtin_to_custom() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 --osd_op_queue=mclock_scheduler || return 1
+
+ # Verify the default mclock profile on the OSD
+ local mclock_profile=$(ceph config get osd.0 osd_mclock_profile)
+ test "$mclock_profile" = "balanced" || return 1
+
+ # Verify the running mClock profile
+ mclock_profile=$(CEPH_ARGS='' ceph --format=json daemon \
+ $(get_asok_path osd.0) config get osd_mclock_profile |\
+ jq .osd_mclock_profile)
+ mclock_profile=$(eval echo $mclock_profile)
+ test "$mclock_profile" = "high_recovery_ops" || return 1
+
+ # Change the mclock profile to 'custom'
+ ceph tell osd.0 config set osd_mclock_profile custom || return 1
+
+ # Verify that the mclock profile is set to 'custom' on the OSDs
+ mclock_profile=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path \
+ osd.0) config get osd_mclock_profile | jq .osd_mclock_profile)
+ mclock_profile=$(eval echo $mclock_profile)
+ test "$mclock_profile" = "custom" || return 1
+
+ # Change a mclock config param and confirm the change
+ local client_res=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path \
+ osd.0) config get osd_mclock_scheduler_client_res | \
+ jq .osd_mclock_scheduler_client_res | bc)
+ echo "client_res = $client_res"
+ local client_res_new=$(echo "$client_res + 0.1" | bc -l)
+ echo "client_res_new = $client_res_new"
+ ceph config set osd.0 osd_mclock_scheduler_client_res \
+ $client_res_new || return 1
+
+ # Check value in config monitor db
+ local res=$(ceph config get osd.0 \
+ osd_mclock_scheduler_client_res) || return 1
+ if (( $(echo "$res != $client_res_new" | bc -l) )); then
+ return 1
+ fi
+ # Check value in the in-memory 'values' map
+ res=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path \
+ osd.0) config get osd_mclock_scheduler_client_res | \
+ jq .osd_mclock_scheduler_client_res | bc)
+ if (( $(echo "$res != $client_res_new" | bc -l) )); then
+ return 1
+ fi
+
+ teardown $dir || return 1
+}
+
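+# Switch from "custom" back to the original built-in profile and verify
+# that the modified client reservation is kept until it is removed from
+# the monitor config db, after which the original value takes effect again.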
+function TEST_profile_custom_to_builtin() {
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 --osd_op_queue=mclock_scheduler || return 1
+
+ # Verify the default mclock profile on the OSD
+ local def_mclock_profile
+ def_mclock_profile=$(ceph config get osd.0 osd_mclock_profile)
+ test "$def_mclock_profile" = "balanced" || return 1
+
+ # Verify the running mClock profile
+ local orig_mclock_profile=$(CEPH_ARGS='' ceph --format=json daemon \
+ $(get_asok_path osd.0) config get osd_mclock_profile |\
+ jq .osd_mclock_profile)
+ orig_mclock_profile=$(eval echo $orig_mclock_profile)
+ test $orig_mclock_profile = "high_recovery_ops" || return 1
+
+ # Change the mclock profile to 'custom'
+ ceph tell osd.0 config set osd_mclock_profile custom || return 1
+
+ # Verify that the mclock profile is set to 'custom' on the OSDs
+ local mclock_profile=$(CEPH_ARGS='' ceph --format=json daemon \
+ $(get_asok_path osd.0) config get osd_mclock_profile | \
+ jq .osd_mclock_profile)
+ mclock_profile=$(eval echo $mclock_profile)
+ test $mclock_profile = "custom" || return 1
+
+ # Save the original client reservations allocated to the OSDs
+ local client_res
+ client_res=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path \
+ osd.0) config get osd_mclock_scheduler_client_res | \
+ jq .osd_mclock_scheduler_client_res | bc)
+ echo "Original client_res for osd.0 = $client_res"
+
+ # Change a mclock config param and confirm the change
+ local client_res_new=$(echo "$client_res + 0.1" | bc -l)
+ echo "client_res_new = $client_res_new"
+ ceph config set osd osd_mclock_scheduler_client_res \
+ $client_res_new || return 1
+ # Check value in config monitor db
+ local res=$(ceph config get osd.0 \
+ osd_mclock_scheduler_client_res) || return 1
+ if (( $(echo "$res != $client_res_new" | bc -l) )); then
+ return 1
+ fi
+ # Check value in the in-memory 'values' map
+ res=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path \
+ osd.0) config get osd_mclock_scheduler_client_res | \
+ jq .osd_mclock_scheduler_client_res | bc)
+ if (( $(echo "$res != $client_res_new" | bc -l) )); then
+ return 1
+ fi
+
+ # Switch the mclock profile back to the original built-in profile.
+ # The config subsystem prevents the overwrite of the changed QoS config
+ # option above i.e. osd_mclock_scheduler_client_res. This fact is verified
+ # before proceeding to remove the entry from the config monitor db. After
+ # the config entry is removed, the original value for the config option is
+ # restored and is verified.
+ ceph tell osd.0 config set osd_mclock_profile $orig_mclock_profile || return 1
+ # Verify that the mclock profile is set back to the original on the OSD
+ eval mclock_profile=$(CEPH_ARGS='' ceph --format=json daemon \
+ $(get_asok_path osd.0) config get osd_mclock_profile | \
+ jq .osd_mclock_profile)
+ #mclock_profile=$(ceph config get osd.0 osd_mclock_profile)
+ test "$mclock_profile" = "$orig_mclock_profile" || return 1
+
+ # Verify that the new value is still in effect
+ # Check value in config monitor db
+ local res=$(ceph config get osd.0 \
+ osd_mclock_scheduler_client_res) || return 1
+ if (( $(echo "$res != $client_res_new" | bc -l) )); then
+ return 1
+ fi
+ # Check value in the in-memory 'values' map
+ res=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path \
+ osd.0) config get osd_mclock_scheduler_client_res | \
+ jq .osd_mclock_scheduler_client_res | bc)
+ if (( $(echo "$res != $client_res_new" | bc -l) )); then
+ return 1
+ fi
+
+ # Remove the changed QoS config option from monitor db
+ ceph config rm osd osd_mclock_scheduler_client_res || return 1
+
+ sleep 5 # Allow time for change to take effect
+
+ # Verify that the original values are now restored
+ # Check value in config monitor db
+ res=$(ceph config get osd.0 \
+ osd_mclock_scheduler_client_res) || return 1
+ if (( $(echo "$res != 0.0" | bc -l) )); then
+ return 1
+ fi
+
+ # Check value in the in-memory 'values' map
+ res=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path \
+ osd.0) config get osd_mclock_scheduler_client_res | \
+ jq .osd_mclock_scheduler_client_res | bc)
+ if (( $(echo "$res != $client_res" | bc -l) )); then
+ return 1
+ fi
+
+ teardown $dir || return 1
+}
+
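+# Under mClock, changes to osd_recovery_max_active must be ignored unless
+# osd_mclock_override_recovery_settings is set to true.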
+function TEST_recovery_limit_adjustment_mclock() {
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+
+ run_osd $dir 0 --osd_op_queue=mclock_scheduler || return 1
+ local recoveries=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
+ config get osd_recovery_max_active)
+ # Get default value
+ echo "$recoveries" | grep --quiet 'osd_recovery_max_active' || return 1
+
+ # Change the recovery limit without setting the
+ # osd_mclock_override_recovery_settings option. Verify that the recovery
+ # limit is retained at its default value.
+ ceph config set osd.0 osd_recovery_max_active 10 || return 1
+ sleep 2 # Allow time for change to take effect
+ local max_recoveries=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
+ config get osd_recovery_max_active)
+ test "$max_recoveries" = "$recoveries" || return 1
+
+ # Change recovery limit after setting osd_mclock_override_recovery_settings.
+ # Verify that the recovery limit is modified.
+ ceph config set osd.0 osd_mclock_override_recovery_settings true || return 1
+ ceph config set osd.0 osd_recovery_max_active 10 || return 1
+ sleep 2 # Allow time for change to take effect
+ max_recoveries=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
+ config get osd_recovery_max_active)
+ test "$max_recoveries" = '{"osd_recovery_max_active":"10"}' || return 1
+
+ teardown $dir || return 1
+}
+
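+# Under mClock, changes to osd_max_backfills must be ignored unless
+# osd_mclock_override_recovery_settings is true; once overridden, the new
+# limit must also show up in the local/remote recovery reservers and
+# survive an OSD restart.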
+function TEST_backfill_limit_adjustment_mclock() {
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+
+ run_osd $dir 0 --osd_op_queue=mclock_scheduler || return 1
+ local backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
+ config get osd_max_backfills | jq .osd_max_backfills | bc)
+ # Get default value
+ echo "osd_max_backfills: $backfills" || return 1
+
+ # Change the backfill limit without setting the
+ # osd_mclock_override_recovery_settings option. Verify that the backfill
+ # limit is retained at its default value.
+ ceph config set osd.0 osd_max_backfills 20 || return 1
+ sleep 2 # Allow time for change to take effect
+ local max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
+ config get osd_max_backfills | jq .osd_max_backfills | bc)
+ test $max_backfills = $backfills || return 1
+
+ # Verify local and async reserver settings are not changed
+ max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
+ dump_recovery_reservations | jq .local_reservations.max_allowed | bc)
+ test $max_backfills = $backfills || return 1
+ max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
+ dump_recovery_reservations | jq .remote_reservations.max_allowed | bc)
+ test $max_backfills = $backfills || return 1
+
+ # Change backfills limit after setting osd_mclock_override_recovery_settings.
+ # Verify that the backfills limit is modified.
+ ceph config set osd.0 osd_mclock_override_recovery_settings true || return 1
+ ceph config set osd.0 osd_max_backfills 20 || return 1
+ sleep 2 # Allow time for change to take effect
+ max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
+ config get osd_max_backfills | jq .osd_max_backfills | bc)
+ test $max_backfills = 20 || return 1
+
+ # Verify local and async reserver settings are changed
+ max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
+ dump_recovery_reservations | jq .local_reservations.max_allowed | bc)
+ test $max_backfills = 20 || return 1
+ max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
+ dump_recovery_reservations | jq .remote_reservations.max_allowed | bc)
+ test $max_backfills = 20 || return 1
+
+ # Kill osd and bring it back up.
+ # Confirm that the backfill settings are retained.
+ kill_daemons $dir TERM osd || return 1
+ ceph osd down 0 || return 1
+ wait_for_osd down 0 || return 1
+ activate_osd $dir 0 --osd-op-queue=mclock_scheduler || return 1
+
+ max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
+ config get osd_max_backfills | jq .osd_max_backfills | bc)
+ test $max_backfills = 20 || return 1
+
+ # Verify local and async reserver settings are changed
+ max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
+ dump_recovery_reservations | jq .local_reservations.max_allowed | bc)
+ test $max_backfills = 20 || return 1
+ max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
+ dump_recovery_reservations | jq .remote_reservations.max_allowed | bc)
+ test $max_backfills = 20 || return 1
+
+ teardown $dir || return 1
+}
+
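+# With a built-in profile active, "ceph config set" of the mClock QoS
+# parameters must leave the values the OSD is running with unchanged.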
+function TEST_profile_disallow_builtin_params_modify() {
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+
+ run_osd $dir 0 --osd_op_queue=mclock_scheduler || return 1
+
+ # Verify that the default mclock profile is set on the OSD
+ local def_mclock_profile=$(ceph config get osd.0 osd_mclock_profile)
+ test "$def_mclock_profile" = "balanced" || return 1
+
+ # Verify the running mClock profile
+ local cur_mclock_profile=$(CEPH_ARGS='' ceph --format=json daemon \
+ $(get_asok_path osd.0) config get osd_mclock_profile |\
+ jq .osd_mclock_profile)
+ cur_mclock_profile=$(eval echo $cur_mclock_profile)
+ test $cur_mclock_profile = "high_recovery_ops" || return 1
+
+ declare -a options=("osd_mclock_scheduler_background_recovery_res"
+ "osd_mclock_scheduler_client_res")
+
+ local retries=10
+ local errors=0
+ for opt in "${options[@]}"
+ do
+ # Try and change a mclock config param and confirm that no change occurred
+ local opt_val_orig=$(CEPH_ARGS='' ceph --format=json daemon \
+ $(get_asok_path osd.0) config get $opt | jq .$opt | bc)
+ local opt_val_new=$(echo "$opt_val_orig + 0.1" | bc -l)
+ ceph config set osd.0 $opt $opt_val_new || return 1
+
+ # Check configuration values
+ for count in $(seq 0 $(expr $retries - 1))
+ do
+ errors=0
+ sleep 2 # Allow time for changes to take effect
+
+ echo "Check configuration values - Attempt#: $count"
+ # Check configuration value on Mon store (or the default) for the osd
+ local res=$(ceph config get osd.0 $opt) || return 1
+ echo "Mon db (or default): osd.0 $opt = $res"
+ if (( $(echo "$res == $opt_val_new" | bc -l) )); then
+ errors=$(expr $errors + 1)
+ fi
+
+ # Check running configuration value using "config show" cmd
+ res=$(ceph config show osd.0 | grep $opt |\
+ awk '{ print $2 }' | bc ) || return 1
+ echo "Running config: osd.0 $opt = $res"
+ if (( $(echo "$res == $opt_val_new" | bc -l) || \
+ $(echo "$res != $opt_val_orig" | bc -l) )); then
+ errors=$(expr $errors + 1)
+ fi
+
+ # Check value in the in-memory 'values' map is unmodified
+ res=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path \
+ osd.0) config get $opt | jq .$opt | bc)
+ echo "Values map: osd.0 $opt = $res"
+ if (( $(echo "$res == $opt_val_new" | bc -l) || \
+ $(echo "$res != $opt_val_orig" | bc -l) )); then
+ errors=$(expr $errors + 1)
+ fi
+
+ # Check if we succeeded or exhausted retry count
+ if [ $errors -eq 0 ]
+ then
+ break
+ elif [ $count -eq $(expr $retries - 1) ]
+ then
+ return 1
+ fi
+ done
+ done
+
+ teardown $dir || return 1
+}
+
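+# Same as the previous test, but attempting the override through
+# "ceph tell osd.N config set".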
+function TEST_profile_disallow_builtin_params_override() {
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+
+ run_osd $dir 0 --osd_op_queue=mclock_scheduler || return 1
+
+ # Verify that the default mclock profile is set on the OSD
+ local def_mclock_profile=$(ceph config get osd.0 osd_mclock_profile)
+ test "$def_mclock_profile" = "balanced" || return 1
+
+ # Verify the running mClock profile
+ local cur_mclock_profile=$(CEPH_ARGS='' ceph --format=json daemon \
+ $(get_asok_path osd.0) config get osd_mclock_profile |\
+ jq .osd_mclock_profile)
+ cur_mclock_profile=$(eval echo $cur_mclock_profile)
+ test $cur_mclock_profile = "high_recovery_ops" || return 1
+
+ declare -a options=("osd_mclock_scheduler_background_recovery_res"
+ "osd_mclock_scheduler_client_res")
+
+ local retries=10
+ local errors=0
+ for opt in "${options[@]}"
+ do
+ # Override a mclock config param and confirm that no change occurred
+ local opt_val_orig=$(CEPH_ARGS='' ceph --format=json daemon \
+ $(get_asok_path osd.0) config get $opt | jq .$opt | bc)
+ local opt_val_new=$(echo "$opt_val_orig + 0.1" | bc -l)
+ ceph tell osd.0 config set $opt $opt_val_new || return 1
+
+ # Check configuration values
+ for count in $(seq 0 $(expr $retries - 1))
+ do
+ errors=0
+ sleep 2 # Allow time for changes to take effect
+
+ echo "Check configuration values - Attempt#: $count"
+ # Check configuration value on Mon store (or the default) for the osd
+ local res=$(ceph config get osd.0 $opt) || return 1
+ echo "Mon db (or default): osd.0 $opt = $res"
+ if (( $(echo "$res == $opt_val_new" | bc -l) )); then
+ errors=$(expr $errors + 1)
+ fi
+
+ # Check running configuration value using "config show" cmd
+ res=$(ceph config show osd.0 | grep $opt |\
+ awk '{ print $2 }' | bc ) || return 1
+ echo "Running config: osd.0 $opt = $res"
+ if (( $(echo "$res == $opt_val_new" | bc -l) || \
+ $(echo "$res != $opt_val_orig" | bc -l) )); then
+ errors=$(expr $errors + 1)
+ fi
+
+ # Check value in the in-memory 'values' map is unmodified
+ res=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path \
+ osd.0) config get $opt | jq .$opt | bc)
+ echo "Values map: osd.0 $opt = $res"
+ if (( $(echo "$res == $opt_val_new" | bc -l) || \
+ $(echo "$res != $opt_val_orig" | bc -l) )); then
+ errors=$(expr $errors + 1)
+ fi
+
+ # Check if we succeeded or exhausted retry count
+ if [ $errors -eq 0 ]
+ then
+ break
+ elif [ $count -eq $(expr $retries - 1) ]
+ then
+ return 1
+ fi
+ done
+ done
+
+ teardown $dir || return 1
+}
+
+main mclock-config "$@"
+
+# Local Variables:
+# compile-command: "cd build ; make -j4 && \
+# ../qa/run-standalone.sh mclock-config.sh"
+# End:
diff --git a/qa/standalone/misc/network-ping.sh b/qa/standalone/misc/network-ping.sh
new file mode 100755
index 000000000..4745108c5
--- /dev/null
+++ b/qa/standalone/misc/network-ping.sh
@@ -0,0 +1,169 @@
+#!/usr/bin/env bash
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+ CEPH_ARGS+="--debug_disable_randomized_ping=true "
+ CEPH_ARGS+="--debug_heartbeat_testing_span=5 "
+ CEPH_ARGS+="--osd_heartbeat_interval=1 "
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
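+# Check dump_osd_network on an OSD and on the mgr: with the default
+# 1000ms threshold no entries are reported, while a threshold of 0
+# reports every heartbeat peer entry (4 on one OSD, 12 on the mgr).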
+function TEST_network_ping_test1() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+
+ sleep 5
+
+ create_pool foo 16
+
+ # write some objects
+ timeout 20 rados bench -p foo 10 write -b 4096 --no-cleanup || return 1
+
+ # Get 1 cycle's worth of ping data (the "1 minute" interval)
+ sleep 10
+ flush_pg_stats
+
+ CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network | tee $dir/json
+ test "$(cat $dir/json | jq '.entries | length')" = "0" || return 1
+ test "$(cat $dir/json | jq '.threshold')" = "1000" || return 1
+
+ CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network | tee $dir/json
+ test "$(cat $dir/json | jq '.entries | length')" = "0" || return 1
+ test "$(cat $dir/json | jq '.threshold')" = "1000" || return 1
+
+ CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network 0 | tee $dir/json
+ test "$(cat $dir/json | jq '.entries | length')" = "4" || return 1
+ test "$(cat $dir/json | jq '.threshold')" = "0" || return 1
+
+ CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network 0 | tee $dir/json
+ test "$(cat $dir/json | jq '.entries | length')" = "12" || return 1
+ test "$(cat $dir/json | jq '.threshold')" = "0" || return 1
+
+ # Wait another 4 cycles to reach the "5 minute" interval
+ sleep 20
+ flush_pg_stats
+ CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network | tee $dir/json
+ test "$(cat $dir/json | jq '.entries | length')" = "0" || return 1
+ test "$(cat $dir/json | jq '.threshold')" = "1000" || return 1
+
+ CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network | tee $dir/json
+ test "$(cat $dir/json | jq '.entries | length')" = "0" || return 1
+ test "$(cat $dir/json | jq '.threshold')" = "1000" || return 1
+
+ CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network 0 | tee $dir/json
+ test "$(cat $dir/json | jq '.entries | length')" = "4" || return 1
+ test "$(cat $dir/json | jq '.threshold')" = "0" || return 1
+
+ CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network 0 | tee $dir/json
+ test "$(cat $dir/json | jq '.entries | length')" = "12" || return 1
+ test "$(cat $dir/json | jq '.threshold')" = "0" || return 1
+
+
+ # Wait another 10 cycles to reach the "15 minute" interval
+ sleep 50
+ flush_pg_stats
+ CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network | tee $dir/json
+ test "$(cat $dir/json | jq '.entries | length')" = "0" || return 1
+ test "$(cat $dir/json | jq '.threshold')" = "1000" || return 1
+
+ CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network | tee $dir/json
+ test "$(cat $dir/json | jq '.entries | length')" = "0" || return 1
+ test "$(cat $dir/json | jq '.threshold')" = "1000" || return 1
+
+ CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network 0 | tee $dir/json
+ test "$(cat $dir/json | jq '.entries | length')" = "4" || return 1
+ test "$(cat $dir/json | jq '.threshold')" = "0" || return 1
+
+ CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network 0 | tee $dir/json
+ test "$(cat $dir/json | jq '.entries | length')" = "12" || return 1
+ test "$(cat $dir/json | jq '.threshold')" = "0" || return 1
+
+ # Just check the threshold output matches the input
+ CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network 99 | tee $dir/json
+ test "$(cat $dir/json | jq '.threshold')" = "99" || return 1
+ CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network 98 | tee $dir/json
+ test "$(cat $dir/json | jq '.threshold')" = "98" || return 1
+
+ rm -f $dir/json
+}
+
+# Test setting of mon_warn_on_slow_ping_time very low to
+# get health warning
+function TEST_network_ping_test2() {
+ local dir=$1
+
+ export CEPH_ARGS
+ export EXTRA_OPTS=" --mon_warn_on_slow_ping_time=0.001"
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+
+ sleep 5
+ ceph osd crush add-bucket dc1 datacenter
+ ceph osd crush add-bucket dc2 datacenter
+ ceph osd crush add-bucket dc3 datacenter
+ ceph osd crush add-bucket rack1 rack
+ ceph osd crush add-bucket rack2 rack
+ ceph osd crush add-bucket rack3 rack
+ ceph osd crush add-bucket host1 host
+ ceph osd crush add-bucket host2 host
+ ceph osd crush add-bucket host3 host
+ ceph osd crush move dc1 root=default
+ ceph osd crush move dc2 root=default
+ ceph osd crush move dc3 root=default
+ ceph osd crush move rack1 datacenter=dc1
+ ceph osd crush move rack2 datacenter=dc2
+ ceph osd crush move rack3 datacenter=dc3
+ ceph osd crush move host1 rack=rack1
+ ceph osd crush move host2 rack=rack2
+ ceph osd crush move host3 rack=rack3
+ ceph osd crush set osd.0 1.0 host=host1
+ ceph osd crush set osd.1 1.0 host=host2
+ ceph osd crush set osd.2 1.0 host=host3
+ ceph osd crush rule create-simple myrule default host firstn
+
+ create_pool foo 16 16 replicated myrule
+
+ # write some objects
+ timeout 20 rados bench -p foo 10 write -b 4096 --no-cleanup || return 1
+
+ # Get at least 1 cycle of ping data (this test runs with 5 second cycles of 1 second pings)
+ sleep 10
+ flush_pg_stats
+
+ ceph health | tee $dir/health
+ grep -q "Slow OSD heartbeats" $dir/health || return 1
+
+ ceph health detail | tee $dir/health
+ grep -q "OSD_SLOW_PING_TIME_BACK" $dir/health || return 1
+ grep -q "OSD_SLOW_PING_TIME_FRONT" $dir/health || return 1
+ grep -q "Slow OSD heartbeats on front from osd[.][0-2] [[]dc[1-3],rack[1-3][]] \
+to osd[.][0-2] [[]dc[1-3],rack[1-3][]]" $dir/health || return 1
+ rm -f $dir/health
+}
+
+main network-ping "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && ../qa/run-standalone.sh network-ping.sh"
+# End:
diff --git a/qa/standalone/misc/ok-to-stop.sh b/qa/standalone/misc/ok-to-stop.sh
new file mode 100755
index 000000000..dc9e7422f
--- /dev/null
+++ b/qa/standalone/misc/ok-to-stop.sh
@@ -0,0 +1,296 @@
+#!/usr/bin/env bash
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON_A="127.0.0.1:7150" # git grep '\<7150\>' : there must be only one
+ export CEPH_MON_B="127.0.0.1:7151" # git grep '\<7151\>' : there must be only one
+ export CEPH_MON_C="127.0.0.1:7152" # git grep '\<7152\>' : there must be only one
+ export CEPH_MON_D="127.0.0.1:7153" # git grep '\<7153\>' : there must be only one
+ export CEPH_MON_E="127.0.0.1:7154" # git grep '\<7154\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ export ORIG_CEPH_ARGS="$CEPH_ARGS"
+
+ local funcs=${@:-$(set | ${SED} -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ kill_daemons $dir KILL || return 1
+ teardown $dir || return 1
+ done
+}
+
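+# With a single monitor, stopping or removing it is never OK, while
+# nonexistent monitors are always OK to stop or remove.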
+function TEST_1_mon_checks() {
+ local dir=$1
+
+ CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A "
+
+ run_mon $dir a --public-addr=$CEPH_MON_A || return 1
+
+ ceph mon ok-to-stop dne || return 1
+ ! ceph mon ok-to-stop a || return 1
+
+ ! ceph mon ok-to-add-offline || return 1
+
+ ! ceph mon ok-to-rm a || return 1
+ ceph mon ok-to-rm dne || return 1
+}
+
+function TEST_2_mons_checks() {
+ local dir=$1
+
+ CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B "
+
+ run_mon $dir a --public-addr=$CEPH_MON_A || return 1
+ run_mon $dir b --public-addr=$CEPH_MON_B || return 1
+
+ ceph mon ok-to-stop dne || return 1
+ ! ceph mon ok-to-stop a || return 1
+ ! ceph mon ok-to-stop b || return 1
+ ! ceph mon ok-to-stop a b || return 1
+
+ ceph mon ok-to-add-offline || return 1
+
+ ceph mon ok-to-rm a || return 1
+ ceph mon ok-to-rm b || return 1
+ ceph mon ok-to-rm dne || return 1
+}
+
+function TEST_3_mons_checks() {
+ local dir=$1
+
+ CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C "
+
+ run_mon $dir a --public-addr=$CEPH_MON_A || return 1
+ run_mon $dir b --public-addr=$CEPH_MON_B || return 1
+ run_mon $dir c --public-addr=$CEPH_MON_C || return 1
+ wait_for_quorum 60 3
+
+ ceph mon ok-to-stop dne || return 1
+ ceph mon ok-to-stop a || return 1
+ ceph mon ok-to-stop b || return 1
+ ceph mon ok-to-stop c || return 1
+ ! ceph mon ok-to-stop a b || return 1
+ ! ceph mon ok-to-stop b c || return 1
+ ! ceph mon ok-to-stop a b c || return 1
+
+ ceph mon ok-to-add-offline || return 1
+
+ ceph mon ok-to-rm a || return 1
+ ceph mon ok-to-rm b || return 1
+ ceph mon ok-to-rm c || return 1
+
+ kill_daemons $dir KILL mon.b
+ wait_for_quorum 60 2
+
+ ! ceph mon ok-to-stop a || return 1
+ ceph mon ok-to-stop b || return 1
+ ! ceph mon ok-to-stop c || return 1
+
+ ! ceph mon ok-to-add-offline || return 1
+
+ ! ceph mon ok-to-rm a || return 1
+ ceph mon ok-to-rm b || return 1
+ ! ceph mon ok-to-rm c || return 1
+}
+
+function TEST_4_mons_checks() {
+ local dir=$1
+
+ CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C,$CEPH_MON_D "
+
+ run_mon $dir a --public-addr=$CEPH_MON_A || return 1
+ run_mon $dir b --public-addr=$CEPH_MON_B || return 1
+ run_mon $dir c --public-addr=$CEPH_MON_C || return 1
+ run_mon $dir d --public-addr=$CEPH_MON_D || return 1
+ wait_for_quorum 60 4
+
+ ceph mon ok-to-stop dne || return 1
+ ceph mon ok-to-stop a || return 1
+ ceph mon ok-to-stop b || return 1
+ ceph mon ok-to-stop c || return 1
+ ceph mon ok-to-stop d || return 1
+ ! ceph mon ok-to-stop a b || return 1
+ ! ceph mon ok-to-stop c d || return 1
+
+ ceph mon ok-to-add-offline || return 1
+
+ ceph mon ok-to-rm a || return 1
+ ceph mon ok-to-rm b || return 1
+ ceph mon ok-to-rm c || return 1
+
+ kill_daemons $dir KILL mon.a
+ wait_for_quorum 60 3
+
+ ceph mon ok-to-stop a || return 1
+ ! ceph mon ok-to-stop b || return 1
+ ! ceph mon ok-to-stop c || return 1
+ ! ceph mon ok-to-stop d || return 1
+
+ ceph mon ok-to-add-offline || return 1
+
+ ceph mon ok-to-rm a || return 1
+ ceph mon ok-to-rm b || return 1
+ ceph mon ok-to-rm c || return 1
+ ceph mon ok-to-rm d || return 1
+}
+
+function TEST_5_mons_checks() {
+ local dir=$1
+
+ CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C,$CEPH_MON_D,$CEPH_MON_E "
+
+ run_mon $dir a --public-addr=$CEPH_MON_A || return 1
+ run_mon $dir b --public-addr=$CEPH_MON_B || return 1
+ run_mon $dir c --public-addr=$CEPH_MON_C || return 1
+ run_mon $dir d --public-addr=$CEPH_MON_D || return 1
+ run_mon $dir e --public-addr=$CEPH_MON_E || return 1
+ wait_for_quorum 60 5
+
+ ceph mon ok-to-stop dne || return 1
+ ceph mon ok-to-stop a || return 1
+ ceph mon ok-to-stop b || return 1
+ ceph mon ok-to-stop c || return 1
+ ceph mon ok-to-stop d || return 1
+ ceph mon ok-to-stop e || return 1
+ ceph mon ok-to-stop a b || return 1
+ ceph mon ok-to-stop c d || return 1
+ ! ceph mon ok-to-stop a b c || return 1
+
+ ceph mon ok-to-add-offline || return 1
+
+ ceph mon ok-to-rm a || return 1
+ ceph mon ok-to-rm b || return 1
+ ceph mon ok-to-rm c || return 1
+ ceph mon ok-to-rm d || return 1
+ ceph mon ok-to-rm e || return 1
+
+ kill_daemons $dir KILL mon.a
+ wait_for_quorum 60 4
+
+ ceph mon ok-to-stop a || return 1
+ ceph mon ok-to-stop b || return 1
+ ceph mon ok-to-stop c || return 1
+ ceph mon ok-to-stop d || return 1
+ ceph mon ok-to-stop e || return 1
+
+ ceph mon ok-to-add-offline || return 1
+
+ ceph mon ok-to-rm a || return 1
+ ceph mon ok-to-rm b || return 1
+ ceph mon ok-to-rm c || return 1
+ ceph mon ok-to-rm d || return 1
+ ceph mon ok-to-rm e || return 1
+
+ kill_daemons $dir KILL mon.e
+ wait_for_quorum 60 3
+
+ ceph mon ok-to-stop a || return 1
+ ! ceph mon ok-to-stop b || return 1
+ ! ceph mon ok-to-stop c || return 1
+ ! ceph mon ok-to-stop d || return 1
+ ceph mon ok-to-stop e || return 1
+
+ ! ceph mon ok-to-add-offline || return 1
+
+ ceph mon ok-to-rm a || return 1
+ ! ceph mon ok-to-rm b || return 1
+ ! ceph mon ok-to-rm c || return 1
+ ! ceph mon ok-to-rm d || return 1
+ ceph mon ok-to-rm e || return 1
+}
+
+function TEST_0_mds() {
+ local dir=$1
+
+ CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A "
+
+ run_mon $dir a --public-addr=$CEPH_MON_A || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_mds $dir a || return 1
+
+ ceph osd pool create meta 1 || return 1
+ ceph osd pool create data 1 || return 1
+ ceph fs new myfs meta data || return 1
+ sleep 5
+
+ ! ceph mds ok-to-stop a || return 1
+ ! ceph mds ok-to-stop a dne || return 1
+ ceph mds ok-to-stop dne || return 1
+
+ run_mds $dir b || return 1
+ sleep 5
+
+ ceph mds ok-to-stop a || return 1
+ ceph mds ok-to-stop b || return 1
+ ! ceph mds ok-to-stop a b || return 1
+ ceph mds ok-to-stop a dne1 dne2 || return 1
+ ceph mds ok-to-stop b dne || return 1
+ ! ceph mds ok-to-stop a b dne || return 1
+ ceph mds ok-to-stop dne1 dne2 || return 1
+
+ kill_daemons $dir KILL mds.a
+}
+
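+# ok-to-stop checks against a k=2 m=2 EC pool: with min_size 3 only one
+# OSD may be stopped at a time, with min_size 2 any two may be stopped.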
+function TEST_0_osd() {
+ local dir=$1
+
+ CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A "
+
+ run_mon $dir a --public-addr=$CEPH_MON_A || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+ run_osd $dir 3 || return 1
+
+ ceph osd erasure-code-profile set ec-profile m=2 k=2 crush-failure-domain=osd || return 1
+ ceph osd pool create ec erasure ec-profile || return 1
+
+ wait_for_clean || return 1
+
+ # with min_size 3, we can stop only 1 osd
+ ceph osd pool set ec min_size 3 || return 1
+ wait_for_clean || return 1
+
+ ceph osd ok-to-stop 0 || return 1
+ ceph osd ok-to-stop 1 || return 1
+ ceph osd ok-to-stop 2 || return 1
+ ceph osd ok-to-stop 3 || return 1
+ ! ceph osd ok-to-stop 0 1 || return 1
+ ! ceph osd ok-to-stop 2 3 || return 1
+ ceph osd ok-to-stop 0 --max 2 | grep '[0]' || return 1
+ ceph osd ok-to-stop 1 --max 2 | grep '[1]' || return 1
+
+ # with min_size 2 we can stop up to 2 osds
+ ceph osd pool set ec min_size 2 || return 1
+ wait_for_clean || return 1
+
+ ceph osd ok-to-stop 0 1 || return 1
+ ceph osd ok-to-stop 2 3 || return 1
+ ! ceph osd ok-to-stop 0 1 2 || return 1
+ ! ceph osd ok-to-stop 1 2 3 || return 1
+
+ ceph osd ok-to-stop 0 --max 2 | grep '[0,1]' || return 1
+ ceph osd ok-to-stop 0 --max 20 | grep '[0,1]' || return 1
+ ceph osd ok-to-stop 2 --max 2 | grep '[2,3]' || return 1
+ ceph osd ok-to-stop 2 --max 20 | grep '[2,3]' || return 1
+
+ # we should get the same result with one of the osds already down
+ kill_daemons $dir TERM osd.0 || return 1
+ ceph osd down 0 || return 1
+ wait_for_peered || return 1
+
+ ceph osd ok-to-stop 0 || return 1
+ ceph osd ok-to-stop 0 1 || return 1
+ ! ceph osd ok-to-stop 0 1 2 || return 1
+ ! ceph osd ok-to-stop 1 2 3 || return 1
+}
+
+
+main ok-to-stop "$@"
diff --git a/qa/standalone/misc/rados-striper.sh b/qa/standalone/misc/rados-striper.sh
new file mode 100755
index 000000000..be6349b81
--- /dev/null
+++ b/qa/standalone/misc/rados-striper.sh
@@ -0,0 +1,101 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2014 Red Hat <contact@redhat.com>
+#
+# Author: Sebastien Ponce <sebastien.ponce@cern.ch>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7116" # git grep '\<7116\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+
+ # setup
+ setup $dir || return 1
+
+ # create a cluster with one monitor and three osds
+ run_mon $dir a || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+ create_rbd_pool || return 1
+
+ # create toyfile
+ dd if=/dev/urandom of=$dir/toyfile bs=1234 count=1
+
+ # put a striped object
+ rados --pool rbd --striper put toyfile $dir/toyfile || return 1
+
+ # stat it, with and without striping
+ rados --pool rbd --striper stat toyfile | cut -d ',' -f 2 > $dir/stripedStat || return 1
+ rados --pool rbd stat toyfile.0000000000000000 | cut -d ',' -f 2 > $dir/stat || return 1
+ echo ' size 1234' > $dir/refstat
+ diff -w $dir/stripedStat $dir/refstat || return 1
+ diff -w $dir/stat $dir/refstat || return 1
+ rados --pool rbd stat toyfile >& $dir/staterror
+ grep -q 'No such file or directory' $dir/staterror || return 1
+
+ # get the file back with and without striping
+ rados --pool rbd --striper get toyfile $dir/stripedGroup || return 1
+ diff -w $dir/toyfile $dir/stripedGroup || return 1
+ rados --pool rbd get toyfile.0000000000000000 $dir/nonSTripedGroup || return 1
+ diff -w $dir/toyfile $dir/nonSTripedGroup || return 1
+
+ # test truncate
+ rados --pool rbd --striper truncate toyfile 12
+ rados --pool rbd --striper stat toyfile | cut -d ',' -f 2 > $dir/stripedStat || return 1
+ rados --pool rbd stat toyfile.0000000000000000 | cut -d ',' -f 2 > $dir/stat || return 1
+ echo ' size 12' > $dir/reftrunc
+ diff -w $dir/stripedStat $dir/reftrunc || return 1
+ diff -w $dir/stat $dir/reftrunc || return 1
+
+ # test xattrs
+
+ rados --pool rbd --striper setxattr toyfile somexattr somevalue || return 1
+ rados --pool rbd --striper getxattr toyfile somexattr > $dir/xattrvalue || return 1
+ rados --pool rbd getxattr toyfile.0000000000000000 somexattr > $dir/xattrvalue2 || return 1
+ echo 'somevalue' > $dir/refvalue
+ diff -w $dir/xattrvalue $dir/refvalue || return 1
+ diff -w $dir/xattrvalue2 $dir/refvalue || return 1
+ rados --pool rbd --striper listxattr toyfile > $dir/xattrlist || return 1
+ echo 'somexattr' > $dir/reflist
+ diff -w $dir/xattrlist $dir/reflist || return 1
+ rados --pool rbd listxattr toyfile.0000000000000000 | grep -v striper > $dir/xattrlist2 || return 1
+ diff -w $dir/xattrlist2 $dir/reflist || return 1
+ rados --pool rbd --striper rmxattr toyfile somexattr || return 1
+
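+    # after rmxattr the attribute must be gone; the error text differs per platform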
+ local attr_not_found_str="No data available"
+ [ `uname` = FreeBSD ] && \
+ attr_not_found_str="Attribute not found"
+ expect_failure $dir "$attr_not_found_str" \
+ rados --pool rbd --striper getxattr toyfile somexattr || return 1
+ expect_failure $dir "$attr_not_found_str" \
+ rados --pool rbd getxattr toyfile.0000000000000000 somexattr || return 1
+
+ # test rm
+ rados --pool rbd --striper rm toyfile || return 1
+ expect_failure $dir 'No such file or directory' \
+ rados --pool rbd --striper stat toyfile || return 1
+ expect_failure $dir 'No such file or directory' \
+ rados --pool rbd stat toyfile.0000000000000000 || return 1
+
+ # cleanup
+ teardown $dir || return 1
+}
+
+main rados-striper "$@"
diff --git a/qa/standalone/misc/test-ceph-helpers.sh b/qa/standalone/misc/test-ceph-helpers.sh
new file mode 100755
index 000000000..e7805858a
--- /dev/null
+++ b/qa/standalone/misc/test-ceph-helpers.sh
@@ -0,0 +1,21 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2013,2014 Cloudwatt <libre.licensing@cloudwatt.com>
+# Copyright (C) 2014 Red Hat <contact@redhat.com>
+# Copyright (C) 2014 Federico Gimenez <fgimenez@coit.es>
+#
+# Author: Loic Dachary <loic@dachary.org>
+# Author: Federico Gimenez <fgimenez@coit.es>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+$CEPH_ROOT/qa/standalone/ceph-helpers.sh TESTS "$@"
diff --git a/qa/standalone/misc/test-snaptrim-stats.sh b/qa/standalone/misc/test-snaptrim-stats.sh
new file mode 100755
index 000000000..98b3e4fdd
--- /dev/null
+++ b/qa/standalone/misc/test-snaptrim-stats.sh
@@ -0,0 +1,188 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2022 Red Hat <contact@redhat.com>
+#
+# Author: Sridhar Seshasayee <sseshasa@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7124" # git grep '\<7124\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+ CEPH_ARGS+="--debug-bluestore 20 "
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+function TEST_snaptrim_stats() {
+ local dir=$1
+ local poolname=test
+ local OSDS=3
+ local PGNUM=8
+ local PGPNUM=8
+ local objects=10
+ local WAIT_FOR_UPDATE=10
+
+ setup $dir || return 1
+ run_mon $dir a --osd_pool_default_size=$OSDS || return 1
+ run_mgr $dir x || return 1
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ run_osd $dir $osd --osd_pool_default_pg_autoscale_mode=off || return 1
+ done
+
+ # disable scrubs
+ ceph osd set noscrub || return 1
+ ceph osd set nodeep-scrub || return 1
+
+ # Create a pool
+ create_pool $poolname $PGNUM $PGPNUM
+ wait_for_clean || return 1
+ poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }')
+
+ # write a few objects
+ TESTDATA="testdata.1"
+ dd if=/dev/urandom of=$TESTDATA bs=4096 count=1
+ for i in `seq 1 $objects`
+ do
+ rados -p $poolname put obj${i} $TESTDATA
+ done
+ rm -f $TESTDATA
+
+ # create a snapshot, clones
+ SNAP=1
+ rados -p $poolname mksnap snap${SNAP}
+ TESTDATA="testdata.2"
+ dd if=/dev/urandom of=$TESTDATA bs=4096 count=1
+ for i in `seq 1 $objects`
+ do
+ rados -p $poolname put obj${i} $TESTDATA
+ done
+ rm -f $TESTDATA
+
+ # remove the snapshot, should trigger snaptrim
+ rados -p $poolname rmsnap snap${SNAP}
+
+ # check for snaptrim stats
+ wait_for_clean || return 1
+ sleep $WAIT_FOR_UPDATE
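+    # aggregate objects_trimmed and snaptrim_duration over every PG in the pool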
+ local objects_trimmed=0
+ local snaptrim_duration_total=0.0
+ for i in $(seq 0 $(expr $PGNUM - 1))
+ do
+ local pgid="${poolid}.${i}"
+ objects_trimmed=$(expr $objects_trimmed + $(ceph pg $pgid query | \
+ jq '.info.stats.objects_trimmed'))
+ snaptrim_duration_total=`echo $snaptrim_duration_total + $(ceph pg \
+ $pgid query | jq '.info.stats.snaptrim_duration') | bc`
+ done
+ test $objects_trimmed -eq $objects || return 1
+    # bc prints 1 when the comparison holds, 0 otherwise
+    test $(echo "$snaptrim_duration_total > 0.0" | bc) -eq 1 || return 1
+
+ teardown $dir || return 1
+}
+
+function TEST_snaptrim_stats_multiple_snaps() {
+ local dir=$1
+ local poolname=test
+ local OSDS=3
+ local PGNUM=8
+ local PGPNUM=8
+ local objects=10
+ local WAIT_FOR_UPDATE=10
+
+ setup $dir || return 1
+ run_mon $dir a --osd_pool_default_size=$OSDS || return 1
+ run_mgr $dir x || return 1
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ run_osd $dir $osd --osd_pool_default_pg_autoscale_mode=off || return 1
+ done
+
+ # disable scrubs
+ ceph osd set noscrub || return 1
+ ceph osd set nodeep-scrub || return 1
+
+ # Create a pool
+ create_pool $poolname $PGNUM $PGPNUM
+ wait_for_clean || return 1
+ poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }')
+
+ # write a few objects
+ local TESTDATA="testdata.0"
+ dd if=/dev/urandom of=$TESTDATA bs=4096 count=1
+ for i in `seq 1 $objects`
+ do
+ rados -p $poolname put obj${i} $TESTDATA
+ done
+ rm -f $TESTDATA
+
+ # create snapshots, clones
+ NUMSNAPS=2
+ for i in `seq 1 $NUMSNAPS`
+ do
+ rados -p $poolname mksnap snap${i}
+ TESTDATA="testdata".${i}
+ dd if=/dev/urandom of=$TESTDATA bs=4096 count=1
+ for i in `seq 1 $objects`
+ do
+ rados -p $poolname put obj${i} $TESTDATA
+ done
+ rm -f $TESTDATA
+ done
+
+ # remove the snapshots, should trigger snaptrim
+ local total_objects_trimmed=0
+ for i in `seq 1 $NUMSNAPS`
+ do
+ rados -p $poolname rmsnap snap${i}
+
+ # check for snaptrim stats
+ wait_for_clean || return 1
+ sleep $WAIT_FOR_UPDATE
+ local objects_trimmed=0
+ local snaptrim_duration_total=0.0
+ for i in $(seq 0 $(expr $PGNUM - 1))
+ do
+ local pgid="${poolid}.${i}"
+ objects_trimmed=$(expr $objects_trimmed + $(ceph pg $pgid query | \
+ jq '.info.stats.objects_trimmed'))
+ snaptrim_duration_total=`echo $snaptrim_duration_total + $(ceph pg \
+ $pgid query | jq '.info.stats.snaptrim_duration') | bc`
+ done
+ test $objects_trimmed -eq $objects || return 1
+    test $(echo "$snaptrim_duration_total > 0.0" | bc) -eq 1 || return 1
+ total_objects_trimmed=$(expr $total_objects_trimmed + $objects_trimmed)
+ done
+
+ test $total_objects_trimmed -eq $((objects * NUMSNAPS)) || return 1
+
+ teardown $dir || return 1
+}
+main test-snaptrim-stats "$@"
+
+# Local Variables:
+# compile-command: "cd build ; make -j4 && \
+# ../qa/run-standalone.sh test-snaptrim-stats.sh"
+# End:
diff --git a/qa/standalone/misc/ver-health.sh b/qa/standalone/misc/ver-health.sh
new file mode 100755
index 000000000..e03f8f4f5
--- /dev/null
+++ b/qa/standalone/misc/ver-health.sh
@@ -0,0 +1,231 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2020 Red Hat <contact@redhat.com>
+#
+# Author: David Zafman <dzafman@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON_A="127.0.0.1:7165" # git grep '\<7165\>' : there must be only one
+ export CEPH_MON_B="127.0.0.1:7166" # git grep '\<7166\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+ CEPH_ARGS+="--mon_health_to_clog_tick_interval=1.0 "
+ export ORIG_CEPH_ARGS="$CEPH_ARGS"
+
+ local funcs=${@:-$(set | ${SED} -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+function wait_for_health_string() {
+ local grep_string=$1
+ local seconds=${2:-20}
+
+ # Allow mon to notice version difference
+ set -o pipefail
+ PASSED="false"
+ for ((i=0; i < $seconds; i++)); do
+ if ceph health | grep -q "$grep_string"
+ then
+ PASSED="true"
+ break
+ fi
+ sleep 1
+ done
+ set +o pipefail
+
+ # Make sure health changed
+ if [ $PASSED = "false" ];
+ then
+ return 1
+ fi
+ return 0
+}
+
+
+
+# Test a single OSD with an old version and multiple OSDs with 2 different old versions
+function TEST_check_version_health_1() {
+ local dir=$1
+
+    # Assume MON_A is the leader?
+ CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A "
+ # setup
+ setup $dir || return 1
+
+ # create a cluster with two monitors and three osds
+ run_mon $dir a --public-addr=$CEPH_MON_A --mon_warn_older_version_delay=0 || return 1
+ run_mon $dir b --public-addr=$CEPH_MON_B --mon_warn_older_version_delay=0 || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+
+ sleep 5
+ ceph health detail
+ # should not see this yet
+ ceph health detail | grep DAEMON_OLD_VERSION && return 1
+
+ kill_daemons $dir KILL osd.1
+ ceph_debug_version_for_testing=01.00.00-gversion-test activate_osd $dir 1
+
+ wait_for_health_string "HEALTH_WARN .*There is a daemon running an older version of ceph" || return 1
+
+ ceph health detail
+ # Should notice that osd.1 is a different version
+ ceph health detail | grep -q "HEALTH_WARN .*There is a daemon running an older version of ceph" || return 1
+ ceph health detail | grep -q "^[[]WRN[]] DAEMON_OLD_VERSION: There is a daemon running an older version of ceph" || return 1
+ ceph health detail | grep -q "osd.1 is running an older version of ceph: 01.00.00-gversion-test" || return 1
+
+ kill_daemons $dir KILL osd.2
+ ceph_debug_version_for_testing=01.00.00-gversion-test activate_osd $dir 2
+ kill_daemons $dir KILL osd.0
+ ceph_debug_version_for_testing=02.00.00-gversion-test activate_osd $dir 0
+
+ wait_for_health_string "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1
+
+ ceph health detail
+ ceph health detail | grep -q "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1
+ ceph health detail | grep -q "^[[]ERR[]] DAEMON_OLD_VERSION: There are daemons running multiple old versions of ceph" || return 1
+ ceph health detail | grep -q "osd.1 osd.2 are running an older version of ceph: 01.00.00-gversion-test" || return 1
+ ceph health detail | grep -q "osd.0 is running an older version of ceph: 02.00.00-gversion-test" || return 1
+}
+
+# Test with 1 MON and 1 MDS with an older version, and add 2 OSDs with different versions
+function TEST_check_version_health_2() {
+ local dir=$1
+
+    # Assume MON_A is the leader?
+ CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A "
+ # setup
+ setup $dir || return 1
+
+ # create a cluster with all daemon types
+ run_mon $dir a --public-addr=$CEPH_MON_A --mon_warn_older_version_delay=0 || return 1
+ run_mon $dir b --public-addr=$CEPH_MON_B --mon_warn_older_version_delay=0 || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+ run_mgr $dir x || return 1
+ run_mgr $dir y || return 1
+ run_mds $dir m || return 1
+ run_mds $dir n || return 1
+
+ sleep 5
+ ceph health detail
+ # should not see this yet
+ ceph health detail | grep DAEMON_OLD_VERSION && return 1
+
+ kill_daemons $dir KILL mon.b
+ ceph_debug_version_for_testing=01.00.00-gversion-test run_mon $dir b --mon_warn_older_version_delay=0
+ # XXX: Manager doesn't seem to use the test specific config for version
+ #kill_daemons $dir KILL mgr.x
+ #ceph_debug_version_for_testing=02.00.00-gversion-test run_mgr $dir x
+ kill_daemons $dir KILL mds.m
+ ceph_debug_version_for_testing=01.00.00-gversion-test run_mds $dir m
+
+ wait_for_health_string "HEALTH_WARN .*There are daemons running an older version of ceph" || return 1
+
+ ceph health detail
+    # Should notice that mon.b and mds.m are running a different version
+ ceph health detail | grep -q "HEALTH_WARN .*There are daemons running an older version of ceph" || return 1
+ ceph health detail | grep -q "^[[]WRN[]] DAEMON_OLD_VERSION: There are daemons running an older version of ceph" || return 1
+ ceph health detail | grep -q "mon.b mds.m are running an older version of ceph: 01.00.00-gversion-test" || return 1
+
+ kill_daemons $dir KILL osd.2
+ ceph_debug_version_for_testing=01.00.00-gversion-test activate_osd $dir 2
+ kill_daemons $dir KILL osd.0
+ ceph_debug_version_for_testing=02.00.00-gversion-test activate_osd $dir 0
+
+ wait_for_health_string "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1
+
+ ceph health detail
+ ceph health | grep -q "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1
+ ceph health detail | grep -q "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1
+ ceph health detail | grep -q "^[[]ERR[]] DAEMON_OLD_VERSION: There are daemons running multiple old versions of ceph" || return 1
+ ceph health detail | grep -q "mon.b osd.2 mds.m are running an older version of ceph: 01.00.00-gversion-test" || return 1
+ ceph health detail | grep -q "osd.0 is running an older version of ceph: 02.00.00-gversion-test" || return 1
+}
+
+# Verify delay handling with same setup as test 1
+function TEST_check_version_health_3() {
+ local dir=$1
+
+    # Assume MON_A is the leader?
+ CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A "
+ # setup
+ setup $dir || return 1
+
+ # create a cluster with two monitors and three osds
+ run_mon $dir a --public-addr=$CEPH_MON_A || return 1
+ run_mon $dir b --public-addr=$CEPH_MON_B || return 1
+
+ local start_osd_time=$SECONDS
+ # use memstore for faster bootup
+ EXTRA_OPTS=" --osd-objectstore=memstore" run_osd $dir 0 || return 1
+ EXTRA_OPTS=" --osd-objectstore=memstore" run_osd $dir 1 || return 1
+ EXTRA_OPTS=" --osd-objectstore=memstore" run_osd $dir 2 || return 1
+ # take the time used for boot osds into consideration
+ local warn_older_version_delay=$(($SECONDS - $start_osd_time + 20))
+
+ sleep 5
+ ceph health detail
+ # should not see this yet
+ ceph health detail | grep DAEMON_OLD_VERSION && return 1
+ ceph tell 'mon.*' injectargs "--mon_warn_older_version_delay $warn_older_version_delay"
+ kill_daemons $dir KILL osd.1
+ EXTRA_OPTS=" --osd-objectstore=memstore" \
+ ceph_debug_version_for_testing=01.00.00-gversion-test \
+ activate_osd $dir 1
+
+ # Wait 50% of 20 second delay config
+ sleep 10
+ # should not see this yet
+ ceph health detail | grep DAEMON_OLD_VERSION && return 1
+
+ # Now make sure that at least 20 seconds have passed
+ wait_for_health_string "HEALTH_WARN .*There is a daemon running an older version of ceph" 20 || return 1
+
+ ceph health detail
+ # Should notice that osd.1 is a different version
+ ceph health detail | grep -q "HEALTH_WARN .*There is a daemon running an older version of ceph" || return 1
+ ceph health detail | grep -q "^[[]WRN[]] DAEMON_OLD_VERSION: There is a daemon running an older version of ceph" || return 1
+ ceph health detail | grep -q "osd.1 is running an older version of ceph: 01.00.00-gversion-test" || return 1
+
+ kill_daemons $dir KILL osd.2
+ ceph_debug_version_for_testing=01.00.00-gversion-test activate_osd $dir 2
+ kill_daemons $dir KILL osd.0
+ ceph_debug_version_for_testing=02.00.00-gversion-test activate_osd $dir 0
+
+ wait_for_health_string "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1
+
+ ceph health detail
+ ceph health detail | grep -q "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1
+ ceph health detail | grep -q "^[[]ERR[]] DAEMON_OLD_VERSION: There are daemons running multiple old versions of ceph" || return 1
+ ceph health detail | grep -q "osd.1 osd.2 are running an older version of ceph: 01.00.00-gversion-test" || return 1
+ ceph health detail | grep -q "osd.0 is running an older version of ceph: 02.00.00-gversion-test" || return 1
+}
+
+main ver-health "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && ../qa/run-standalone.sh ver-health.sh"
+# End:
diff --git a/qa/standalone/mon-stretch/mon-stretch-fail-recovery.sh b/qa/standalone/mon-stretch/mon-stretch-fail-recovery.sh
new file mode 100755
index 000000000..276d26aab
--- /dev/null
+++ b/qa/standalone/mon-stretch/mon-stretch-fail-recovery.sh
@@ -0,0 +1,148 @@
+#!/usr/bin/env bash
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON_A="127.0.0.1:7139" # git grep '\<7139\>' : there must be only one
+ export CEPH_MON_B="127.0.0.1:7141" # git grep '\<7141\>' : there must be only one
+ export CEPH_MON_C="127.0.0.1:7142" # git grep '\<7142\>' : there must be only one
+ export CEPH_MON_D="127.0.0.1:7143" # git grep '\<7143\>' : there must be only one
+ export CEPH_MON_E="127.0.0.1:7144" # git grep '\<7144\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+
+ export BASE_CEPH_ARGS=$CEPH_ARGS
+ CEPH_ARGS+="--mon-host=$CEPH_MON_A"
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+TEST_stretched_cluster_failover_add_three_osds(){
+ local dir=$1
+ local OSDS=8
+ setup $dir || return 1
+
+ run_mon $dir a --public-addr $CEPH_MON_A || return 1
+ wait_for_quorum 300 1 || return 1
+
+ run_mon $dir b --public-addr $CEPH_MON_B || return 1
+ CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B"
+ wait_for_quorum 300 2 || return 1
+
+ run_mon $dir c --public-addr $CEPH_MON_C || return 1
+ CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C"
+ wait_for_quorum 300 3 || return 1
+
+ run_mon $dir d --public-addr $CEPH_MON_D || return 1
+ CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C,$CEPH_MON_D"
+ wait_for_quorum 300 4 || return 1
+
+ run_mon $dir e --public-addr $CEPH_MON_E || return 1
+ CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C,$CEPH_MON_D,$CEPH_MON_E"
+ wait_for_quorum 300 5 || return 1
+
+ ceph mon set election_strategy connectivity
+ ceph mon add disallowed_leader e
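+    # stretch mode uses the connectivity election strategy; mon.e becomes the
+    # tiebreaker later, so keep it out of the leader race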
+
+ run_mgr $dir x || return 1
+ run_mgr $dir y || return 1
+ run_mgr $dir z || return 1
+
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ run_osd $dir $osd || return 1
+ done
+
+ for zone in iris pze
+ do
+ ceph osd crush add-bucket $zone zone
+ ceph osd crush move $zone root=default
+ done
+
+
+ ceph osd crush add-bucket node-2 host
+ ceph osd crush add-bucket node-3 host
+ ceph osd crush add-bucket node-4 host
+ ceph osd crush add-bucket node-5 host
+
+ ceph osd crush move node-2 zone=iris
+ ceph osd crush move node-3 zone=iris
+ ceph osd crush move node-4 zone=pze
+ ceph osd crush move node-5 zone=pze
+
+ ceph osd crush move osd.0 host=node-2
+ ceph osd crush move osd.1 host=node-2
+ ceph osd crush move osd.2 host=node-3
+ ceph osd crush move osd.3 host=node-3
+ ceph osd crush move osd.4 host=node-4
+ ceph osd crush move osd.5 host=node-4
+ ceph osd crush move osd.6 host=node-5
+ ceph osd crush move osd.7 host=node-5
+
+ ceph mon set_location a zone=iris host=node-2
+ ceph mon set_location b zone=iris host=node-3
+ ceph mon set_location c zone=pze host=node-4
+ ceph mon set_location d zone=pze host=node-5
+
+ hostname=$(hostname -s)
+ ceph osd crush remove $hostname || return 1
+ ceph osd getcrushmap > crushmap || return 1
+ crushtool --decompile crushmap > crushmap.txt || return 1
+ sed 's/^# end crush map$//' crushmap.txt > crushmap_modified.txt || return 1
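+    # append a replicated stretch rule that takes two hosts from each zone
+    # (iris and pze), then restore the '# end crush map' trailer stripped above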
+ cat >> crushmap_modified.txt << EOF
+rule stretch_rule {
+ id 1
+ type replicated
+ min_size 1
+ max_size 10
+ step take iris
+ step chooseleaf firstn 2 type host
+ step emit
+ step take pze
+ step chooseleaf firstn 2 type host
+ step emit
+}
+
+# end crush map
+EOF
+
+ crushtool --compile crushmap_modified.txt -o crushmap.bin || return 1
+ ceph osd setcrushmap -i crushmap.bin || return 1
+ local stretched_poolname=stretched_rbdpool
+ ceph osd pool create $stretched_poolname 32 32 stretch_rule || return 1
+ ceph osd pool set $stretched_poolname size 4 || return 1
+
+ sleep 3
+
+ ceph mon set_location e zone=arbiter host=node-1
+ ceph mon enable_stretch_mode e stretch_rule zone
+
+ kill_daemons $dir KILL mon.c || return 1
+ kill_daemons $dir KILL mon.d || return 1
+
+ kill_daemons $dir KILL osd.4 || return 1
+ kill_daemons $dir KILL osd.5 || return 1
+ kill_daemons $dir KILL osd.6 || return 1
+ kill_daemons $dir KILL osd.7 || return 1
+
+ ceph -s
+
+ sleep 3
+
+ run_osd $dir 8 || return 1
+ run_osd $dir 9 || return 1
+ run_osd $dir 10 || return 1
+
+ ceph -s
+
+ sleep 3
+
+ teardown $dir || return 1
+}
+main mon-stretch-fail-recovery "$@" \ No newline at end of file
diff --git a/qa/standalone/mon-stretch/mon-stretch-uneven-crush-weights.sh b/qa/standalone/mon-stretch/mon-stretch-uneven-crush-weights.sh
new file mode 100755
index 000000000..7e13f4076
--- /dev/null
+++ b/qa/standalone/mon-stretch/mon-stretch-uneven-crush-weights.sh
@@ -0,0 +1,145 @@
+#!/usr/bin/env bash
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON_A="127.0.0.1:7139" # git grep '\<7139\>' : there must be only one
+ export CEPH_MON_B="127.0.0.1:7141" # git grep '\<7141\>' : there must be only one
+ export CEPH_MON_C="127.0.0.1:7142" # git grep '\<7142\>' : there must be only one
+ export CEPH_MON_D="127.0.0.1:7143" # git grep '\<7143\>' : there must be only one
+ export CEPH_MON_E="127.0.0.1:7144" # git grep '\<7144\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+
+ export BASE_CEPH_ARGS=$CEPH_ARGS
+ CEPH_ARGS+="--mon-host=$CEPH_MON_A"
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+TEST_stretched_cluster_uneven_weight() {
+ local dir=$1
+ local OSDS=4
+ local weight=0.09000
+ setup $dir || return 1
+
+ run_mon $dir a --public-addr $CEPH_MON_A || return 1
+ wait_for_quorum 300 1 || return 1
+
+ run_mon $dir b --public-addr $CEPH_MON_B || return 1
+ CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B"
+ wait_for_quorum 300 2 || return 1
+
+ run_mon $dir c --public-addr $CEPH_MON_C || return 1
+ CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C"
+ wait_for_quorum 300 3 || return 1
+
+ run_mon $dir d --public-addr $CEPH_MON_D || return 1
+ CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C,$CEPH_MON_D"
+ wait_for_quorum 300 4 || return 1
+
+ run_mon $dir e --public-addr $CEPH_MON_E || return 1
+ CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C,$CEPH_MON_D,$CEPH_MON_E"
+ wait_for_quorum 300 5 || return 1
+
+ ceph mon set election_strategy connectivity
+ ceph mon add disallowed_leader e
+
+ run_mgr $dir x || return 1
+ run_mgr $dir y || return 1
+ run_mgr $dir z || return 1
+
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ run_osd $dir $osd || return 1
+ done
+
+ for zone in iris pze
+ do
+ ceph osd crush add-bucket $zone zone
+ ceph osd crush move $zone root=default
+ done
+
+ ceph osd crush add-bucket node-2 host
+ ceph osd crush add-bucket node-3 host
+ ceph osd crush add-bucket node-4 host
+ ceph osd crush add-bucket node-5 host
+
+ ceph osd crush move node-2 zone=iris
+ ceph osd crush move node-3 zone=iris
+ ceph osd crush move node-4 zone=pze
+ ceph osd crush move node-5 zone=pze
+
+ ceph osd crush move osd.0 host=node-2
+ ceph osd crush move osd.1 host=node-3
+ ceph osd crush move osd.2 host=node-4
+ ceph osd crush move osd.3 host=node-5
+
+ ceph mon set_location a zone=iris host=node-2
+ ceph mon set_location b zone=iris host=node-3
+ ceph mon set_location c zone=pze host=node-4
+ ceph mon set_location d zone=pze host=node-5
+
+ hostname=$(hostname -s)
+ ceph osd crush remove $hostname || return 1
+ ceph osd getcrushmap > crushmap || return 1
+ crushtool --decompile crushmap > crushmap.txt || return 1
+ sed 's/^# end crush map$//' crushmap.txt > crushmap_modified.txt || return 1
+ cat >> crushmap_modified.txt << EOF
+rule stretch_rule {
+ id 1
+ type replicated
+ min_size 1
+ max_size 10
+ step take iris
+ step chooseleaf firstn 2 type host
+ step emit
+ step take pze
+ step chooseleaf firstn 2 type host
+ step emit
+}
+# end crush map
+EOF
+
+ crushtool --compile crushmap_modified.txt -o crushmap.bin || return 1
+ ceph osd setcrushmap -i crushmap.bin || return 1
+ local stretched_poolname=stretched_rbdpool
+ ceph osd pool create $stretched_poolname 32 32 stretch_rule || return 1
+ ceph osd pool set $stretched_poolname size 4 || return 1
+
+ ceph mon set_location e zone=arbiter host=node-1 || return 1
+    ceph mon enable_stretch_mode e stretch_rule zone || return 1 # Enter stretch mode
+
+    # reweight every OSD to the same round decimal weight.
+ ceph osd crush reweight osd.0 $weight
+ ceph osd crush reweight osd.1 $weight
+ ceph osd crush reweight osd.2 $weight
+ ceph osd crush reweight osd.3 $weight
+
+    # First, test the warning raised when the number of stretch mode buckets != 2
+ ceph osd crush add-bucket sham zone || return 1
+ ceph osd crush move sham root=default || return 1
+ wait_for_health "INCORRECT_NUM_BUCKETS_STRETCH_MODE" || return 1
+
+ ceph osd crush rm sham # clear the health warn
+ wait_for_health_gone "INCORRECT_NUM_BUCKETS_STRETCH_MODE" || return 1
+
+ # Next, we test for uneven weights across buckets
+
+ ceph osd crush reweight osd.0 0.07000
+
+ wait_for_health "UNEVEN_WEIGHTS_STRETCH_MODE" || return 1
+
+ ceph osd crush reweight osd.0 $weight # clear the health warn
+
+ wait_for_health_gone "UNEVEN_WEIGHTS_STRETCH_MODE" || return 1
+
+ teardown $dir || return 1
+}
+main mon-stretched-cluster-uneven-weight "$@" \ No newline at end of file
diff --git a/qa/standalone/mon/health-mute.sh b/qa/standalone/mon/health-mute.sh
new file mode 100755
index 000000000..d8e07ca06
--- /dev/null
+++ b/qa/standalone/mon/health-mute.sh
@@ -0,0 +1,124 @@
+#!/bin/bash
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+    export CEPH_MON="127.0.0.1:7143" # git grep '\<7143\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none --mon-pg-warn-min-per-osd 0 --mon-max-pg-per-osd 1000 "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+function TEST_mute() {
+ local dir=$1
+ setup $dir || return 1
+
+ set -o pipefail
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+
+ ceph osd pool create foo 8
+ ceph osd pool application enable foo rbd --yes-i-really-mean-it
+ wait_for_clean || return 1
+
+ ceph -s
+ ceph health | grep HEALTH_OK || return 1
+ # test warning on setting pool size=1
+ ceph osd pool set foo size 1 --yes-i-really-mean-it
+ ceph -s
+ ceph health | grep HEALTH_WARN || return 1
+ ceph health detail | grep POOL_NO_REDUNDANCY || return 1
+ ceph health mute POOL_NO_REDUNDANCY
+ ceph -s
+ ceph health | grep HEALTH_OK | grep POOL_NO_REDUNDANCY || return 1
+ ceph health unmute POOL_NO_REDUNDANCY
+ ceph -s
+ ceph health | grep HEALTH_WARN || return 1
+ # restore pool size to default
+ ceph osd pool set foo size 3
+ ceph -s
+ ceph health | grep HEALTH_OK || return 1
+ ceph osd set noup
+ ceph -s
+ ceph health detail | grep OSDMAP_FLAGS || return 1
+ ceph osd down 0
+ ceph -s
+ ceph health detail | grep OSD_DOWN || return 1
+ ceph health detail | grep HEALTH_WARN || return 1
+
+ ceph health mute OSD_DOWN
+ ceph health mute OSDMAP_FLAGS
+ ceph -s
+ ceph health | grep HEALTH_OK | grep OSD_DOWN | grep OSDMAP_FLAGS || return 1
+ ceph health unmute OSD_DOWN
+ ceph -s
+ ceph health | grep HEALTH_WARN || return 1
+
+ # ttl
+ ceph health mute OSD_DOWN 10s
+ ceph -s
+ ceph health | grep HEALTH_OK || return 1
+ sleep 15
+ ceph -s
+ ceph health | grep HEALTH_WARN || return 1
+
+ # sticky
+ ceph health mute OSDMAP_FLAGS --sticky
+ ceph osd unset noup
+ sleep 5
+ ceph -s
+ ceph health | grep OSDMAP_FLAGS || return 1
+ ceph osd set noup
+ ceph -s
+ ceph health | grep HEALTH_OK || return 1
+
+    # ratchet down the OSD_DOWN count
+ ceph osd down 0 1
+ ceph -s
+ ceph health detail | grep OSD_DOWN || return 1
+
+ ceph health mute OSD_DOWN
+ kill_daemons $dir TERM osd.0
+ ceph osd unset noup
+ sleep 10
+ ceph -s
+ ceph health detail | grep OSD_DOWN || return 1
+ ceph health detail | grep '1 osds down' || return 1
+ ceph health | grep HEALTH_OK || return 1
+
+    sleep 10 # give time for mon tick to ratchet the mute
+ ceph osd set noup
+ ceph health mute OSDMAP_FLAGS
+ ceph -s
+ ceph health detail
+ ceph health | grep HEALTH_OK || return 1
+
+ ceph osd down 1
+ ceph -s
+ ceph health detail
+ ceph health detail | grep '2 osds down' || return 1
+
+ sleep 10 # give time for mute to clear
+ ceph -s
+ ceph health detail
+ ceph health | grep HEALTH_WARN || return 1
+ ceph health detail | grep '2 osds down' || return 1
+
+ teardown $dir || return 1
+}
+
+main health-mute "$@"
diff --git a/qa/standalone/mon/misc.sh b/qa/standalone/mon/misc.sh
new file mode 100755
index 000000000..c7fc6d441
--- /dev/null
+++ b/qa/standalone/mon/misc.sh
@@ -0,0 +1,284 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
+# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com>
+#
+# Author: Loic Dachary <loic@dachary.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7102" # git grep '\<7102\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ $func $dir || return 1
+ done
+}
+
+TEST_POOL=rbd
+
+function TEST_osd_pool_get_set() {
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a || return 1
+ create_pool $TEST_POOL 8
+
+ local flag
+ for flag in nodelete nopgchange nosizechange write_fadvise_dontneed noscrub nodeep-scrub; do
+ ceph osd pool set $TEST_POOL $flag 0 || return 1
+ ! ceph osd dump | grep 'pool ' | grep $flag || return 1
+ ceph osd pool set $TEST_POOL $flag 1 || return 1
+ ceph osd dump | grep 'pool ' | grep $flag || return 1
+ ceph osd pool set $TEST_POOL $flag false || return 1
+ ! ceph osd dump | grep 'pool ' | grep $flag || return 1
+ ceph osd pool set $TEST_POOL $flag false || return 1
+ # check that setting false twice does not toggle to true (bug)
+ ! ceph osd dump | grep 'pool ' | grep $flag || return 1
+ ceph osd pool set $TEST_POOL $flag true || return 1
+ ceph osd dump | grep 'pool ' | grep $flag || return 1
+ # cleanup
+ ceph osd pool set $TEST_POOL $flag 0 || return 1
+ done
+
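+    # the default min_size of a replicated pool is size - size/2 (2 when size is 3)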
+ local size=$(ceph osd pool get $TEST_POOL size|awk '{print $2}')
+ local min_size=$(ceph osd pool get $TEST_POOL min_size|awk '{print $2}')
+ local expected_min_size=$(expr $size - $size / 2)
+ if [ $min_size -ne $expected_min_size ]; then
+ echo "default min_size is wrong: expected $expected_min_size, got $min_size"
+ return 1
+ fi
+
+ ceph osd pool set $TEST_POOL scrub_min_interval 123456 || return 1
+ ceph osd dump | grep 'pool ' | grep 'scrub_min_interval 123456' || return 1
+ ceph osd pool set $TEST_POOL scrub_min_interval 0 || return 1
+ ceph osd dump | grep 'pool ' | grep 'scrub_min_interval' && return 1
+ ceph osd pool set $TEST_POOL scrub_max_interval 123456 || return 1
+ ceph osd dump | grep 'pool ' | grep 'scrub_max_interval 123456' || return 1
+ ceph osd pool set $TEST_POOL scrub_max_interval 0 || return 1
+ ceph osd dump | grep 'pool ' | grep 'scrub_max_interval' && return 1
+ ceph osd pool set $TEST_POOL deep_scrub_interval 123456 || return 1
+ ceph osd dump | grep 'pool ' | grep 'deep_scrub_interval 123456' || return 1
+ ceph osd pool set $TEST_POOL deep_scrub_interval 0 || return 1
+ ceph osd dump | grep 'pool ' | grep 'deep_scrub_interval' && return 1
+
+    # replicated pool size is restricted to the range 1..10
+    ! ceph osd pool set $TEST_POOL size 11 || return 1
+    # replicated pool min_size must be between 1 and size
+ ! ceph osd pool set $TEST_POOL min_size $(expr $size + 1) || return 1
+ ! ceph osd pool set $TEST_POOL min_size 0 || return 1
+
+ local ecpool=erasepool
+ create_pool $ecpool 12 12 erasure default || return 1
+    # erasure pool: size=k+m, default min_size=k+1
+ local size=$(ceph osd pool get $ecpool size|awk '{print $2}')
+ local min_size=$(ceph osd pool get $ecpool min_size|awk '{print $2}')
+ local k=$(expr $min_size - 1) # default min_size=k+1
+    # erasure pool size can't be changed
+    ! ceph osd pool set $ecpool size $(expr $size + 1) || return 1
+    # erasure pool min_size must be between k and size
+ ceph osd pool set $ecpool min_size $(expr $k + 1) || return 1
+ ! ceph osd pool set $ecpool min_size $(expr $k - 1) || return 1
+ ! ceph osd pool set $ecpool min_size $(expr $size + 1) || return 1
+
+ teardown $dir || return 1
+}
+
+function TEST_mon_add_to_single_mon() {
+ local dir=$1
+
+ fsid=$(uuidgen)
+ MONA=127.0.0.1:7117 # git grep '\<7117\>' : there must be only one
+ MONB=127.0.0.1:7118 # git grep '\<7118\>' : there must be only one
+ CEPH_ARGS_orig=$CEPH_ARGS
+ CEPH_ARGS="--fsid=$fsid --auth-supported=none "
+ CEPH_ARGS+="--mon-initial-members=a "
+ CEPH_ARGS+="--mon-host=$MONA "
+
+ setup $dir || return 1
+ run_mon $dir a --public-addr $MONA || return 1
+ # wait for the quorum
+ timeout 120 ceph -s > /dev/null || return 1
+ run_mon $dir b --public-addr $MONB || return 1
+ teardown $dir || return 1
+
+ setup $dir || return 1
+ run_mon $dir a --public-addr $MONA || return 1
+    # without the fix for #5454, mon.a would hit an assert failure on seeing the
+    # MMonJoin from mon.b
+ run_mon $dir b --public-addr $MONB || return 1
+    # make sure mon.b gets its join request in first, then
+ sleep 2
+ # wait for the quorum
+ timeout 120 ceph -s > /dev/null || return 1
+ ceph mon dump
+ ceph mon dump -f json-pretty
+ local num_mons
+ num_mons=$(ceph mon dump --format=json 2>/dev/null | jq ".mons | length") || return 1
+ [ $num_mons == 2 ] || return 1
+ # no reason to take more than 120 secs to get this submitted
+ timeout 120 ceph mon add b $MONB || return 1
+ teardown $dir || return 1
+}
+
+function TEST_no_segfault_for_bad_keyring() {
+ local dir=$1
+ setup $dir || return 1
+ # create a client.admin key and add it to ceph.mon.keyring
+ ceph-authtool --create-keyring $dir/ceph.mon.keyring --gen-key -n mon. --cap mon 'allow *'
+ ceph-authtool --create-keyring $dir/ceph.client.admin.keyring --gen-key -n client.admin --cap mon 'allow *'
+ ceph-authtool $dir/ceph.mon.keyring --import-keyring $dir/ceph.client.admin.keyring
+ CEPH_ARGS_TMP="--fsid=$(uuidgen) --mon-host=127.0.0.1:7102 --auth-supported=cephx "
+ CEPH_ARGS_orig=$CEPH_ARGS
+ CEPH_ARGS="$CEPH_ARGS_TMP --keyring=$dir/ceph.mon.keyring "
+ run_mon $dir a
+ # create a bad keyring and make sure no segfault occurs when using the bad keyring
+ echo -e "[client.admin]\nkey = BQAUlgtWoFePIxAAQ9YLzJSVgJX5V1lh5gyctg==" > $dir/bad.keyring
+ CEPH_ARGS="$CEPH_ARGS_TMP --keyring=$dir/bad.keyring"
+ ceph osd dump 2> /dev/null
+    # exit status 139 (128 + SIGSEGV=11) means segfault and core dumped
+ [ $? -eq 139 ] && return 1
+ CEPH_ARGS=$CEPH_ARGS_orig
+ teardown $dir || return 1
+}
+
+function TEST_mon_features() {
+ local dir=$1
+ setup $dir || return 1
+
+ fsid=$(uuidgen)
+ MONA=127.0.0.1:7127 # git grep '\<7127\>' ; there must be only one
+ MONB=127.0.0.1:7128 # git grep '\<7128\>' ; there must be only one
+ MONC=127.0.0.1:7129 # git grep '\<7129\>' ; there must be only one
+ CEPH_ARGS_orig=$CEPH_ARGS
+ CEPH_ARGS="--fsid=$fsid --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$MONA,$MONB,$MONC "
+ CEPH_ARGS+="--mon-debug-no-initial-persistent-features "
+ CEPH_ARGS+="--mon-debug-no-require-reef "
+
+ run_mon $dir a --public-addr $MONA || return 1
+ run_mon $dir b --public-addr $MONB || return 1
+ timeout 120 ceph -s > /dev/null || return 1
+
+ # expect monmap to contain 3 monitors (a, b, and c)
+ jqinput="$(ceph quorum_status --format=json 2>/dev/null)"
+ jq_success "$jqinput" '.monmap.mons | length == 3' || return 1
+ # quorum contains two monitors
+ jq_success "$jqinput" '.quorum | length == 2' || return 1
+    # quorum's monitor features contain kraken, luminous, mimic, nautilus,
+    # octopus, pacific, quincy, reef
+ jqfilter='.features.quorum_mon[]|select(. == "kraken")'
+ jq_success "$jqinput" "$jqfilter" "kraken" || return 1
+ jqfilter='.features.quorum_mon[]|select(. == "luminous")'
+ jq_success "$jqinput" "$jqfilter" "luminous" || return 1
+ jqfilter='.features.quorum_mon[]|select(. == "mimic")'
+ jq_success "$jqinput" "$jqfilter" "mimic" || return 1
+ jqfilter='.features.quorum_mon[]|select(. == "nautilus")'
+ jq_success "$jqinput" "$jqfilter" "nautilus" || return 1
+ jqfilter='.features.quorum_mon[]|select(. == "octopus")'
+ jq_success "$jqinput" "$jqfilter" "octopus" || return 1
+ jqfilter='.features.quorum_mon[]|select(. == "pacific")'
+ jq_success "$jqinput" "$jqfilter" "pacific" || return 1
+ jqfilter='.features.quorum_mon[]|select(. == "quincy")'
+ jq_success "$jqinput" "$jqfilter" "quincy" || return 1
+ jqfilter='.features.quorum_mon[]|select(. == "reef")'
+ jq_success "$jqinput" "$jqfilter" "reef" || return 1
+
+ # monmap must have no persistent features set, because we
+ # don't currently have a quorum made out of all the monitors
+ # in the monmap.
+ jqfilter='.monmap.features.persistent | length == 0'
+ jq_success "$jqinput" "$jqfilter" || return 1
+
+ # nor do we have any optional features, for that matter.
+ jqfilter='.monmap.features.optional | length == 0'
+ jq_success "$jqinput" "$jqfilter" || return 1
+
+ # validate 'mon feature ls'
+
+ jqinput="$(ceph mon feature ls --format=json 2>/dev/null)"
+    # k l m n o p q r are supported
+ jqfilter='.all.supported[] | select(. == "kraken")'
+ jq_success "$jqinput" "$jqfilter" "kraken" || return 1
+ jqfilter='.all.supported[] | select(. == "luminous")'
+ jq_success "$jqinput" "$jqfilter" "luminous" || return 1
+ jqfilter='.all.supported[] | select(. == "mimic")'
+ jq_success "$jqinput" "$jqfilter" "mimic" || return 1
+ jqfilter='.all.supported[] | select(. == "nautilus")'
+ jq_success "$jqinput" "$jqfilter" "nautilus" || return 1
+ jqfilter='.all.supported[] | select(. == "octopus")'
+ jq_success "$jqinput" "$jqfilter" "octopus" || return 1
+ jqfilter='.all.supported[] | select(. == "pacific")'
+ jq_success "$jqinput" "$jqfilter" "pacific" || return 1
+ jqfilter='.all.supported[] | select(. == "quincy")'
+ jq_success "$jqinput" "$jqfilter" "quincy" || return 1
+ jqfilter='.all.supported[] | select(. == "reef")'
+ jq_success "$jqinput" "$jqfilter" "reef" || return 1
+
+ # start third monitor
+ run_mon $dir c --public-addr $MONC || return 1
+
+ wait_for_quorum 300 3 || return 1
+
+ timeout 300 ceph -s > /dev/null || return 1
+
+ jqinput="$(ceph quorum_status --format=json 2>/dev/null)"
+ # expect quorum to have all three monitors
+ jqfilter='.quorum | length == 3'
+ jq_success "$jqinput" "$jqfilter" || return 1
+
+ # quorum's monitor features should have p now too
+ jqfilter='.features.quorum_mon[]|select(. == "pacific")'
+ jq_success "$jqinput" "$jqfilter" "pacific" || return 1
+
+ # persistent too
+ jqfilter='.monmap.features.persistent[]|select(. == "kraken")'
+ jq_success "$jqinput" "$jqfilter" "kraken" || return 1
+ jqfilter='.monmap.features.persistent[]|select(. == "luminous")'
+ jq_success "$jqinput" "$jqfilter" "luminous" || return 1
+ jqfilter='.monmap.features.persistent[]|select(. == "mimic")'
+ jq_success "$jqinput" "$jqfilter" "mimic" || return 1
+ jqfilter='.monmap.features.persistent[]|select(. == "osdmap-prune")'
+ jq_success "$jqinput" "$jqfilter" "osdmap-prune" || return 1
+ jqfilter='.monmap.features.persistent[]|select(. == "nautilus")'
+ jq_success "$jqinput" "$jqfilter" "nautilus" || return 1
+ jqfilter='.monmap.features.persistent[]|select(. == "octopus")'
+ jq_success "$jqinput" "$jqfilter" "octopus" || return 1
+ jqfilter='.monmap.features.persistent[]|select(. == "pacific")'
+ jq_success "$jqinput" "$jqfilter" "pacific" || return 1
+ jqfilter='.monmap.features.persistent[]|select(. == "elector-pinging")'
+ jq_success "$jqinput" "$jqfilter" "elector-pinging" || return 1
+ jqfilter='.monmap.features.persistent | length == 10'
+ jq_success "$jqinput" "$jqfilter" || return 1
+ jqfilter='.monmap.features.persistent[]|select(. == "quincy")'
+ jq_success "$jqinput" "$jqfilter" "quincy" || return 1
+ jqfilter='.monmap.features.persistent[]|select(. == "reef")'
+ jq_success "$jqinput" "$jqfilter" "reef" || return 1
+
+ CEPH_ARGS=$CEPH_ARGS_orig
+ # that's all folks. thank you for tuning in.
+ teardown $dir || return 1
+}
+
+main misc "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && test/mon/misc.sh"
+# End:
diff --git a/qa/standalone/mon/mkfs.sh b/qa/standalone/mon/mkfs.sh
new file mode 100755
index 000000000..6650bdb49
--- /dev/null
+++ b/qa/standalone/mon/mkfs.sh
@@ -0,0 +1,193 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2013 Cloudwatt <libre.licensing@cloudwatt.com>
+# Copyright (C) 2014 Red Hat <contact@redhat.com>
+#
+# Author: Loic Dachary <loic@dachary.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+set -xe
+PS4='${BASH_SOURCE[0]}:$LINENO: ${FUNCNAME[0]}: '
+
+
+DIR=mkfs
+export CEPH_CONF=/dev/null
+unset CEPH_ARGS
+MON_ID=a
+MON_DIR=$DIR/$MON_ID
+CEPH_MON=127.0.0.1:7110 # git grep '\<7110\>' : there must be only one
+TIMEOUT=360
+
+EXTRAOPTS=""
+
+function setup() {
+ teardown
+ mkdir $DIR
+}
+
+function teardown() {
+ kill_daemons
+ rm -fr $DIR
+}
+
+function mon_mkfs() {
+ local fsid=$(uuidgen)
+
+ ceph-mon \
+ --id $MON_ID \
+ --fsid $fsid \
+ $EXTRAOPTS \
+ --mkfs \
+ --mon-data=$MON_DIR \
+ --mon-initial-members=$MON_ID \
+ --mon-host=$CEPH_MON \
+ "$@"
+}
+
+function mon_run() {
+ ceph-mon \
+ --id $MON_ID \
+ --chdir= \
+ --mon-osd-full-ratio=.99 \
+ --mon-data-avail-crit=1 \
+ $EXTRAOPTS \
+ --mon-data=$MON_DIR \
+ --log-file=$MON_DIR/log \
+ --mon-cluster-log-file=$MON_DIR/log \
+ --run-dir=$MON_DIR \
+ --pid-file=$MON_DIR/pidfile \
+ --public-addr $CEPH_MON \
+ "$@"
+}
+
+function kill_daemons() {
+ for pidfile in $(find $DIR -name pidfile) ; do
+ pid=$(cat $pidfile)
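+        # signal repeatedly with growing pauses; stop as soon as the pid is gone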
+ for try in 0 1 1 1 2 3 ; do
+ kill $pid || break
+ sleep $try
+ done
+ done
+}
+
+function auth_none() {
+ mon_mkfs --auth-supported=none
+
+ ceph-mon \
+ --id $MON_ID \
+ --mon-osd-full-ratio=.99 \
+ --mon-data-avail-crit=1 \
+ $EXTRAOPTS \
+ --mon-data=$MON_DIR \
+ --extract-monmap $MON_DIR/monmap
+
+ [ -f $MON_DIR/monmap ] || return 1
+
+ [ ! -f $MON_DIR/keyring ] || return 1
+
+ mon_run --auth-supported=none
+
+ timeout $TIMEOUT ceph --mon-host $CEPH_MON mon stat || return 1
+}
+
+function auth_cephx_keyring() {
+ cat > $DIR/keyring <<EOF
+[mon.]
+ key = AQDUS79S0AF9FRAA2cgRLFscVce0gROn/s9WMg==
+ caps mon = "allow *"
+EOF
+
+ mon_mkfs --keyring=$DIR/keyring
+
+ [ -f $MON_DIR/keyring ] || return 1
+
+ mon_run
+
+ timeout $TIMEOUT ceph \
+ --name mon. \
+ --keyring $MON_DIR/keyring \
+ --mon-host $CEPH_MON mon stat || return 1
+}
+
+function auth_cephx_key() {
+ if [ -f /etc/ceph/keyring ] ; then
+ echo "Please move /etc/ceph/keyring away for testing!"
+ return 1
+ fi
+
+ local key=$(ceph-authtool --gen-print-key)
+
+ if mon_mkfs --key='corrupted key' ; then
+ return 1
+ else
+ rm -fr $MON_DIR/store.db
+ rm -fr $MON_DIR/kv_backend
+ fi
+
+ mon_mkfs --key=$key
+
+ [ -f $MON_DIR/keyring ] || return 1
+ grep $key $MON_DIR/keyring
+
+ mon_run
+
+ timeout $TIMEOUT ceph \
+ --name mon. \
+ --keyring $MON_DIR/keyring \
+ --mon-host $CEPH_MON mon stat || return 1
+}
+
+function makedir() {
+ local toodeep=$MON_DIR/toodeep
+
+ # fail if recursive directory creation is needed
+ ceph-mon \
+ --id $MON_ID \
+ --mon-osd-full-ratio=.99 \
+ --mon-data-avail-crit=1 \
+ $EXTRAOPTS \
+ --mkfs \
+ --mon-data=$toodeep 2>&1 | tee $DIR/makedir.log
+ grep 'toodeep.*No such file' $DIR/makedir.log > /dev/null
+ rm $DIR/makedir.log
+
+ # an empty directory does not mean the mon exists
+ mkdir $MON_DIR
+ mon_mkfs --auth-supported=none 2>&1 | tee $DIR/makedir.log
+ ! grep "$MON_DIR already exists" $DIR/makedir.log || return 1
+}
+
+function idempotent() {
+ mon_mkfs --auth-supported=none
+ mon_mkfs --auth-supported=none 2>&1 | tee $DIR/makedir.log
+ grep "'$MON_DIR' already exists" $DIR/makedir.log > /dev/null || return 1
+}
+
+function run() {
+ local actions
+ actions+="makedir "
+ actions+="idempotent "
+ actions+="auth_cephx_key "
+ actions+="auth_cephx_keyring "
+ actions+="auth_none "
+ for action in $actions ; do
+ setup
+ $action || return 1
+ teardown
+ done
+}
+
+run
+
+# Local Variables:
+# compile-command: "cd ../.. ; make TESTS=test/mon/mkfs.sh check"
+# End:
diff --git a/qa/standalone/mon/mon-bind.sh b/qa/standalone/mon/mon-bind.sh
new file mode 100755
index 000000000..41982b916
--- /dev/null
+++ b/qa/standalone/mon/mon-bind.sh
@@ -0,0 +1,143 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2017 Quantum Corp.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+SOCAT_PIDS=()
+
+function port_forward() {
+ local source_port=$1
+ local target_port=$2
+
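+    # listen on source_port and relay every connection to target_port;
+    # the 'fork' option lets socat serve multiple connections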
+ socat TCP-LISTEN:${source_port},fork,reuseaddr TCP:localhost:${target_port} &
+ SOCAT_PIDS+=( $! )
+}
+
+function cleanup() {
+ for p in "${SOCAT_PIDS[@]}"; do
+ kill $p
+ done
+ SOCAT_PIDS=()
+}
+
+trap cleanup SIGTERM SIGKILL SIGQUIT SIGINT
+
+function run() {
+ local dir=$1
+ shift
+
+ export MON_IP=127.0.0.1
+ export MONA_PUBLIC=7132 # git grep '\<7132\>' ; there must be only one
+ export MONB_PUBLIC=7133 # git grep '\<7133\>' ; there must be only one
+ export MONC_PUBLIC=7134 # git grep '\<7134\>' ; there must be only one
+ export MONA_BIND=7135 # git grep '\<7135\>' ; there must be only one
+ export MONB_BIND=7136 # git grep '\<7136\>' ; there must be only one
+ export MONC_BIND=7137 # git grep '\<7137\>' ; there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir && cleanup || { cleanup; return 1; }
+ teardown $dir
+ done
+}
+
+function TEST_mon_client_connect_fails() {
+ local dir=$1
+
+ # start the mon with a public-bind-addr that is different
+ # from the public-addr.
+ CEPH_ARGS+="--mon-host=${MON_IP}:${MONA_PUBLIC} "
+ run_mon $dir a --mon-host=${MON_IP}:${MONA_PUBLIC} --public-bind-addr=${MON_IP}:${MONA_BIND} || return 1
+
+    # now attempt to ping it; this should fail because nothing forwards the
+    # public port to the bind port yet.
+ timeout 3 ceph ping mon.a || return 0
+ return 1
+}
+
+function TEST_mon_client_connect() {
+ local dir=$1
+
+ # start the mon with a public-bind-addr that is different
+ # from the public-addr.
+ CEPH_ARGS+="--mon-host=${MON_IP}:${MONA_PUBLIC} "
+ run_mon $dir a --mon-host=${MON_IP}:${MONA_PUBLIC} --public-bind-addr=${MON_IP}:${MONA_BIND} || return 1
+
+ # now forward the public port to the bind port.
+ port_forward ${MONA_PUBLIC} ${MONA_BIND}
+
+    # attempt to connect; we expect that to work now that the port is forwarded
+ ceph ping mon.a || return 1
+}
+
+function TEST_mon_quorum() {
+ local dir=$1
+
+ # start the mon with a public-bind-addr that is different
+ # from the public-addr.
+ CEPH_ARGS+="--mon-host=${MON_IP}:${MONA_PUBLIC},${MON_IP}:${MONB_PUBLIC},${MON_IP}:${MONC_PUBLIC} "
+ run_mon $dir a --public-addr=${MON_IP}:${MONA_PUBLIC} --public-bind-addr=${MON_IP}:${MONA_BIND} || return 1
+ run_mon $dir b --public-addr=${MON_IP}:${MONB_PUBLIC} --public-bind-addr=${MON_IP}:${MONB_BIND} || return 1
+ run_mon $dir c --public-addr=${MON_IP}:${MONC_PUBLIC} --public-bind-addr=${MON_IP}:${MONC_BIND} || return 1
+
+ # now forward the public port to the bind port.
+ port_forward ${MONA_PUBLIC} ${MONA_BIND}
+ port_forward ${MONB_PUBLIC} ${MONB_BIND}
+ port_forward ${MONC_PUBLIC} ${MONC_BIND}
+
+ # expect monmap to contain 3 monitors (a, b, and c)
+ jqinput="$(ceph quorum_status --format=json 2>/dev/null)"
+ jq_success "$jqinput" '.monmap.mons | length == 3' || return 1
+
+ # quorum should form
+ wait_for_quorum 300 3 || return 1
+ # expect quorum to have all three monitors
+ jqfilter='.quorum | length == 3'
+ jq_success "$jqinput" "$jqfilter" || return 1
+}
+
+function TEST_put_get() {
+ local dir=$1
+
+ # start the mon with a public-bind-addr that is different
+ # from the public-addr.
+ CEPH_ARGS+="--mon-host=${MON_IP}:${MONA_PUBLIC},${MON_IP}:${MONB_PUBLIC},${MON_IP}:${MONC_PUBLIC} "
+ run_mon $dir a --public-addr=${MON_IP}:${MONA_PUBLIC} --public-bind-addr=${MON_IP}:${MONA_BIND} || return 1
+ run_mon $dir b --public-addr=${MON_IP}:${MONB_PUBLIC} --public-bind-addr=${MON_IP}:${MONB_BIND} || return 1
+ run_mon $dir c --public-addr=${MON_IP}:${MONC_PUBLIC} --public-bind-addr=${MON_IP}:${MONC_BIND} || return 1
+
+ # now forward the public port to the bind port.
+ port_forward ${MONA_PUBLIC} ${MONA_BIND}
+ port_forward ${MONB_PUBLIC} ${MONB_BIND}
+ port_forward ${MONC_PUBLIC} ${MONC_BIND}
+
+ # quorum should form
+ wait_for_quorum 300 3 || return 1
+
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+
+ create_pool hello 8 || return 1
+
+ echo "hello world" > $dir/hello
+ rados --pool hello put foo $dir/hello || return 1
+ rados --pool hello get foo $dir/hello2 || return 1
+ diff $dir/hello $dir/hello2 || return 1
+}
+
+main mon-bind "$@"
diff --git a/qa/standalone/mon/mon-created-time.sh b/qa/standalone/mon/mon-created-time.sh
new file mode 100755
index 000000000..4b8446059
--- /dev/null
+++ b/qa/standalone/mon/mon-created-time.sh
@@ -0,0 +1,54 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2015 SUSE LINUX GmbH
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7125" # git grep '\<7125\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+function TEST_mon_created_time() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+
+ ceph mon dump || return 1
+
+ if test "$(ceph mon dump 2>/dev/null | sed -n '/created/p' | awk '{print $NF}')"x = ""x ; then
+ return 1
+ fi
+
+ if test "$(ceph mon dump 2>/dev/null | sed -n '/created/p' | awk '{print $NF}')"x = "0.000000"x ; then
+ return 1
+ fi
+}
+
+main mon-created-time "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && test/mon/mon-created-time.sh"
+# End:
diff --git a/qa/standalone/mon/mon-handle-forward.sh b/qa/standalone/mon/mon-handle-forward.sh
new file mode 100755
index 000000000..01c8f130f
--- /dev/null
+++ b/qa/standalone/mon/mon-handle-forward.sh
@@ -0,0 +1,64 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2013 Cloudwatt <libre.licensing@cloudwatt.com>
+# Copyright (C) 2014,2015 Red Hat <contact@redhat.com>
+#
+# Author: Loic Dachary <loic@dachary.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+
+ setup $dir || return 1
+
+ MONA=127.0.0.1:7300
+ MONB=127.0.0.1:7301
+ (
+ FSID=$(uuidgen)
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$FSID --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$MONA,$MONB "
+ run_mon $dir a --public-addr $MONA || return 1
+ run_mon $dir b --public-addr $MONB || return 1
+ )
+
+ timeout 360 ceph --mon-host-override $MONA mon stat || return 1
+ # check that MONB is indeed a peon
+ ceph --admin-daemon $(get_asok_path mon.b) mon_status |
+ grep '"peon"' || return 1
+ # when the leader ( MONA ) is used, there is no message forwarding
+ ceph --mon-host-override $MONA osd pool create POOL1 12
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1
+ grep 'mon_command(.*"POOL1"' $dir/mon.a.log || return 1
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.b) log flush || return 1
+ grep 'mon_command(.*"POOL1"' $dir/mon.b.log && return 1
+ # when the peon ( MONB ) is used, the message is forwarded to the leader
+ ceph --mon-host-override $MONB osd pool create POOL2 12
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.b) log flush || return 1
+ grep 'forward_request.*mon_command(.*"POOL2"' $dir/mon.b.log || return 1
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1
+ grep ' forward(mon_command(.*"POOL2"' $dir/mon.a.log || return 1
+ # forwarded messages must retain features from the original connection
+ features=$(sed -n -e 's|.*127.0.0.1:0.*accept features \([0-9][0-9]*\)|\1|p' < \
+ $dir/mon.b.log)
+ grep ' forward(mon_command(.*"POOL2".*con_features '$features $dir/mon.a.log || return 1
+
+ teardown $dir || return 1
+}
+
+main mon-handle-forward "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 TESTS=test/mon/mon-handle-forward.sh check"
+# End:
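
A quick way to cross-check the leader/peon split that the grep above relies on is to ask each monitor for its state directly. This is only a sketch for interactive debugging, not part of the test; the state field comes from the same mon_status output the test greps, while quorum_status and its quorum_leader_name field are an assumption about the cluster-wide view:

    # per-daemon view, via the admin socket helper used above
    ceph --admin-daemon $(get_asok_path mon.a) mon_status | jq -r .state   # expected: leader
    ceph --admin-daemon $(get_asok_path mon.b) mon_status | jq -r .state   # expected: peon
    # cluster-wide view (field name assumed)
    ceph quorum_status | jq -r .quorum_leader_name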
diff --git a/qa/standalone/mon/mon-last-epoch-clean.sh b/qa/standalone/mon/mon-last-epoch-clean.sh
new file mode 100755
index 000000000..82243103e
--- /dev/null
+++ b/qa/standalone/mon/mon-last-epoch-clean.sh
@@ -0,0 +1,307 @@
+#!/usr/bin/env bash
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7302" # git grep '\<7302\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+
+function check_lec_equals_pools() {
+
+ local pool_id=$1
+
+ report=$(ceph report)
+ lec=$(echo $report | \
+ jq '.osdmap_clean_epochs.min_last_epoch_clean')
+
+ if [[ -z "$pool_id" ]]; then
+ pools=($(echo $report | \
+ jq ".osdmap_clean_epochs.last_epoch_clean.per_pool[] | select(.floor == $lec) | .poolid"))
+
+ [[ ${#pools[*]} -eq 2 ]] || ( echo $report ; return 1 )
+ else
+ floor=($(echo $report | \
+ jq ".osdmap_clean_epochs.last_epoch_clean.per_pool[] | select(.poolid == $pool_id) | .floor"))
+
+ [[ $lec -eq $floor ]] || ( echo $report ; return 1 )
+ fi
+ return 0
+}
+
+function check_lec_lower_than_pool() {
+
+ local pool_id=$1
+ [[ -z "$pool_id" ]] && ( echo "expected pool_id as parameter" ; exit 1 )
+
+ report=$(ceph report)
+ lec=$(echo $report | \
+ jq '.osdmap_clean_epochs.min_last_epoch_clean')
+
+ floor=($(echo $report | \
+ jq ".osdmap_clean_epochs.last_epoch_clean.per_pool[] | select(.poolid == $pool_id) | .floor"))
+
+ [[ $lec -lt $floor ]] || ( echo $report ; return 1 )
+ return 0
+}
+
+function check_floor_pool_greater_than_pool() {
+
+ local pool_a=$1
+ local pool_b=$2
+ [[ -z "$pool_a" ]] && ( echo "expected id as first parameter" ; exit 1 )
+ [[ -z "$pool_b" ]] && ( echo "expected id as second parameter" ; exit 1 )
+
+ report=$(ceph report)
+
+ floor_a=($(echo $report | \
+ jq ".osdmap_clean_epochs.last_epoch_clean.per_pool[] | select(.poolid == $pool_a) | .floor"))
+
+ floor_b=($(echo $report | \
+ jq ".osdmap_clean_epochs.last_epoch_clean.per_pool[] | select(.poolid == $pool_b) | .floor"))
+
+ [[ $floor_a -gt $floor_b ]] || ( echo $report ; return 1 )
+ return 0
+}
+
+function check_lec_honours_osd() {
+
+ local osd=$1
+
+ report=$(ceph report)
+ lec=$(echo $report | \
+ jq '.osdmap_clean_epochs.min_last_epoch_clean')
+
+ if [[ -z "$osd" ]]; then
+ osds=($(echo $report | \
+ jq ".osdmap_clean_epochs.osd_epochs[] | select(.epoch >= $lec) | .id"))
+
+ [[ ${#osds[*]} -eq 3 ]] || ( echo $report ; return 1 )
+ else
+ epoch=($(echo $report | \
+ jq ".osdmap_clean_epochs.osd_epochs[] | select(.id == $osd) | .epoch"))
+ [[ ${#epoch[*]} -eq 1 ]] || ( echo $report ; return 1 )
+ [[ ${epoch[0]} -ge $lec ]] || ( echo $report ; return 1 )
+ fi
+
+ return 0
+}
+
+function validate_fc() {
+ report=$(ceph report)
+ lec=$(echo $report | \
+ jq '.osdmap_clean_epochs.min_last_epoch_clean')
+ osdm_fc=$(echo $report | \
+ jq '.osdmap_first_committed')
+
+ [[ $lec -eq $osdm_fc ]] || ( echo $report ; return 1 )
+ return 0
+}
+
+function get_fc_lc_diff() {
+ report=$(ceph report)
+ osdm_fc=$(echo $report | \
+ jq '.osdmap_first_committed')
+ osdm_lc=$(echo $report | \
+ jq '.osdmap_last_committed')
+
+ echo $((osdm_lc - osdm_fc))
+}
+
+function get_pool_id() {
+
+ local pn=$1
+ [[ -z "$pn" ]] && ( echo "expected pool name as argument" ; exit 1 )
+
+ report=$(ceph report)
+ pool_id=$(echo $report | \
+ jq ".osdmap.pools[] | select(.pool_name == \"$pn\") | .pool")
+
+ [[ $pool_id -ge 0 ]] || \
+ ( echo "unexpected pool id for pool \'$pn\': $pool_id" ; return -1 )
+
+ echo $pool_id
+ return 0
+}
+
+function wait_for_total_num_maps() {
+ # adapted from wait_for_health, because it's easier than deduplicating the code
+ local -a delays=($(get_timeout_delays $TIMEOUT .1))
+ local -i loop=0
+ local -i v_diff=$1
+
+ while [[ $(get_fc_lc_diff) -gt $v_diff ]]; do
+ if (( $loop >= ${#delays[*]} )) ; then
+ echo "maps were not trimmed"
+ return 1
+ fi
+ sleep ${delays[$loop]}
+ loop+=1
+ done
+}
+
+function TEST_mon_last_clean_epoch() {
+
+ local dir=$1
+
+ run_mon $dir a || return 1
+ run_mgr $dir x --mon-warn-on-pool-no-app=false || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+ osd_pid=$(cat $dir/osd.2.pid)
+
+ sleep 5
+
+ ceph tell 'osd.*' injectargs '--osd-beacon-report-interval 10' || exit 1
+ ceph tell 'mon.*' injectargs \
+ '--mon-min-osdmap-epochs 2 --paxos-service-trim-min 1' || exit 1
+
+ create_pool foo 32
+ create_pool bar 32
+
+ foo_id=$(get_pool_id "foo")
+ bar_id=$(get_pool_id "bar")
+
+ [[ $foo_id -lt 0 ]] && ( echo "couldn't find pool 'foo' id" ; exit 1 )
+ [[ $bar_id -lt 0 ]] && ( echo "couldn't find pool 'bar' id" ; exit 1 )
+
+ # we see unexplained health warnings here; raise the full ratios (as
+ # mon-osdmap-prune.sh does) so the cluster can still reach HEALTH_OK.
+
+ ceph osd set-full-ratio 0.97
+ ceph osd set-backfillfull-ratio 0.97
+
+ wait_for_health_ok || exit 1
+
+ pre_map_diff=$(get_fc_lc_diff)
+ wait_for_total_num_maps 2
+ post_map_diff=$(get_fc_lc_diff)
+
+ [[ $post_map_diff -le $pre_map_diff ]] || exit 1
+
+ pre_map_diff=$post_map_diff
+
+ ceph osd pool set foo size 3
+ ceph osd pool set bar size 3
+
+ wait_for_health_ok || exit 1
+
+ check_lec_equals_pools || exit 1
+ check_lec_honours_osd || exit 1
+ validate_fc || exit 1
+
+ # down osd.2; expected result (because all pools' size equals 3):
+ # - number of committed maps increase over 2
+ # - lec equals fc
+ # - lec equals osd.2's epoch
+ # - all pools have floor equal to lec
+
+ while kill $osd_pid ; do sleep 1 ; done
+ ceph osd out 2
+ sleep 5 # seriously, just to make sure things settle; we may not need this.
+
+ # generate some maps
+ for ((i=0; i <= 10; ++i)); do
+ ceph osd set noup
+ sleep 1
+ ceph osd unset noup
+ sleep 1
+ done
+
+ post_map_diff=$(get_fc_lc_diff)
+ [[ $post_map_diff -gt 2 ]] || exit 1
+
+ validate_fc || exit 1
+ check_lec_equals_pools || exit 1
+ check_lec_honours_osd 2 || exit 1
+
+ # adjust pool 'bar' size to 2; expect:
+ # - number of committed maps still over 2
+ # - lec equals fc
+ # - lec equals pool 'foo' floor
+ # - pool 'bar' floor greater than pool 'foo'
+
+ ceph osd pool set bar size 2
+
+ diff_ver=$(get_fc_lc_diff)
+ [[ $diff_ver -gt 2 ]] || exit 1
+
+ validate_fc || exit 1
+
+ check_lec_equals_pools $foo_id || exit 1
+ check_lec_lower_than_pool $bar_id || exit 1
+
+ check_floor_pool_greater_than_pool $bar_id $foo_id || exit 1
+
+ # set pool 'foo' size to 2; expect:
+ # - health_ok
+ # - lec equals pools
+ # - number of committed maps decreases
+ # - lec equals fc
+
+ pre_map_diff=$(get_fc_lc_diff)
+
+ ceph osd pool set foo size 2 || exit 1
+ wait_for_clean || exit 1
+
+ check_lec_equals_pools || exit 1
+ validate_fc || exit 1
+
+ if ! wait_for_total_num_maps 2 ; then
+ post_map_diff=$(get_fc_lc_diff)
+ # number of maps is decreasing though, right?
+ [[ $post_map_diff -lt $pre_map_diff ]] || exit 1
+ fi
+
+ # bring back osd.2; expect:
+ # - health_ok
+ # - lec equals fc
+ # - number of committed maps equals 2
+ # - all pools have floor equal to lec
+
+ pre_map_diff=$(get_fc_lc_diff)
+
+ activate_osd $dir 2 || exit 1
+ wait_for_health_ok || exit 1
+ validate_fc || exit 1
+ check_lec_equals_pools || exit 1
+
+ if ! wait_for_total_num_maps 2 ; then
+ post_map_diff=$(get_fc_lc_diff)
+ # number of maps is decreasing though, right?
+ [[ $post_map_diff -lt $pre_map_diff ]] || exit 1
+ fi
+
+ return 0
+}
+
+main mon-last-clean-epoch "$@"
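
All of the helpers above read a handful of fields out of a single ceph report. A minimal sketch that dumps them in one pass can be handy when one of the checks trips; the field paths are the ones used by the helpers, though the overall report layout may differ between releases:

    ceph report 2>/dev/null | jq '{
        lec:   .osdmap_clean_epochs.min_last_epoch_clean,
        fc:    .osdmap_first_committed,
        lc:    .osdmap_last_committed,
        pools: [.osdmap_clean_epochs.last_epoch_clean.per_pool[] | {poolid, floor}],
        osds:  [.osdmap_clean_epochs.osd_epochs[] | {id, epoch}]
    }'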
diff --git a/qa/standalone/mon/mon-osdmap-prune.sh b/qa/standalone/mon/mon-osdmap-prune.sh
new file mode 100755
index 000000000..f8f7876bb
--- /dev/null
+++ b/qa/standalone/mon/mon-osdmap-prune.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+base_test=$CEPH_ROOT/qa/workunits/mon/test_mon_osdmap_prune.sh
+
+function run() {
+
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7115"
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none --mon-host=$CEPH_MON "
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+function TEST_osdmap_prune() {
+
+ local dir=$1
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+
+ sleep 5
+
+ # we are getting OSD_OUT_OF_ORDER_FULL health errors, and it's not clear
+ # why. so, to make the health checks happy, mask those errors.
+ ceph osd set-full-ratio 0.97
+ ceph osd set-backfillfull-ratio 0.97
+
+ ceph config set osd osd_beacon_report_interval 10 || return 1
+ ceph config set mon mon_debug_extra_checks true || return 1
+
+ ceph config set mon mon_min_osdmap_epochs 100 || return 1
+ ceph config set mon mon_osdmap_full_prune_enabled true || return 1
+ ceph config set mon mon_osdmap_full_prune_min 200 || return 1
+ ceph config set mon mon_osdmap_full_prune_interval 10 || return 1
+ ceph config set mon mon_osdmap_full_prune_txsize 100 || return 1
+
+
+ bash -x $base_test || return 1
+
+ return 0
+}
+
+main mon-osdmap-prune "$@"
+
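
While the workunit runs, the effect of the settings above can be observed with the same osdmap bookkeeping fields used by mon-last-epoch-clean.sh earlier in this series; trimming keeps the committed range bounded, and the full-prune options control how aggressively full maps inside that range are thinned out. A sketch for interactive use, not part of the test:

    watch -n 5 "ceph report 2>/dev/null | jq '.osdmap_first_committed, .osdmap_last_committed'"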
diff --git a/qa/standalone/mon/mon-ping.sh b/qa/standalone/mon/mon-ping.sh
new file mode 100755
index 000000000..1f5096be1
--- /dev/null
+++ b/qa/standalone/mon/mon-ping.sh
@@ -0,0 +1,46 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2015 SUSE LINUX GmbH
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7119" # git grep '\<7119\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+function TEST_mon_ping() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+
+ ceph ping mon.a || return 1
+}
+
+main mon-ping "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && test/mon/mon-ping.sh"
+# End:
diff --git a/qa/standalone/mon/mon-scrub.sh b/qa/standalone/mon/mon-scrub.sh
new file mode 100755
index 000000000..158bd434c
--- /dev/null
+++ b/qa/standalone/mon/mon-scrub.sh
@@ -0,0 +1,49 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
+# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com>
+#
+# Author: Loic Dachary <loic@dachary.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7120" # git grep '\<7120\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+function TEST_mon_scrub() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+
+ ceph mon scrub || return 1
+}
+
+main mon-scrub "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && test/mon/mon-scrub.sh"
+# End:
diff --git a/qa/standalone/mon/mon-seesaw.sh b/qa/standalone/mon/mon-seesaw.sh
new file mode 100755
index 000000000..1c97847b9
--- /dev/null
+++ b/qa/standalone/mon/mon-seesaw.sh
@@ -0,0 +1,72 @@
+#!/usr/bin/env bash
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON_A="127.0.0.1:7139" # git grep '\<7139\>' : there must be only one
+ export CEPH_MON_B="127.0.0.1:7141" # git grep '\<7141\>' : there must be only one
+ export CEPH_MON_C="127.0.0.1:7142" # git grep '\<7142\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+
+ export BASE_CEPH_ARGS=$CEPH_ARGS
+ CEPH_ARGS+="--mon-host=$CEPH_MON_A "
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+function TEST_mon_seesaw() {
+ local dir=$1
+
+ setup $dir || return
+
+ # start with 1 mon
+ run_mon $dir aa --public-addr $CEPH_MON_A || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+
+ wait_for_quorum 300 1 || return 1
+
+ # add in a second
+ run_mon $dir bb --public-addr $CEPH_MON_B || return 1
+ CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B"
+ wait_for_quorum 300 2 || return 1
+
+ # remove the first one
+ ceph mon rm aa || return 1
+ CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_B"
+ sleep 5
+ wait_for_quorum 300 1 || return 1
+
+ # do some stuff that requires the osds be able to communicate with the
+ # mons. (see http://tracker.ceph.com/issues/17558)
+ ceph osd pool create foo 8
+ rados -p foo bench 1 write
+ wait_for_clean || return 1
+
+ # nuke the mon store so that it will rejoin (otherwise we get
+ # "not in monmap and have been in a quorum before; must have been removed")
+ rm -rf $dir/aa
+
+ # add mon.aa back in
+ # (use a different addr to avoid bind issues)
+ run_mon $dir aa --public-addr $CEPH_MON_C || return 1
+ CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_C,$CEPH_MON_B"
+ wait_for_quorum 300 2 || return 1
+}
+
+main mon-seesaw "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && test/mon/mon-ping.sh"
+# End:
diff --git a/qa/standalone/mon/osd-crush.sh b/qa/standalone/mon/osd-crush.sh
new file mode 100755
index 000000000..aa7cac694
--- /dev/null
+++ b/qa/standalone/mon/osd-crush.sh
@@ -0,0 +1,196 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
+# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com>
+#
+# Author: Loic Dachary <loic@dachary.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7104" # git grep '\<7104\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+
+ local funcs=${@:-$(set | ${SED} -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+function TEST_crush_rule_create_simple() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+
+ ceph --format xml osd crush rule dump replicated_rule | \
+ egrep '<op>take</op><item>[^<]+</item><item_name>default</item_name>' | \
+ grep '<op>choose_firstn</op><num>0</num><type>osd</type>' || return 1
+ local rule=rule0
+ local root=host1
+ ceph osd crush add-bucket $root host
+ local failure_domain=osd
+ ceph osd crush rule create-simple $rule $root $failure_domain || return 1
+ ceph osd crush rule create-simple $rule $root $failure_domain 2>&1 | \
+ grep "$rule already exists" || return 1
+ ceph --format xml osd crush rule dump $rule | \
+ egrep '<op>take</op><item>[^<]+</item><item_name>'$root'</item_name>' | \
+ grep '<op>choose_firstn</op><num>0</num><type>'$failure_domain'</type>' || return 1
+ ceph osd crush rule rm $rule || return 1
+}
+
+function TEST_crush_rule_dump() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+
+ local rule=rule1
+ ceph osd crush rule create-erasure $rule || return 1
+ test $(ceph --format json osd crush rule dump $rule | \
+ jq ".rule_name == \"$rule\"") == true || return 1
+ test $(ceph --format json osd crush rule dump | \
+ jq "map(select(.rule_name == \"$rule\")) | length == 1") == true || return 1
+ ! ceph osd crush rule dump non_existent_rule || return 1
+ ceph osd crush rule rm $rule || return 1
+}
+
+function TEST_crush_rule_rm() {
+ local dir=$1
+ local rule=erasure2
+
+ run_mon $dir a || return 1
+
+ ceph osd crush rule create-erasure $rule default || return 1
+ ceph osd crush rule ls | grep $rule || return 1
+ ceph osd crush rule rm $rule || return 1
+ ! ceph osd crush rule ls | grep $rule || return 1
+}
+
+function TEST_crush_rule_create_erasure() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+ # should have at least one OSD
+ run_osd $dir 0 || return 1
+
+ local rule=rule3
+ #
+ # create a new rule with the default profile, implicitly
+ #
+ ceph osd crush rule create-erasure $rule || return 1
+ ceph osd crush rule create-erasure $rule 2>&1 | \
+ grep "$rule already exists" || return 1
+ ceph --format xml osd crush rule dump $rule | \
+ egrep '<op>take</op><item>[^<]+</item><item_name>default</item_name>' | \
+ grep '<op>chooseleaf_indep</op><num>0</num><type>host</type>' || return 1
+ ceph osd crush rule rm $rule || return 1
+ ! ceph osd crush rule ls | grep $rule || return 1
+ #
+ # create a new rule with the default profile, explicitly
+ #
+ ceph osd crush rule create-erasure $rule default || return 1
+ ceph osd crush rule ls | grep $rule || return 1
+ ceph osd crush rule rm $rule || return 1
+ ! ceph osd crush rule ls | grep $rule || return 1
+ #
+ # create a new rule and the default profile, implicitly
+ #
+ ceph osd erasure-code-profile rm default || return 1
+ ! ceph osd erasure-code-profile ls | grep default || return 1
+ ceph osd crush rule create-erasure $rule || return 1
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1
+ grep 'profile set default' $dir/mon.a.log || return 1
+ ceph osd erasure-code-profile ls | grep default || return 1
+ ceph osd crush rule rm $rule || return 1
+ ! ceph osd crush rule ls | grep $rule || return 1
+}
+
+function TEST_add_rule_failed() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+
+ local root=host1
+
+ ceph osd crush add-bucket $root host
+ ceph osd crush rule create-simple test_rule1 $root osd firstn || return 1
+ ceph osd crush rule create-simple test_rule2 $root osd firstn || return 1
+ ceph osd getcrushmap > $dir/crushmap || return 1
+ crushtool --decompile $dir/crushmap > $dir/crushmap.txt || return 1
+ for i in $(seq 3 255)
+ do
+ cat <<EOF
+rule test_rule$i {
+ id $i
+ type replicated
+ step take $root
+ step choose firstn 0 type osd
+ step emit
+}
+EOF
+ done >> $dir/crushmap.txt
+ crushtool --compile $dir/crushmap.txt -o $dir/crushmap || return 1
+ ceph osd setcrushmap -i $dir/crushmap || return 1
+ ceph osd crush rule create-simple test_rule_nospace $root osd firstn 2>&1 | grep "Error ENOSPC" || return 1
+
+}
+
+function TEST_crush_rename_bucket() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+
+ ceph osd crush add-bucket host1 host
+ ceph osd tree
+ ! ceph osd tree | grep host2 || return 1
+ ceph osd crush rename-bucket host1 host2 || return 1
+ ceph osd tree
+ ceph osd tree | grep host2 || return 1
+ ceph osd crush rename-bucket host1 host2 || return 1 # idempotency
+ ceph osd crush rename-bucket nonexistent something 2>&1 | grep "Error ENOENT" || return 1
+}
+
+function TEST_crush_ls_node() {
+ local dir=$1
+ run_mon $dir a || return 1
+ ceph osd crush add-bucket default1 root
+ ceph osd crush add-bucket host1 host
+ ceph osd crush move host1 root=default1
+ ceph osd crush ls default1 | grep host1 || return 1
+ ceph osd crush ls default2 2>&1 | grep "Error ENOENT" || return 1
+}
+
+function TEST_crush_reject_empty() {
+ local dir=$1
+ run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
+ # should have at least one OSD
+ run_osd $dir 0 || return 1
+ create_rbd_pool || return 1
+
+ local empty_map=$dir/empty_map
+ :> $empty_map.txt
+ crushtool -c $empty_map.txt -o $empty_map.map || return 1
+ expect_failure $dir "Error EINVAL" \
+ ceph osd setcrushmap -i $empty_map.map || return 1
+}
+
+main osd-crush "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && test/mon/osd-crush.sh"
+# End:
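
TEST_add_rule_failed relies on rule ids being a small fixed-size space: it fills ids 3 through 255 and then expects ENOSPC. A sketch for inspecting how crowded that space is on a live cluster, in the same --format json / jq style the tests above use:

    # number of rules and the ids already taken; create-simple fails with ENOSPC
    # once no id in 0..255 is free
    ceph --format json osd crush rule dump | jq 'length'
    ceph --format json osd crush rule dump | jq '[.[].rule_id] | sort'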
diff --git a/qa/standalone/mon/osd-df.sh b/qa/standalone/mon/osd-df.sh
new file mode 100755
index 000000000..962909fdb
--- /dev/null
+++ b/qa/standalone/mon/osd-df.sh
@@ -0,0 +1,97 @@
+#!/bin/bash
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7113" # git grep '\<7113\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+function TEST_osd_df() {
+ local dir=$1
+ setup $dir || return 1
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+ run_osd $dir 3 || return 1
+ run_osd $dir 4 || return 1
+ run_osd $dir 5 || return 1
+
+ # normal case
+ ceph osd df --f json-pretty | grep osd.0 || return 1
+ ceph osd df --f json-pretty | grep osd.1 || return 1
+ ceph osd df --f json-pretty | grep osd.2 || return 1
+ ceph osd df --f json-pretty | grep osd.3 || return 1
+ ceph osd df --f json-pretty | grep osd.4 || return 1
+ ceph osd df --f json-pretty | grep osd.5 || return 1
+
+ # filter by device class
+ osd_class=$(ceph osd crush get-device-class 0)
+ ceph osd df class $osd_class --f json-pretty | grep 'osd.0' || return 1
+ # since Nautilus the filter type keyword is no longer required
+ ceph osd df $osd_class --f json-pretty | grep 'osd.0' || return 1
+ ceph osd crush rm-device-class 0 || return 1
+ ceph osd crush set-device-class aaa 0 || return 1
+ ceph osd df aaa --f json-pretty | grep 'osd.0' || return 1
+ ceph osd df aaa --f json-pretty | grep 'osd.1' && return 1
+ # reset osd.0's device class
+ ceph osd crush rm-device-class 0 || return 1
+ ceph osd crush set-device-class $osd_class 0 || return 1
+
+ # filter by crush node
+ ceph osd df osd.0 --f json-pretty | grep osd.0 || return 1
+ ceph osd df osd.0 --f json-pretty | grep osd.1 && return 1
+ ceph osd crush move osd.0 root=default host=foo || return 1
+ ceph osd crush move osd.1 root=default host=foo || return 1
+ ceph osd crush move osd.2 root=default host=foo || return 1
+ ceph osd crush move osd.3 root=default host=bar || return 1
+ ceph osd crush move osd.4 root=default host=bar || return 1
+ ceph osd crush move osd.5 root=default host=bar || return 1
+ ceph osd df tree foo --f json-pretty | grep foo || return 1
+ ceph osd df tree foo --f json-pretty | grep bar && return 1
+ ceph osd df foo --f json-pretty | grep osd.0 || return 1
+ ceph osd df foo --f json-pretty | grep osd.1 || return 1
+ ceph osd df foo --f json-pretty | grep osd.2 || return 1
+ ceph osd df foo --f json-pretty | grep osd.3 && return 1
+ ceph osd df foo --f json-pretty | grep osd.4 && return 1
+ ceph osd df foo --f json-pretty | grep osd.5 && return 1
+ ceph osd df tree bar --f json-pretty | grep bar || return 1
+ ceph osd df tree bar --f json-pretty | grep foo && return 1
+ ceph osd df bar --f json-pretty | grep osd.0 && return 1
+ ceph osd df bar --f json-pretty | grep osd.1 && return 1
+ ceph osd df bar --f json-pretty | grep osd.2 && return 1
+ ceph osd df bar --f json-pretty | grep osd.3 || return 1
+ ceph osd df bar --f json-pretty | grep osd.4 || return 1
+ ceph osd df bar --f json-pretty | grep osd.5 || return 1
+
+ # filter by pool
+ ceph osd crush rm-device-class all || return 1
+ ceph osd crush set-device-class nvme 0 1 3 4 || return 1
+ ceph osd crush rule create-replicated nvme-rule default host nvme || return 1
+ ceph osd pool create nvme-pool 12 12 nvme-rule || return 1
+ ceph osd df nvme-pool --f json-pretty | grep osd.0 || return 1
+ ceph osd df nvme-pool --f json-pretty | grep osd.1 || return 1
+ ceph osd df nvme-pool --f json-pretty | grep osd.2 && return 1
+ ceph osd df nvme-pool --f json-pretty | grep osd.3 || return 1
+ ceph osd df nvme-pool --f json-pretty | grep osd.4 || return 1
+ ceph osd df nvme-pool --f json-pretty | grep osd.5 && return 1
+
+ teardown $dir || return 1
+}
+
+main osd-df "$@"
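
The checks above grep pretty-printed JSON for osd names. When debugging a failure it can be easier to list exactly which OSDs a given filter selects; a sketch, assuming the osd df JSON output keeps a nodes[].name layout:

    ceph osd df class nvme -f json | jq -r '.nodes[].name'
    ceph osd df tree foo -f json | jq -r '.nodes[].name'
    ceph osd df nvme-pool -f json | jq -r '.nodes[].name'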
diff --git a/qa/standalone/mon/osd-erasure-code-profile.sh b/qa/standalone/mon/osd-erasure-code-profile.sh
new file mode 100755
index 000000000..0afc5fc0b
--- /dev/null
+++ b/qa/standalone/mon/osd-erasure-code-profile.sh
@@ -0,0 +1,240 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
+# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com>
+#
+# Author: Loic Dachary <loic@dachary.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7220" # git grep '\<7220\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+function TEST_set() {
+ local dir=$1
+ local id=$2
+
+ run_mon $dir a || return 1
+
+ local profile=myprofile
+ #
+ # no key=value pairs : use the default configuration
+ #
+ ceph osd erasure-code-profile set $profile 2>&1 || return 1
+ ceph osd erasure-code-profile get $profile | \
+ grep plugin=jerasure || return 1
+ ceph osd erasure-code-profile rm $profile
+ #
+ # key=value pairs override the default
+ #
+ ceph osd erasure-code-profile set $profile \
+ key=value plugin=isa || return 1
+ ceph osd erasure-code-profile get $profile | \
+ grep -e key=value -e plugin=isa || return 1
+ #
+ # --force is required to override an existing profile
+ #
+ ! ceph osd erasure-code-profile set $profile > $dir/out 2>&1 || return 1
+ grep 'will not override' $dir/out || return 1
+ ceph osd erasure-code-profile set $profile key=other --force || return 1
+ ceph osd erasure-code-profile get $profile | \
+ grep key=other || return 1
+
+ ceph osd erasure-code-profile rm $profile # cleanup
+}
+
+function TEST_ls() {
+ local dir=$1
+ local id=$2
+
+ run_mon $dir a || return 1
+
+ local profile=myprofile
+ ! ceph osd erasure-code-profile ls | grep $profile || return 1
+ ceph osd erasure-code-profile set $profile 2>&1 || return 1
+ ceph osd erasure-code-profile ls | grep $profile || return 1
+ ceph --format xml osd erasure-code-profile ls | \
+ grep "<profile>$profile</profile>" || return 1
+
+ ceph osd erasure-code-profile rm $profile # cleanup
+}
+
+function TEST_rm() {
+ local dir=$1
+ local id=$2
+
+ run_mon $dir a || return 1
+
+ local profile=myprofile
+ ceph osd erasure-code-profile set $profile 2>&1 || return 1
+ ceph osd erasure-code-profile ls | grep $profile || return 1
+ ceph osd erasure-code-profile rm $profile || return 1
+ ! ceph osd erasure-code-profile ls | grep $profile || return 1
+ ceph osd erasure-code-profile rm WRONG 2>&1 | \
+ grep "WRONG does not exist" || return 1
+
+ ceph osd erasure-code-profile set $profile || return 1
+ create_pool poolname 12 12 erasure $profile || return 1
+ ! ceph osd erasure-code-profile rm $profile > $dir/out 2>&1 || return 1
+ grep "poolname.*using.*$profile" $dir/out || return 1
+ ceph osd pool delete poolname poolname --yes-i-really-really-mean-it || return 1
+ ceph osd erasure-code-profile rm $profile || return 1
+
+ ceph osd erasure-code-profile rm $profile # cleanup
+}
+
+function TEST_get() {
+ local dir=$1
+ local id=$2
+
+ run_mon $dir a || return 1
+
+ local default_profile=default
+ ceph osd erasure-code-profile get $default_profile | \
+ grep plugin=jerasure || return 1
+ ceph --format xml osd erasure-code-profile get $default_profile | \
+ grep '<plugin>jerasure</plugin>' || return 1
+ ! ceph osd erasure-code-profile get WRONG > $dir/out 2>&1 || return 1
+ grep -q "unknown erasure code profile 'WRONG'" $dir/out || return 1
+}
+
+function TEST_set_idempotent() {
+ local dir=$1
+ local id=$2
+
+ run_mon $dir a || return 1
+ #
+ # The default profile is set using a code path different from
+ # ceph osd erasure-code-profile set: verify that it is idempotent,
+ # as if it was using the same code path.
+ #
+ ceph osd erasure-code-profile set default k=2 m=2 2>&1 || return 1
+ local profile
+ #
+ # Because plugin=jerasure is the default, it uses a slightly
+ # different code path where defaults (m=1 for instance) are added
+ # implicitly.
+ #
+ profile=profileidempotent1
+ ! ceph osd erasure-code-profile ls | grep $profile || return 1
+ ceph osd erasure-code-profile set $profile k=2 crush-failure-domain=osd 2>&1 || return 1
+ ceph osd erasure-code-profile ls | grep $profile || return 1
+ ceph osd erasure-code-profile set $profile k=2 crush-failure-domain=osd 2>&1 || return 1
+ ceph osd erasure-code-profile rm $profile # cleanup
+
+ #
+ # In the general case the profile is exactly what was set on the command line
+ #
+ profile=profileidempotent2
+ ! ceph osd erasure-code-profile ls | grep $profile || return 1
+ ceph osd erasure-code-profile set $profile plugin=lrc k=4 m=2 l=3 crush-failure-domain=osd 2>&1 || return 1
+ ceph osd erasure-code-profile ls | grep $profile || return 1
+ ceph osd erasure-code-profile set $profile plugin=lrc k=4 m=2 l=3 crush-failure-domain=osd 2>&1 || return 1
+ ceph osd erasure-code-profile rm $profile # cleanup
+}
+
+function TEST_format_invalid() {
+ local dir=$1
+
+ local profile=profile
+ # osd_pool_default_erasure-code-profile is
+ # valid JSON but not of the expected type
+ run_mon $dir a \
+ --osd_pool_default_erasure-code-profile 1 || return 1
+ ! ceph osd erasure-code-profile set $profile > $dir/out 2>&1 || return 1
+ cat $dir/out
+ grep 'must be a JSON object' $dir/out || return 1
+}
+
+function TEST_format_json() {
+ local dir=$1
+
+ # osd_pool_default_erasure-code-profile is JSON
+ expected='"plugin":"isa"'
+ run_mon $dir a \
+ --osd_pool_default_erasure-code-profile "{$expected}" || return 1
+ ceph --format json osd erasure-code-profile get default | \
+ grep "$expected" || return 1
+}
+
+function TEST_format_plain() {
+ local dir=$1
+
+ # osd_pool_default_erasure-code-profile is plain text
+ expected='"plugin":"isa"'
+ run_mon $dir a \
+ --osd_pool_default_erasure-code-profile "plugin=isa" || return 1
+ ceph --format json osd erasure-code-profile get default | \
+ grep "$expected" || return 1
+}
+
+function TEST_profile_k_sanity() {
+ local dir=$1
+ local profile=profile-sanity
+
+ run_mon $dir a || return 1
+
+ expect_failure $dir 'k must be a multiple of (k + m) / l' \
+ ceph osd erasure-code-profile set $profile \
+ plugin=lrc \
+ l=1 \
+ k=1 \
+ m=1 || return 1
+
+ if erasure_code_plugin_exists isa ; then
+ expect_failure $dir 'k=1 must be >= 2' \
+ ceph osd erasure-code-profile set $profile \
+ plugin=isa \
+ k=1 \
+ m=1 || return 1
+ else
+ echo "SKIP because plugin isa has not been built"
+ fi
+
+ expect_failure $dir 'k=1 must be >= 2' \
+ ceph osd erasure-code-profile set $profile \
+ plugin=jerasure \
+ k=1 \
+ m=1 || return 1
+}
+
+function TEST_invalid_crush_failure_domain() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+
+ local profile=ec_profile
+ local crush_failure_domain=invalid_failure_domain
+
+ ! ceph osd erasure-code-profile set $profile k=4 m=2 crush-failure-domain=$crush_failure_domain 2>&1 || return 1
+}
+
+main osd-erasure-code-profile "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && test/mon/osd-erasure-code-profile.sh"
+# End:
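
TEST_profile_k_sanity expects the lrc plugin to reject profiles where k is not a multiple of (k + m) / l, and TEST_set_idempotent uses a profile that satisfies the rule. The arithmetic behind those two cases, as a standalone sketch (the plugin enforces more constraints than this single rule):

    lrc_k_ok() { local k=$1 m=$2 l=$3; local q=$(( (k + m) / l )); (( k % q == 0 )); }
    lrc_k_ok 1 1 1 && echo ok || echo reject   # (1+1)/1 = 2, 1 is not a multiple of 2 -> reject
    lrc_k_ok 4 2 3 && echo ok || echo reject   # (4+2)/3 = 2, 4 is a multiple of 2     -> ok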
diff --git a/qa/standalone/mon/osd-pool-create.sh b/qa/standalone/mon/osd-pool-create.sh
new file mode 100755
index 000000000..6d2c5ad3e
--- /dev/null
+++ b/qa/standalone/mon/osd-pool-create.sh
@@ -0,0 +1,307 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2013, 2014 Cloudwatt <libre.licensing@cloudwatt.com>
+# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com>
+#
+# Author: Loic Dachary <loic@dachary.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7105" # git grep '\<7105\>' : there must be only one
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+ export CEPH_ARGS
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+# Before http://tracker.ceph.com/issues/8307 the invalid profile was created
+function TEST_erasure_invalid_profile() {
+ local dir=$1
+ run_mon $dir a || return 1
+ local poolname=pool_erasure
+ local notaprofile=not-a-valid-erasure-code-profile
+ ! ceph osd pool create $poolname 12 12 erasure $notaprofile || return 1
+ ! ceph osd erasure-code-profile ls | grep $notaprofile || return 1
+}
+
+function TEST_erasure_crush_rule() {
+ local dir=$1
+ run_mon $dir a || return 1
+ #
+ # choose the crush rule used with an erasure coded pool
+ #
+ local crush_rule=myrule
+ ! ceph osd crush rule ls | grep $crush_rule || return 1
+ ceph osd crush rule create-erasure $crush_rule
+ ceph osd crush rule ls | grep $crush_rule
+ local poolname
+ poolname=pool_erasure1
+ ! ceph --format json osd dump | grep '"crush_rule":1' || return 1
+ ceph osd pool create $poolname 12 12 erasure default $crush_rule
+ ceph --format json osd dump | grep '"crush_rule":1' || return 1
+ #
+ # a crush rule by the name of the pool is implicitly created
+ #
+ poolname=pool_erasure2
+ ceph osd erasure-code-profile set myprofile
+ ceph osd pool create $poolname 12 12 erasure myprofile
+ ceph osd crush rule ls | grep $poolname || return 1
+ #
+ # a non-existent crush rule given as an argument is an error
+ # http://tracker.ceph.com/issues/9304
+ #
+ poolname=pool_erasure3
+ ! ceph osd pool create $poolname 12 12 erasure myprofile INVALIDRULE || return 1
+}
+
+function TEST_erasure_code_profile_default() {
+ local dir=$1
+ local poolname=pool_default
+ run_mon $dir a || return 1
+ ceph osd erasure-code-profile rm default || return 1
+ ! ceph osd erasure-code-profile ls | grep default || return 1
+ ceph osd pool create $poolname 12 12 erasure default
+ ceph osd erasure-code-profile ls | grep default || return 1
+}
+
+function TEST_erasure_crush_stripe_unit() {
+ local dir=$1
+ # the default stripe unit is used to initialize the pool
+ run_mon $dir a --public-addr $CEPH_MON
+ stripe_unit=$(ceph-conf --show-config-value osd_pool_erasure_code_stripe_unit)
+ eval local $(ceph osd erasure-code-profile get myprofile | grep k=)
+ stripe_width=$((stripe_unit * k))
+ ceph osd pool create pool_erasure 12 12 erasure
+ ceph --format json osd dump | tee $dir/osd.json
+ grep '"stripe_width":'$stripe_width $dir/osd.json > /dev/null || return 1
+}
+
+function TEST_erasure_crush_stripe_unit_padded() {
+ local dir=$1
+ # setting osd_pool_erasure_code_stripe_unit modifies the stripe_width
+ # and it is padded as required by the default plugin
+ profile+=" plugin=jerasure"
+ profile+=" technique=reed_sol_van"
+ k=4
+ profile+=" k=$k"
+ profile+=" m=2"
+ actual_stripe_unit=2048
+ desired_stripe_unit=$((actual_stripe_unit - 1))
+ actual_stripe_width=$((actual_stripe_unit * k))
+ run_mon $dir a \
+ --osd_pool_erasure_code_stripe_unit $desired_stripe_unit \
+ --osd_pool_default_erasure_code_profile "$profile" || return 1
+ ceph osd pool create pool_erasure 12 12 erasure
+ ceph osd dump | tee $dir/osd.json
+ grep "stripe_width $actual_stripe_width" $dir/osd.json > /dev/null || return 1
+}
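
To make the padding in TEST_erasure_crush_stripe_unit_padded concrete: the requested stripe unit of 2047 bytes (actual_stripe_unit - 1) is deliberately misaligned, gets padded back up to 2048, and with k=4 that yields the stripe_width the grep asserts:

    # desired_stripe_unit = 2048 - 1 = 2047   (intentionally not aligned)
    # padded stripe unit  = 2048
    # stripe_width        = 2048 * 4 = 8192   -> "stripe_width 8192" in the osd dump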
+
+function TEST_erasure_code_pool() {
+ local dir=$1
+ run_mon $dir a || return 1
+ ceph --format json osd dump > $dir/osd.json
+ local expected='"erasure_code_profile":"default"'
+ ! grep "$expected" $dir/osd.json || return 1
+ ceph osd pool create erasurecodes 12 12 erasure
+ ceph --format json osd dump | tee $dir/osd.json
+ grep "$expected" $dir/osd.json > /dev/null || return 1
+
+ ceph osd pool create erasurecodes 12 12 erasure 2>&1 | \
+ grep 'already exists' || return 1
+ ceph osd pool create erasurecodes 12 12 2>&1 | \
+ grep 'cannot change to type replicated' || return 1
+}
+
+function TEST_replicated_pool_with_rule() {
+ local dir=$1
+ run_mon $dir a
+ local rule=rule0
+ local root=host1
+ ceph osd crush add-bucket $root host
+ local failure_domain=osd
+ local poolname=mypool
+ ceph osd crush rule create-simple $rule $root $failure_domain || return 1
+ ceph osd crush rule ls | grep $rule
+ ceph osd pool create $poolname 12 12 replicated $rule || return 1
+ rule_id=`ceph osd crush rule dump $rule | grep "rule_id" | awk -F[' ':,] '{print $4}'`
+ ceph osd pool get $poolname crush_rule 2>&1 | \
+ grep "crush_rule: $rule_id" || return 1
+ #non-existent crush rule
+ ceph osd pool create newpool 12 12 replicated non-existent 2>&1 | \
+ grep "doesn't exist" || return 1
+}
+
+function TEST_erasure_code_pool_lrc() {
+ local dir=$1
+ run_mon $dir a || return 1
+
+ ceph osd erasure-code-profile set LRCprofile \
+ plugin=lrc \
+ mapping=DD_ \
+ layers='[ [ "DDc", "" ] ]' || return 1
+
+ ceph --format json osd dump > $dir/osd.json
+ local expected='"erasure_code_profile":"LRCprofile"'
+ local poolname=erasurecodes
+ ! grep "$expected" $dir/osd.json || return 1
+ ceph osd pool create $poolname 12 12 erasure LRCprofile
+ ceph --format json osd dump | tee $dir/osd.json
+ grep "$expected" $dir/osd.json > /dev/null || return 1
+ ceph osd crush rule ls | grep $poolname || return 1
+}
+
+function TEST_replicated_pool() {
+ local dir=$1
+ run_mon $dir a || return 1
+ ceph osd pool create replicated 12 12 replicated replicated_rule || return 1
+ ceph osd pool create replicated 12 12 replicated replicated_rule 2>&1 | \
+ grep 'already exists' || return 1
+ # default is replicated
+ ceph osd pool create replicated1 12 12 || return 1
+ # default is replicated, pgp_num = pg_num
+ ceph osd pool create replicated2 12 || return 1
+ ceph osd pool create replicated 12 12 erasure 2>&1 | \
+ grep 'cannot change to type erasure' || return 1
+}
+
+function TEST_no_pool_delete() {
+ local dir=$1
+ run_mon $dir a || return 1
+ ceph osd pool create foo 1 || return 1
+ ceph tell mon.a injectargs -- --no-mon-allow-pool-delete || return 1
+ ! ceph osd pool delete foo foo --yes-i-really-really-mean-it || return 1
+ ceph tell mon.a injectargs -- --mon-allow-pool-delete || return 1
+ ceph osd pool delete foo foo --yes-i-really-really-mean-it || return 1
+}
+
+function TEST_utf8_cli() {
+ local dir=$1
+ run_mon $dir a || return 1
+ # Hopefully it's safe to include literal UTF-8 characters to test
+ # the fix for http://tracker.ceph.com/issues/7387. If it turns out
+ # to not be OK (when is the default encoding *not* UTF-8?), maybe
+ # the character '黄' can be replaced with the escape $'\xe9\xbb\x84'
+ OLDLANG="$LANG"
+ export LANG=en_US.UTF-8
+ ceph osd pool create 黄 16 || return 1
+ ceph osd lspools 2>&1 | \
+ grep "黄" || return 1
+ ceph -f json-pretty osd dump | \
+ python3 -c "import json; import sys; json.load(sys.stdin)" || return 1
+ ceph osd pool delete 黄 黄 --yes-i-really-really-mean-it
+ export LANG="$OLDLANG"
+}
+
+function check_pool_priority() {
+ local dir=$1
+ shift
+ local pools=$1
+ shift
+ local spread="$1"
+ shift
+ local results="$1"
+
+ setup $dir || return 1
+
+ EXTRA_OPTS="--debug_allow_any_pool_priority=true"
+ export EXTRA_OPTS
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+
+ # Add pool 0 too
+ for i in $(seq 0 $pools)
+ do
+ num=$(expr $i + 1)
+ ceph osd pool create test${num} 1 1
+ done
+
+ wait_for_clean || return 1
+ for i in $(seq 0 $pools)
+ do
+ num=$(expr $i + 1)
+ ceph osd pool set test${num} recovery_priority $(expr $i \* $spread)
+ done
+
+ #grep "recovery_priority.*pool set" out/mon.a.log
+
+ ceph osd dump
+
+ # Restart everything so mon converts the priorities
+ kill_daemons
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ activate_osd $dir 0 || return 1
+ activate_osd $dir 1 || return 1
+ activate_osd $dir 2 || return 1
+ sleep 5
+
+ grep convert $dir/mon.a.log
+ ceph osd dump
+
+ pos=1
+ for i in $(ceph osd dump | grep ^pool | sed 's/.*recovery_priority //' | awk '{ print $1 }')
+ do
+ result=$(echo $results | awk "{ print \$${pos} }")
+ # A value of 0 is an unset value so sed/awk gets "pool"
+ if test $result = "0"
+ then
+ result="pool"
+ fi
+ test "$result" = "$i" || return 1
+ pos=$(expr $pos + 1)
+ done
+}
+
+function TEST_pool_pos_only_prio() {
+ local dir=$1
+ check_pool_priority $dir 20 5 "0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10" || return 1
+}
+
+function TEST_pool_neg_only_prio() {
+ local dir=$1
+ check_pool_priority $dir 20 -5 "0 0 -1 -1 -2 -2 -3 -3 -4 -4 -5 -5 -6 -6 -7 -7 -8 -8 -9 -9 -10" || return 1
+}
+
+function TEST_pool_both_prio() {
+ local dir=$1
+ check_pool_priority $dir 20 "5 - 50" "-10 -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 10" || return 1
+}
+
+function TEST_pool_both_prio_no_neg() {
+ local dir=$1
+ check_pool_priority $dir 20 "2 - 4" "-4 -2 0 0 1 1 2 2 3 3 4 5 5 6 6 7 7 8 8 9 10" || return 1
+}
+
+function TEST_pool_both_prio_no_pos() {
+ local dir=$1
+ check_pool_priority $dir 20 "2 - 36" "-10 -9 -8 -8 -7 -7 -6 -6 -5 -5 -4 -3 -3 -2 -2 -1 -1 0 0 2 4" || return 1
+}
+
+
+main osd-pool-create "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && test/mon/osd-pool-create.sh"
+# End:
diff --git a/qa/standalone/mon/osd-pool-df.sh b/qa/standalone/mon/osd-pool-df.sh
new file mode 100755
index 000000000..d2b80ec72
--- /dev/null
+++ b/qa/standalone/mon/osd-pool-df.sh
@@ -0,0 +1,76 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2017 Tencent <contact@tencent.com>
+#
+# Author: Chang Liu <liuchang0812@gmail.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7113" # git grep '\<7113\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+function TEST_ceph_df() {
+ local dir=$1
+ setup $dir || return 1
+
+ run_mon $dir a || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+ run_osd $dir 3 || return 1
+ run_osd $dir 4 || return 1
+ run_osd $dir 5 || return 1
+ run_mgr $dir x || return 1
+
+ profile+=" plugin=jerasure"
+ profile+=" technique=reed_sol_van"
+ profile+=" k=4"
+ profile+=" m=2"
+ profile+=" crush-failure-domain=osd"
+
+ ceph osd erasure-code-profile set ec42profile ${profile}
+
+ local rep_poolname=testcephdf_replicate
+ local ec_poolname=testcephdf_erasurecode
+ create_pool $rep_poolname 6 6 replicated
+ create_pool $ec_poolname 6 6 erasure ec42profile
+ flush_pg_stats
+
+ local global_avail=`ceph df -f json | jq '.stats.total_avail_bytes'`
+ local rep_avail=`ceph df -f json | jq '.pools | map(select(.name == "'$rep_poolname'"))[0].stats.max_avail'`
+ local ec_avail=`ceph df -f json | jq '.pools | map(select(.name == "'$ec_poolname'"))[0].stats.max_avail'`
+
+ echo "${global_avail} >= ${rep_avail}*3" | bc || return 1
+ echo "${global_avail} >= ${ec_avail}*1.5" | bc || return 1
+
+ ceph osd pool delete $rep_poolname $rep_poolname --yes-i-really-really-mean-it
+ ceph osd pool delete $ec_poolname $ec_poolname --yes-i-really-really-mean-it
+ ceph osd erasure-code-profile rm ec42profile
+ teardown $dir || return 1
+}
+
+main osd-pool-df "$@"
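
The two factors in the bc comparisons above encode the raw-space overhead of each pool type, assuming the default replicated size of 3 (which the *3 factor implies) and the 4+2 erasure profile created here:

    # replicated, size 3          -> raw overhead 3          -> max_avail <= total_avail / 3
    # erasure,    k=4 m=2 (4+2)   -> raw overhead 6/4 = 1.5  -> max_avail <= total_avail / 1.5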
diff --git a/qa/standalone/mon/test_pool_quota.sh b/qa/standalone/mon/test_pool_quota.sh
new file mode 100755
index 000000000..b87ec2232
--- /dev/null
+++ b/qa/standalone/mon/test_pool_quota.sh
@@ -0,0 +1,63 @@
+#!/usr/bin/env bash
+
+#
+# Generic pool quota test
+#
+
+# Includes
+
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:17108" # git grep '\<17108\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ $func $dir || return 1
+ done
+}
+
+function TEST_pool_quota() {
+ local dir=$1
+ setup $dir || return 1
+
+ run_mon $dir a || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+
+ local poolname=testquota
+ create_pool $poolname 20
+ local objects=`ceph df detail | grep -w $poolname|awk '{print $3}'`
+ local bytes=`ceph df detail | grep -w $poolname|awk '{print $4}'`
+
+ echo $objects
+ echo $bytes
+ if [ $objects != 'N/A' ] || [ $bytes != 'N/A' ] ;
+ then
+ return 1
+ fi
+
+ ceph osd pool set-quota $poolname max_objects 1000
+ ceph osd pool set-quota $poolname max_bytes 1024
+
+ objects=`ceph df detail | grep -w $poolname|awk '{print $3}'`
+ bytes=`ceph df detail | grep -w $poolname|awk '{print $4}'`
+
+ if [ $objects != '1000' ] || [ $bytes != '1K' ] ;
+ then
+ return 1
+ fi
+
+ ceph osd pool delete $poolname $poolname --yes-i-really-really-mean-it
+ teardown $dir || return 1
+}
+
+main testpoolquota
diff --git a/qa/standalone/osd-backfill/osd-backfill-prio.sh b/qa/standalone/osd-backfill/osd-backfill-prio.sh
new file mode 100755
index 000000000..9749ca34c
--- /dev/null
+++ b/qa/standalone/osd-backfill/osd-backfill-prio.sh
@@ -0,0 +1,522 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2019 Red Hat <contact@redhat.com>
+#
+# Author: David Zafman <dzafman@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ # Fix port????
+ export CEPH_MON="127.0.0.1:7114" # git grep '\<7114\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON --osd_max_backfills=1 --debug_reserver=20 "
+ CEPH_ARGS+="--osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10 "
+ # Set osd op queue = wpq for the tests. Backfill priority is not
+ # considered by mclock_scheduler leading to unexpected results.
+ CEPH_ARGS+="--osd-op-queue=wpq "
+ export objects=50
+ export poolprefix=test
+ export FORCE_PRIO="254" # See OSD_BACKFILL_PRIORITY_FORCED
+ export DEGRADED_PRIO="150" # See OSD_BACKFILL_DEGRADED_PRIORITY_BASE + 10
+ export NORMAL_PRIO="110" # See OSD_BACKFILL_PRIORITY_BASE + 10
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+
+function TEST_backfill_priority() {
+ local dir=$1
+ local pools=10
+ local OSDS=5
+ # size 2 -> 1 means degraded by 1, so add 1 to base prio
+ local degraded_prio=$(expr $DEGRADED_PRIO + 1)
+ local max_tries=10
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ export CEPH_ARGS
+
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ run_osd $dir $osd || return 1
+ done
+
+ for p in $(seq 1 $pools)
+ do
+ create_pool "${poolprefix}$p" 1 1
+ ceph osd pool set "${poolprefix}$p" size 2
+ done
+ sleep 5
+
+ wait_for_clean || return 1
+
+ ceph pg dump pgs
+
+ # Find 3 pools whose pg 0 has the same primary but a different
+ # second replica on each.
+ local PG1
+ local POOLNUM1
+ local pool1
+ local chk_osd1_1
+ local chk_osd1_2
+
+ local PG2
+ local POOLNUM2
+ local pool2
+ local chk_osd2
+
+ local PG3
+ local POOLNUM3
+ local pool3
+
+ for p in $(seq 1 $pools)
+ do
+ ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting
+ local test_osd1=$(head -1 $dir/acting)
+ local test_osd2=$(tail -1 $dir/acting)
+ if [ -z "$PG1" ];
+ then
+ PG1="${p}.0"
+ POOLNUM1=$p
+ pool1="${poolprefix}$p"
+ chk_osd1_1=$test_osd1
+ chk_osd1_2=$test_osd2
+ elif [ -z "$PG2" -a $chk_osd1_1 = $test_osd1 -a $chk_osd1_2 != $test_osd2 ];
+ then
+ PG2="${p}.0"
+ POOLNUM2=$p
+ pool2="${poolprefix}$p"
+ chk_osd2=$test_osd2
+ elif [ -n "$PG2" -a $chk_osd1_1 = $test_osd1 -a $chk_osd1_2 != $test_osd2 -a "$chk_osd2" != $test_osd2 ];
+ then
+ PG3="${p}.0"
+ POOLNUM3=$p
+ pool3="${poolprefix}$p"
+ break
+ fi
+ done
+ rm -f $dir/acting
+
+ if [ "$pool2" = "" -o "pool3" = "" ];
+ then
+ echo "Failure to find appropirate PGs"
+ return 1
+ fi
+
+ for p in $(seq 1 $pools)
+ do
+ if [ $p != $POOLNUM1 -a $p != $POOLNUM2 -a $p != $POOLNUM3 ];
+ then
+ delete_pool ${poolprefix}$p
+ fi
+ done
+
+ ceph osd pool set $pool2 size 1 --yes-i-really-mean-it
+ ceph osd pool set $pool3 size 1 --yes-i-really-mean-it
+ wait_for_clean || return 1
+
+ dd if=/dev/urandom of=$dir/data bs=1M count=10
+ p=1
+ for pname in $pool1 $pool2 $pool3
+ do
+ for i in $(seq 1 $objects)
+ do
+ rados -p ${pname} put obj${i}-p${p} $dir/data
+ done
+ p=$(expr $p + 1)
+ done
+
+ local otherosd=$(get_not_primary $pool1 obj1-p1)
+
+ ceph pg dump pgs
+ ERRORS=0
+
+ ceph osd set nobackfill
+ ceph osd set noout
+
+ # Get a pg to want to backfill and quickly force it
+ # to be preempted.
+ ceph osd pool set $pool3 size 2
+ sleep 2
+
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1
+
+ # 3. Item is in progress, adjust priority with no higher priority waiting
+ for i in $(seq 1 $max_tries)
+ do
+ if ! ceph pg force-backfill $PG3 2>&1 | grep -q "doesn't require backfilling"; then
+ break
+ fi
+ if [ "$i" = "$max_tries" ]; then
+ echo "ERROR: Didn't appear to be able to force-backfill"
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+ sleep 2
+ done
+ flush_pg_stats || return 1
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1
+
+ ceph osd out osd.$chk_osd1_2
+ sleep 2
+ flush_pg_stats || return 1
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1
+ ceph pg dump pgs
+
+ ceph osd pool set $pool2 size 2
+ sleep 2
+ flush_pg_stats || return 1
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1
+ cat $dir/out
+ ceph pg dump pgs
+
+ PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG1}\")).prio")
+ if [ "$PRIO" != "$NORMAL_PRIO" ];
+ then
+ echo "The normal PG ${PG1} doesn't have prio $NORMAL_PRIO queued waiting"
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+
+ # Using eval will strip double-quotes from item
+ eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item')
+ if [ "$ITEM" != ${PG3} ];
+ then
+ echo "The force-backfill PG $PG3 didn't become the in progress item"
+ ERRORS=$(expr $ERRORS + 1)
+ else
+ PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio')
+ if [ "$PRIO" != $FORCE_PRIO ];
+ then
+ echo "The force-backfill PG ${PG3} doesn't have prio $FORCE_PRIO"
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+ fi
+
+ # 1. Item is queued, re-queue with new priority
+ for i in $(seq 1 $max_tries)
+ do
+ if ! ceph pg force-backfill $PG2 2>&1 | grep -q "doesn't require backfilling"; then
+ break
+ fi
+ if [ "$i" = "$max_tries" ]; then
+ echo "ERROR: Didn't appear to be able to force-backfill"
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+ sleep 2
+ done
+ sleep 2
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1
+ cat $dir/out
+ PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio")
+ if [ "$PRIO" != "$FORCE_PRIO" ];
+ then
+ echo "The second force-backfill PG ${PG2} doesn't have prio $FORCE_PRIO"
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+ flush_pg_stats || return 1
+
+ # 4. Item is in progress; if higher priority items are waiting, preempt the item
+ ceph pg cancel-force-backfill $PG3 || return 1
+ sleep 2
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1
+ cat $dir/out
+ PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG3}\")).prio")
+ if [ "$PRIO" != "$degraded_prio" ];
+ then
+ echo "After cancel-force-backfill PG ${PG3} doesn't have prio $degraded_prio"
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+
+ eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item')
+ if [ "$ITEM" != ${PG2} ];
+ then
+ echo "The force-recovery PG $PG2 didn't become the in progress item"
+ ERRORS=$(expr $ERRORS + 1)
+ else
+ PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio')
+ if [ "$PRIO" != $FORCE_PRIO ];
+ then
+ echo "The first force-recovery PG ${PG2} doesn't have prio $FORCE_PRIO"
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+ fi
+
+ ceph pg cancel-force-backfill $PG2 || return 1
+ sleep 5
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1
+
+ # 2. Item is queued; re-queue and preempt because the new priority is higher than the in-progress item
+ flush_pg_stats || return 1
+ ceph pg force-backfill $PG3 || return 1
+ sleep 2
+
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1
+ cat $dir/out
+ PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio")
+ if [ "$PRIO" != "$degraded_prio" ];
+ then
+ echo "After cancel-force-backfill PG ${PG2} doesn't have prio $degraded_prio"
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+
+ eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item')
+ if [ "$ITEM" != ${PG3} ];
+ then
+ echo "The force-backfill PG $PG3 didn't get promoted to an in progress item"
+ ERRORS=$(expr $ERRORS + 1)
+ else
+ PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio')
+ if [ "$PRIO" != $FORCE_PRIO ];
+ then
+            echo "The force-backfill PG ${PG3} doesn't have prio $FORCE_PRIO"
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+ fi
+
+ ceph osd unset noout
+ ceph osd unset nobackfill
+
+ wait_for_clean "CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations" || return 1
+
+ ceph pg dump pgs
+
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_pgstate_history
+
+ if [ $ERRORS != "0" ];
+ then
+ echo "$ERRORS error(s) found"
+ else
+ echo TEST PASSED
+ fi
+
+ delete_pool $pool1
+ delete_pool $pool2
+ delete_pool $pool3
+ kill_daemons $dir || return 1
+ return $ERRORS
+}
+
+#
+# Show that pool recovery_priority is added to the backfill priority
+#
+# Create 2 pools with 2 OSDs with different primaries
+# pool 1 with recovery_priority 1
+# pool 2 with recovery_priority 2
+#
+# Start backfill by changing the pool sizes from 1 to 2
+# Use dump_recovery_reservations to verify priorities
+function TEST_backfill_pool_priority() {
+ local dir=$1
+    local pools=3 # Don't assume the first 2 pools are exactly what we want
+ local OSDS=2
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ export CEPH_ARGS
+
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ run_osd $dir $osd || return 1
+ done
+
+ for p in $(seq 1 $pools)
+ do
+ create_pool "${poolprefix}$p" 1 1
+ ceph osd pool set "${poolprefix}$p" size 2
+ done
+ sleep 5
+
+ wait_for_clean || return 1
+
+ ceph pg dump pgs
+
+ # Find 2 pools with different primaries which
+ # means the replica must be on another osd.
+ local PG1
+ local POOLNUM1
+ local pool1
+ local chk_osd1_1
+ local chk_osd1_2
+
+ local PG2
+ local POOLNUM2
+ local pool2
+ local chk_osd2_1
+ local chk_osd2_2
+
+ for p in $(seq 1 $pools)
+ do
+ ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting
+ local test_osd1=$(head -1 $dir/acting)
+ local test_osd2=$(tail -1 $dir/acting)
+ if [ -z "$PG1" ];
+ then
+ PG1="${p}.0"
+ POOLNUM1=$p
+ pool1="${poolprefix}$p"
+ chk_osd1_1=$test_osd1
+ chk_osd1_2=$test_osd2
+ elif [ $chk_osd1_1 != $test_osd1 ];
+ then
+ PG2="${p}.0"
+ POOLNUM2=$p
+ pool2="${poolprefix}$p"
+ chk_osd2_1=$test_osd1
+ chk_osd2_2=$test_osd2
+ break
+ fi
+ done
+ rm -f $dir/acting
+
+ if [ "$pool2" = "" ];
+ then
+        echo "Failure to find appropriate PGs"
+ return 1
+ fi
+
+ for p in $(seq 1 $pools)
+ do
+ if [ $p != $POOLNUM1 -a $p != $POOLNUM2 ];
+ then
+ delete_pool ${poolprefix}$p
+ fi
+ done
+
+ pool1_extra_prio=1
+ pool2_extra_prio=2
+    # with size 2 and only 1 copy present, each PG is degraded by 1, so add 1 to the base prio
+ pool1_prio=$(expr $DEGRADED_PRIO + 1 + $pool1_extra_prio)
+ pool2_prio=$(expr $DEGRADED_PRIO + 1 + $pool2_extra_prio)
+
+ ceph osd pool set $pool1 size 1 --yes-i-really-mean-it
+ ceph osd pool set $pool1 recovery_priority $pool1_extra_prio
+ ceph osd pool set $pool2 size 1 --yes-i-really-mean-it
+ ceph osd pool set $pool2 recovery_priority $pool2_extra_prio
+ wait_for_clean || return 1
+
+ dd if=/dev/urandom of=$dir/data bs=1M count=10
+ p=1
+ for pname in $pool1 $pool2
+ do
+ for i in $(seq 1 $objects)
+ do
+ rados -p ${pname} put obj${i}-p${p} $dir/data
+ done
+ p=$(expr $p + 1)
+ done
+
+ local otherosd=$(get_not_primary $pool1 obj1-p1)
+
+ ceph pg dump pgs
+ ERRORS=0
+
+ ceph osd pool set $pool1 size 2
+ ceph osd pool set $pool2 size 2
+ sleep 5
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/dump.${chk_osd1_1}.out
+ echo osd.${chk_osd1_1}
+ cat $dir/dump.${chk_osd1_1}.out
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_2}) dump_recovery_reservations > $dir/dump.${chk_osd1_2}.out
+ echo osd.${chk_osd1_2}
+ cat $dir/dump.${chk_osd1_2}.out
+
+ # Using eval will strip double-quotes from item
+ eval ITEM=$(cat $dir/dump.${chk_osd1_1}.out | jq '.local_reservations.in_progress[0].item')
+ if [ "$ITEM" != ${PG1} ];
+ then
+ echo "The primary PG ${PG1} didn't become the in progress item"
+ ERRORS=$(expr $ERRORS + 1)
+ else
+ PRIO=$(cat $dir/dump.${chk_osd1_1}.out | jq '.local_reservations.in_progress[0].prio')
+ if [ "$PRIO" != $pool1_prio ];
+ then
+ echo "The primary PG ${PG1} doesn't have prio $pool1_prio"
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+ fi
+
+ # Using eval will strip double-quotes from item
+ eval ITEM=$(cat $dir/dump.${chk_osd1_2}.out | jq '.remote_reservations.in_progress[0].item')
+ if [ "$ITEM" != ${PG1} ];
+ then
+ echo "The primary PG ${PG1} didn't become the in progress item on remote"
+ ERRORS=$(expr $ERRORS + 1)
+ else
+ PRIO=$(cat $dir/dump.${chk_osd1_2}.out | jq '.remote_reservations.in_progress[0].prio')
+ if [ "$PRIO" != $pool1_prio ];
+ then
+ echo "The primary PG ${PG1} doesn't have prio $pool1_prio on remote"
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+ fi
+
+ # Using eval will strip double-quotes from item
+ eval ITEM=$(cat $dir/dump.${chk_osd2_1}.out | jq '.local_reservations.in_progress[0].item')
+ if [ "$ITEM" != ${PG2} ];
+ then
+ echo "The primary PG ${PG2} didn't become the in progress item"
+ ERRORS=$(expr $ERRORS + 1)
+ else
+ PRIO=$(cat $dir/dump.${chk_osd2_1}.out | jq '.local_reservations.in_progress[0].prio')
+ if [ "$PRIO" != $pool2_prio ];
+ then
+ echo "The primary PG ${PG2} doesn't have prio $pool2_prio"
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+ fi
+
+ # Using eval will strip double-quotes from item
+ eval ITEM=$(cat $dir/dump.${chk_osd2_2}.out | jq '.remote_reservations.in_progress[0].item')
+ if [ "$ITEM" != ${PG2} ];
+ then
+ echo "The primary PG $PG2 didn't become the in progress item on remote"
+ ERRORS=$(expr $ERRORS + 1)
+ else
+ PRIO=$(cat $dir/dump.${chk_osd2_2}.out | jq '.remote_reservations.in_progress[0].prio')
+ if [ "$PRIO" != $pool2_prio ];
+ then
+ echo "The primary PG ${PG2} doesn't have prio $pool2_prio on remote"
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+ fi
+
+ wait_for_clean || return 1
+
+ if [ $ERRORS != "0" ];
+ then
+ echo "$ERRORS error(s) found"
+ else
+ echo TEST PASSED
+ fi
+
+ delete_pool $pool1
+ delete_pool $pool2
+ kill_daemons $dir || return 1
+ return $ERRORS
+}
+
+main osd-backfill-prio "$@"
+
+# Local Variables:
+# compile-command: "make -j4 && ../qa/run-standalone.sh osd-backfill-prio.sh"
+# End:
diff --git a/qa/standalone/osd-backfill/osd-backfill-recovery-log.sh b/qa/standalone/osd-backfill/osd-backfill-recovery-log.sh
new file mode 100755
index 000000000..f9a144932
--- /dev/null
+++ b/qa/standalone/osd-backfill/osd-backfill-recovery-log.sh
@@ -0,0 +1,139 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2019 Red Hat <contact@redhat.com>
+#
+# Author: David Zafman <dzafman@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ # Fix port????
+ export CEPH_MON="127.0.0.1:7129" # git grep '\<7129\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON --osd_max_backfills=1 --debug_reserver=20 "
+ CEPH_ARGS+="--osd_mclock_override_recovery_settings=true "
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+
+function _common_test() {
+ local dir=$1
+ local extra_opts="$2"
+ local loglen="$3"
+ local dupslen="$4"
+ local objects="$5"
+ local moreobjects=${6:-0}
+
+ local OSDS=6
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ export CEPH_ARGS
+ export EXTRA_OPTS=" $extra_opts"
+
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ run_osd $dir $osd || return 1
+ done
+
+ create_pool test 1 1
+
+ for j in $(seq 1 $objects)
+ do
+ rados -p test put obj-${j} /etc/passwd
+ done
+
+ # Mark out all OSDs for this pool
+ ceph osd out $(ceph pg dump pgs --format=json | jq '.pg_stats[0].up[]')
+ if [ "$moreobjects" != "0" ]; then
+ for j in $(seq 1 $moreobjects)
+ do
+ rados -p test put obj-more-${j} /etc/passwd
+ done
+ fi
+ sleep 1
+ wait_for_clean
+
+ flush_pg_stats
+
+ newprimary=$(ceph pg dump pgs --format=json | jq '.pg_stats[0].up_primary')
+ kill_daemons
+
+ ERRORS=0
+ _objectstore_tool_nodown $dir $newprimary --no-mon-config --pgid 1.0 --op log | tee $dir/result.log
+ LOGLEN=$(jq '.pg_log_t.log | length' $dir/result.log)
+ if [ $LOGLEN != "$loglen" ]; then
+ echo "FAILED: Wrong log length got $LOGLEN (expected $loglen)"
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+ DUPSLEN=$(jq '.pg_log_t.dups | length' $dir/result.log)
+ if [ $DUPSLEN != "$dupslen" ]; then
+ echo "FAILED: Wrong dups length got $DUPSLEN (expected $dupslen)"
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+ grep "copy_up_to\|copy_after" $dir/osd.*.log
+ rm -f $dir/result.log
+ if [ $ERRORS != "0" ]; then
+ echo TEST FAILED
+ return 1
+ fi
+}
+
+
+# Cause copy_up_to() to only partially copy logs, copy additional dups, and trim dups
+function TEST_backfill_log_1() {
+ local dir=$1
+
+ _common_test $dir "--osd_min_pg_log_entries=1 --osd_max_pg_log_entries=2 --osd_pg_log_dups_tracked=10" 2 8 150
+}
+
+
+# Cause copy_up_to() to only partially copy logs, copy additional dups
+function TEST_backfill_log_2() {
+ local dir=$1
+
+ _common_test $dir "--osd_min_pg_log_entries=1 --osd_max_pg_log_entries=2" 2 148 150
+}
+
+
+# Cause copy_after() to only copy logs, no dups
+function TEST_recovery_1() {
+ local dir=$1
+
+ _common_test $dir "--osd_min_pg_log_entries=50 --osd_max_pg_log_entries=50 --osd_pg_log_dups_tracked=60 --osd_pg_log_trim_min=10" 40 0 40
+}
+
+
+# Cause copy_after() to copy logs with dups
+function TEST_recovery_2() {
+ local dir=$1
+
+ _common_test $dir "--osd_min_pg_log_entries=150 --osd_max_pg_log_entries=150 --osd_pg_log_dups_tracked=3000 --osd_pg_log_trim_min=10" 151 10 141 20
+}
+
+main osd-backfill-recovery-log "$@"
+
+# Local Variables:
+# compile-command: "make -j4 && ../qa/run-standalone.sh osd-backfill-recovery-log.sh"
+# End:
diff --git a/qa/standalone/osd-backfill/osd-backfill-space.sh b/qa/standalone/osd-backfill/osd-backfill-space.sh
new file mode 100755
index 000000000..6a5c69412
--- /dev/null
+++ b/qa/standalone/osd-backfill/osd-backfill-space.sh
@@ -0,0 +1,1176 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2018 Red Hat <contact@redhat.com>
+#
+# Author: David Zafman <dzafman@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7180" # git grep '\<7180\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+ CEPH_ARGS+="--osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10 "
+ CEPH_ARGS+="--fake_statfs_for_testing=3686400 "
+ CEPH_ARGS+="--osd_max_backfills=10 "
+ CEPH_ARGS+="--osd_mclock_override_recovery_settings=true "
+ export objects=600
+ export poolprefix=test
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+
+function get_num_in_state() {
+ local state=$1
+ local expression
+ expression+="select(contains(\"${state}\"))"
+ ceph --format json pg dump pgs 2>/dev/null | \
+ jq ".pg_stats | [.[] | .state | $expression] | length"
+}
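+# e.g. "get_num_in_state backfill_toofull" prints how many PGs currently have
+# that substring in their state (illustrative usage note).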
+
+
+function wait_for_not_state() {
+ local state=$1
+ local num_in_state=-1
+ local cur_in_state
+ local -a delays=($(get_timeout_delays $2 5))
+ local -i loop=0
+
+ flush_pg_stats || return 1
+ while test $(get_num_pgs) == 0 ; do
+ sleep 1
+ done
+
+ while true ; do
+ cur_in_state=$(get_num_in_state ${state})
+ test $cur_in_state = "0" && break
+ if test $cur_in_state != $num_in_state ; then
+ loop=0
+ num_in_state=$cur_in_state
+ elif (( $loop >= ${#delays[*]} )) ; then
+ ceph pg dump pgs
+ return 1
+ fi
+ sleep ${delays[$loop]}
+ loop+=1
+ done
+ return 0
+}
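+# (The delay table above resets whenever the count changes, so the timeout
+# only expires if the number of PGs in the given state stops making progress.)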
+
+
+function wait_for_not_backfilling() {
+ local timeout=$1
+ wait_for_not_state backfilling $timeout
+}
+
+
+function wait_for_not_activating() {
+ local timeout=$1
+ wait_for_not_state activating $timeout
+}
+
+# All tests are created in an environment which has a fake total space
+# of 3600K (3686400 bytes), which can hold 600 6K replicated objects or
+# 200 18K shards of erasure coded objects. For a k=3, m=2 EC pool the
+# theoretical maximum object is 54K, but with the 4K chunk size and an
+# extra 4K of rounding to account for the chunks the practical maximum
+# object is 36K, for which the shards on one OSD use
+# ((36K / 3) + 4K) * 200 = 3200K, which is 88% of 3600K.
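+
+# Illustrative helper (an editor's sketch, not called by any test): estimate
+# the per-OSD usage in KB for <num> EC objects of <object_kb> KB in a k=<k>
+# pool, assuming the 4K per-shard rounding described above.
+function _estimate_ec_shard_usage_kb() {
+    local object_kb=$1
+    local k=$2
+    local num=$3
+    # per-shard size is object_kb/k plus 4K of rounding, times the object count
+    echo $(( (object_kb / k + 4) * num ))
+}
+# e.g. _estimate_ec_shard_usage_kb 36 3 200 -> 3200 (~88% of 3600K)
+#      _estimate_ec_shard_usage_kb 18 3 200 -> 2000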
+
+# Create 2 pools with size 1
+# Write enough data that only 1 pool pg can fit per osd
+# Increase the pool size to 2
+# On 3 OSDs this should result in 1 OSD with overlapping replicas,
+# so both pools can't fit. We assume pgid 1.0 and 2.0 won't
+# map to the same 2 OSDs.
+# At least 1 pool shouldn't have room to backfill
+# All other pools should go active+clean
+function TEST_backfill_test_simple() {
+ local dir=$1
+ local pools=2
+ local OSDS=3
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ export CEPH_ARGS
+
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ run_osd $dir $osd || return 1
+ done
+
+ ceph osd set-backfillfull-ratio .85
+
+ for p in $(seq 1 $pools)
+ do
+ create_pool "${poolprefix}$p" 1 1
+ ceph osd pool set "${poolprefix}$p" size 1 --yes-i-really-mean-it
+ done
+
+ wait_for_clean || return 1
+
+    # This won't work if the 2 pools' primary and only OSDs
+    # are the same.
+
+ dd if=/dev/urandom of=$dir/datafile bs=1024 count=4
+ for o in $(seq 1 $objects)
+ do
+ for p in $(seq 1 $pools)
+ do
+ rados -p "${poolprefix}$p" put obj$o $dir/datafile
+ done
+ done
+
+ ceph pg dump pgs
+
+ for p in $(seq 1 $pools)
+ do
+ ceph osd pool set "${poolprefix}$p" size 2
+ done
+ sleep 30
+
+ wait_for_not_backfilling 1200 || return 1
+ wait_for_not_activating 60 || return 1
+
+ ERRORS=0
+ if [ "$(ceph pg dump pgs | grep +backfill_toofull | wc -l)" != "1" ];
+ then
+ echo "One pool should have been in backfill_toofull"
+ ERRORS="$(expr $ERRORS + 1)"
+ fi
+
+ expected="$(expr $pools - 1)"
+ if [ "$(ceph pg dump pgs | grep active+clean | wc -l)" != "$expected" ];
+ then
+ echo "$expected didn't finish backfill"
+ ERRORS="$(expr $ERRORS + 1)"
+ fi
+
+ ceph pg dump pgs
+
+ if [ $ERRORS != "0" ];
+ then
+ return 1
+ fi
+
+ for i in $(seq 1 $pools)
+ do
+ delete_pool "${poolprefix}$i"
+ done
+ kill_daemons $dir || return 1
+ ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1
+}
+
+
+# Create 8 pools of size 1 on 20 OSDs
+# Write 4K * 600 objects (only 1 pool pg can fit on any given osd)
+# Increase pool size to 2
+# At least 1 pool shouldn't have room to backfill
+# All other pools should go active+clean
+function TEST_backfill_test_multi() {
+ local dir=$1
+ local pools=8
+ local OSDS=20
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ export CEPH_ARGS
+
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ run_osd $dir $osd || return 1
+ done
+
+ ceph osd set-backfillfull-ratio .85
+
+ for p in $(seq 1 $pools)
+ do
+ create_pool "${poolprefix}$p" 1 1
+ ceph osd pool set "${poolprefix}$p" size 1 --yes-i-really-mean-it
+ done
+
+ wait_for_clean || return 1
+
+ dd if=/dev/urandom of=$dir/datafile bs=1024 count=4
+ for o in $(seq 1 $objects)
+ do
+ for p in $(seq 1 $pools)
+ do
+ rados -p "${poolprefix}$p" put obj$o $dir/datafile
+ done
+ done
+
+ ceph pg dump pgs
+
+ for p in $(seq 1 $pools)
+ do
+ ceph osd pool set "${poolprefix}$p" size 2
+ done
+ sleep 30
+
+ wait_for_not_backfilling 1200 || return 1
+ wait_for_not_activating 60 || return 1
+
+ ERRORS=0
+ full="$(ceph pg dump pgs | grep +backfill_toofull | wc -l)"
+ if [ "$full" -lt "1" ];
+ then
+ echo "At least one pool should have been in backfill_toofull"
+ ERRORS="$(expr $ERRORS + 1)"
+ fi
+
+ expected="$(expr $pools - $full)"
+ if [ "$(ceph pg dump pgs | grep active+clean | wc -l)" != "$expected" ];
+ then
+ echo "$expected didn't finish backfill"
+ ERRORS="$(expr $ERRORS + 1)"
+ fi
+
+ ceph pg dump pgs
+ ceph status
+
+ ceph status --format=json-pretty > $dir/stat.json
+
+ eval SEV=$(jq '.health.checks.PG_BACKFILL_FULL.severity' $dir/stat.json)
+ if [ "$SEV" != "HEALTH_WARN" ]; then
+ echo "PG_BACKFILL_FULL severity $SEV not HEALTH_WARN"
+ ERRORS="$(expr $ERRORS + 1)"
+ fi
+ eval MSG=$(jq '.health.checks.PG_BACKFILL_FULL.summary.message' $dir/stat.json)
+ if [ "$MSG" != "Low space hindering backfill (add storage if this doesn't resolve itself): 4 pgs backfill_toofull" ]; then
+ echo "PG_BACKFILL_FULL message '$MSG' mismatched"
+ ERRORS="$(expr $ERRORS + 1)"
+ fi
+ rm -f $dir/stat.json
+
+ if [ $ERRORS != "0" ];
+ then
+ return 1
+ fi
+
+ for i in $(seq 1 $pools)
+ do
+ delete_pool "${poolprefix}$i"
+ done
+    # Workaround for http://tracker.ceph.com/issues/38195
+ kill_daemons $dir #|| return 1
+ ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1
+}
+
+
+# Make sure that when 2 PGs try to backfill at the same time to
+# the same target, the case is handled. This might be covered by the simple
+# test above, but this makes sure we exercise it.
+#
+# Create 10 pools of size 2 and identify 2 that have the same
+# non-primary osd.
+# Delete all other pools
+# Set size to 1 and write 4K * 600 to each pool
+# Set size back to 2
+# The 2 pools should race to backfill.
+# One pool goes active+clean
+# The other goes active+...+backfill_toofull
+function TEST_backfill_test_sametarget() {
+ local dir=$1
+ local pools=10
+ local OSDS=5
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ export CEPH_ARGS
+
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ run_osd $dir $osd || return 1
+ done
+
+ ceph osd set-backfillfull-ratio .85
+
+ for p in $(seq 1 $pools)
+ do
+ create_pool "${poolprefix}$p" 1 1
+ ceph osd pool set "${poolprefix}$p" size 2
+ done
+ sleep 5
+
+ wait_for_clean || return 1
+
+ ceph pg dump pgs
+
+    # Find 2 pools with a pg that have distinct primaries but their second
+    # replica on the same osd.
+ local PG1
+ local POOLNUM1
+ local pool1
+ local chk_osd1
+ local chk_osd2
+
+ local PG2
+ local POOLNUM2
+ local pool2
+ for p in $(seq 1 $pools)
+ do
+ ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting
+ local test_osd1=$(head -1 $dir/acting)
+ local test_osd2=$(tail -1 $dir/acting)
+ if [ $p = "1" ];
+ then
+ PG1="${p}.0"
+ POOLNUM1=$p
+ pool1="${poolprefix}$p"
+ chk_osd1=$test_osd1
+ chk_osd2=$test_osd2
+ elif [ $chk_osd1 != $test_osd1 -a $chk_osd2 = $test_osd2 ];
+ then
+ PG2="${p}.0"
+ POOLNUM2=$p
+ pool2="${poolprefix}$p"
+ break
+ fi
+ done
+ rm -f $dir/acting
+
+ if [ "$pool2" = "" ];
+ then
+        echo "Failure to find appropriate PGs"
+ return 1
+ fi
+
+ for p in $(seq 1 $pools)
+ do
+ if [ $p != $POOLNUM1 -a $p != $POOLNUM2 ];
+ then
+ delete_pool ${poolprefix}$p
+ fi
+ done
+
+ ceph osd pool set $pool1 size 1 --yes-i-really-mean-it
+ ceph osd pool set $pool2 size 1 --yes-i-really-mean-it
+
+ wait_for_clean || return 1
+
+ dd if=/dev/urandom of=$dir/datafile bs=1024 count=4
+ for i in $(seq 1 $objects)
+ do
+ rados -p $pool1 put obj$i $dir/datafile
+ rados -p $pool2 put obj$i $dir/datafile
+ done
+
+ ceph osd pool set $pool1 size 2
+ ceph osd pool set $pool2 size 2
+ sleep 30
+
+ wait_for_not_backfilling 1200 || return 1
+ wait_for_not_activating 60 || return 1
+
+ ERRORS=0
+ if [ "$(ceph pg dump pgs | grep +backfill_toofull | wc -l)" != "1" ];
+ then
+ echo "One pool should have been in backfill_toofull"
+ ERRORS="$(expr $ERRORS + 1)"
+ fi
+
+ if [ "$(ceph pg dump pgs | grep active+clean | wc -l)" != "1" ];
+ then
+ echo "One didn't finish backfill"
+ ERRORS="$(expr $ERRORS + 1)"
+ fi
+
+ ceph pg dump pgs
+
+ if [ $ERRORS != "0" ];
+ then
+ return 1
+ fi
+
+ delete_pool $pool1
+ delete_pool $pool2
+ kill_daemons $dir || return 1
+ ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1
+}
+
+# 2 pools can't both backfill to a target which has other data
+# 1 of the pools has objects that increase from 1024 to 2611 bytes
+#
+# Write to fill pool which is size 1
+# Take the fill pool osd down (the other 2 pools must go to the remaining OSDs)
+# Save an export of the data on the fill OSD and restart it
+# Write an initial 1K to pool1 which has pg 2.0
+# Export 2.0 from the non-fillpool OSD; don't wait for it to start up
+# Take down the fillpool OSD
+# Put the 1K object version of 2.0 on the fillpool OSD
+# Put back the fillpool data on the fillpool OSD
+# With the fillpool OSD down, write 2611 byte objects
+# Take down $osd and bring back $fillosd simultaneously
+# Wait for backfilling
+# One PG will be able to backfill its remaining data
+# One PG must get backfill_toofull
+function TEST_backfill_multi_partial() {
+ local dir=$1
+ local EC=$2
+ local pools=2
+ local OSDS=3
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ export CEPH_ARGS
+
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ run_osd $dir $osd || return 1
+ done
+
+ ceph osd set-backfillfull-ratio .85
+
+ ceph osd set-require-min-compat-client luminous
+ create_pool fillpool 1 1
+ ceph osd pool set fillpool size 1 --yes-i-really-mean-it
+ for p in $(seq 1 $pools)
+ do
+ create_pool "${poolprefix}$p" 1 1
+ ceph osd pool set "${poolprefix}$p" size 2
+ done
+
+ wait_for_clean || return 1
+
+ # Partially fill an osd
+    # We have room for 600 6K replicated objects; if we create 2611 byte objects
+    # there is 3600K - (2611 * 600) = 2070K left, so the fill pool plus one
+    # replica from the other 2 pools is 85% of 3600K
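+    # (In detail: 2611 bytes * 600 objects is roughly 1530K, leaving about
+    # 2070K free; the fill pool's ~1530K plus one other pool's ~1530K replica
+    # comes to ~3060K, which is ~85% of 3600K.)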
+
+ dd if=/dev/urandom of=$dir/datafile bs=2611 count=1
+ for o in $(seq 1 $objects)
+ do
+ rados -p fillpool put obj-fill-${o} $dir/datafile
+ done
+
+ local fillosd=$(get_primary fillpool obj-fill-1)
+ osd=$(expr $fillosd + 1)
+ if [ "$osd" = "$OSDS" ]; then
+ osd="0"
+ fi
+
+ kill_daemon $dir/osd.$fillosd.pid TERM
+ ceph osd out osd.$fillosd
+
+ _objectstore_tool_nodown $dir $fillosd --op export-remove --pgid 1.0 --file $dir/fillexport.out || return 1
+ activate_osd $dir $fillosd || return 1
+
+ ceph pg dump pgs
+
+ dd if=/dev/urandom of=$dir/datafile bs=1024 count=1
+ for o in $(seq 1 $objects)
+ do
+ rados -p "${poolprefix}1" put obj-1-${o} $dir/datafile
+ done
+
+ ceph pg dump pgs
+ # The $osd OSD is started, but we don't wait so we can kill $fillosd at the same time
+ _objectstore_tool_nowait $dir $osd --op export --pgid 2.0 --file $dir/export.out
+ kill_daemon $dir/osd.$fillosd.pid TERM
+ _objectstore_tool_nodown $dir $fillosd --force --op remove --pgid 2.0
+ _objectstore_tool_nodown $dir $fillosd --op import --pgid 2.0 --file $dir/export.out || return 1
+ _objectstore_tool_nodown $dir $fillosd --op import --pgid 1.0 --file $dir/fillexport.out || return 1
+ ceph pg dump pgs
+ sleep 20
+ ceph pg dump pgs
+
+ # re-write everything
+ dd if=/dev/urandom of=$dir/datafile bs=2611 count=1
+ for o in $(seq 1 $objects)
+ do
+ for p in $(seq 1 $pools)
+ do
+ rados -p "${poolprefix}$p" put obj-${p}-${o} $dir/datafile
+ done
+ done
+
+ kill_daemon $dir/osd.$osd.pid TERM
+ ceph osd out osd.$osd
+
+ activate_osd $dir $fillosd || return 1
+ ceph osd in osd.$fillosd
+ sleep 30
+
+ wait_for_not_backfilling 1200 || return 1
+ wait_for_not_activating 60 || return 1
+
+ flush_pg_stats || return 1
+ ceph pg dump pgs
+
+ ERRORS=0
+ if [ "$(get_num_in_state backfill_toofull)" != "1" ];
+ then
+ echo "One PG should be in backfill_toofull"
+ ERRORS="$(expr $ERRORS + 1)"
+ fi
+
+ if [ "$(get_num_in_state active+clean)" != "2" ];
+ then
+ echo "Two PGs should be active+clean after one PG completed backfill"
+ ERRORS="$(expr $ERRORS + 1)"
+ fi
+
+ if [ $ERRORS != "0" ];
+ then
+ return 1
+ fi
+
+ delete_pool fillpool
+ for i in $(seq 1 $pools)
+ do
+ delete_pool "${poolprefix}$i"
+ done
+ kill_daemons $dir || return 1
+ ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1
+}
+
+# Make sure that the amount of data already on the replica doesn't
+# cause an out-of-space condition
+#
+# Create 1 pool and write 4K * 600 objects
+# Remove 25% (150) of the objects with one OSD down (noout set)
+# Increase the size of the remaining 75% (450) of the objects to 6K
+# Bring the down OSD back
+# The pool should go active+clean
+function TEST_backfill_grow() {
+ local dir=$1
+ local poolname="test"
+ local OSDS=3
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ run_osd $dir $osd || return 1
+ done
+
+ ceph osd set-backfillfull-ratio .85
+
+ create_pool $poolname 1 1
+ ceph osd pool set $poolname size 3
+ sleep 5
+
+ wait_for_clean || return 1
+
+ dd if=/dev/urandom of=${dir}/4kdata bs=1k count=4
+ for i in $(seq 1 $objects)
+ do
+ rados -p $poolname put obj$i $dir/4kdata
+ done
+
+ local PG=$(get_pg $poolname obj1)
+ # Remember primary during the backfill
+ local primary=$(get_primary $poolname obj1)
+ local otherosd=$(get_not_primary $poolname obj1)
+
+ ceph osd set noout
+ kill_daemons $dir TERM $otherosd || return 1
+
+ rmobjects=$(expr $objects / 4)
+ for i in $(seq 1 $rmobjects)
+ do
+ rados -p $poolname rm obj$i
+ done
+
+ dd if=/dev/urandom of=${dir}/6kdata bs=6k count=1
+ for i in $(seq $(expr $rmobjects + 1) $objects)
+ do
+ rados -p $poolname put obj$i $dir/6kdata
+ done
+
+ activate_osd $dir $otherosd || return 1
+
+ ceph tell osd.$primary debug kick_recovery_wq 0
+
+ sleep 2
+
+ wait_for_clean || return 1
+
+ delete_pool $poolname
+ kill_daemons $dir || return 1
+ ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1
+}
+
+# Create a 5-shard EC pool on a 6 OSD cluster
+# Fill 1 OSD with 2600K of data and take that osd down.
+# Write the EC pool on 5 OSDs
+# Take down 1 OSD (it must contain an EC shard)
+# Bring up the OSD with the fill data
+# Not enough room to backfill to the partially full OSD
+function TEST_ec_backfill_simple() {
+ local dir=$1
+ local EC=$2
+ local pools=1
+ local OSDS=6
+ local k=3
+ local m=2
+ local ecobjects=$(expr $objects / $k)
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ export CEPH_ARGS
+
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ run_osd $dir $osd || return 1
+ done
+
+ ceph osd set-backfillfull-ratio .85
+ create_pool fillpool 1 1
+ ceph osd pool set fillpool size 1 --yes-i-really-mean-it
+
+ # Partially fill an osd
+    # We have room for 200 18K replicated objects; if we create 13K objects
+    # there is only 3600K - (13K * 200) = 1000K left, which won't hold
+    # a k=3 shard that reserves ((18K / 3) + 4K) * 200 = 2000K.
+    # Actual usage per shard is 8K * 200 = 1600K because 18K/3 is 6K, which
+    # rounds to 8K. The 2000K is the ceiling on the 18K * 200 = 3600K logical
+    # bytes in the pool.
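+    # (So the ~1000K left on the filled OSD is below both the 2000K
+    # reservation and the 1600K of actual shard data, which is what should
+    # drive the EC pool's PG to backfill_toofull below.)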
+ dd if=/dev/urandom of=$dir/datafile bs=1024 count=13
+ for o in $(seq 1 $ecobjects)
+ do
+ rados -p fillpool put obj$o $dir/datafile
+ done
+
+ local fillosd=$(get_primary fillpool obj1)
+ osd=$(expr $fillosd + 1)
+ if [ "$osd" = "$OSDS" ]; then
+ osd="0"
+ fi
+
+ sleep 5
+ kill_daemon $dir/osd.$fillosd.pid TERM
+ ceph osd out osd.$fillosd
+ sleep 2
+ ceph osd erasure-code-profile set ec-profile k=$k m=$m crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1
+
+ for p in $(seq 1 $pools)
+ do
+ ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile
+ done
+
+ # Can't wait for clean here because we created a stale pg
+ #wait_for_clean || return 1
+ sleep 5
+
+ ceph pg dump pgs
+
+ dd if=/dev/urandom of=$dir/datafile bs=1024 count=18
+ for o in $(seq 1 $ecobjects)
+ do
+ for p in $(seq 1 $pools)
+ do
+ rados -p "${poolprefix}$p" put obj$o $dir/datafile
+ done
+ done
+
+ kill_daemon $dir/osd.$osd.pid TERM
+ ceph osd out osd.$osd
+
+ activate_osd $dir $fillosd || return 1
+ ceph osd in osd.$fillosd
+ sleep 30
+
+ ceph pg dump pgs
+
+ wait_for_not_backfilling 1200 || return 1
+ wait_for_not_activating 60 || return 1
+
+ ceph pg dump pgs
+
+ ERRORS=0
+ if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ]; then
+ echo "One pool should have been in backfill_toofull"
+ ERRORS="$(expr $ERRORS + 1)"
+ fi
+
+ if [ $ERRORS != "0" ];
+ then
+ return 1
+ fi
+
+ delete_pool fillpool
+ for i in $(seq 1 $pools)
+ do
+ delete_pool "${poolprefix}$i"
+ done
+ kill_daemons $dir || return 1
+}
+
+function osdlist() {
+ local OSDS=$1
+ local excludeosd=$2
+
+ osds=""
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ if [ $osd = $excludeosd ];
+ then
+ continue
+ fi
+ if [ -n "$osds" ]; then
+ osds="${osds} "
+ fi
+ osds="${osds}${osd}"
+ done
+ echo $osds
+}
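+# e.g. "osdlist 6 2" prints "0 1 3 4 5" (illustrative usage note)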
+
+# Create a pool with size 1 and fill with data so that only 1 EC shard can fit.
+# Write data to 2 EC pools mapped to the same OSDs (excluding filled one)
+# Remap the last OSD to partially full OSD on both pools
+# The 2 pools should race to backfill.
+# One pool goes active+clean
+# The other goes active+...+backfill_toofull
+function TEST_ec_backfill_multi() {
+ local dir=$1
+ local EC=$2
+ local pools=2
+ local OSDS=6
+ local k=3
+ local m=2
+ local ecobjects=$(expr $objects / $k)
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ export CEPH_ARGS
+
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ run_osd $dir $osd || return 1
+ done
+
+    # This test requires that a shard from either of 2 different pools
+    # can fit on a given OSD, but both together will not fit. I'm
+    # making the fillosd plus 1 shard use 75% of the space,
+    # leaving not enough room to stay under the 85% ratio set here.
+ ceph osd set-backfillfull-ratio .85
+
+ ceph osd set-require-min-compat-client luminous
+ create_pool fillpool 1 1
+ ceph osd pool set fillpool size 1 --yes-i-really-mean-it
+
+ # Partially fill an osd
+    # We have room for 200 18K replicated objects; if we create 9K objects
+    # there is only 3600K - (9K * 200) = 1800K left, which will only hold
+    # one k=3 shard that reserves ((12K / 3) + 4K) * 200 = 1600K.
+    # The actual data will be (12K / 3) * 200 = 800K because the extra
+    # is the reservation padding for chunking.
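+    # (So the ~1800K left on the filled OSD can take one ~1600K shard
+    # reservation but not two, which is why the two pools race and one
+    # should end up backfill_toofull.)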
+ dd if=/dev/urandom of=$dir/datafile bs=1024 count=9
+ for o in $(seq 1 $ecobjects)
+ do
+ rados -p fillpool put obj$o $dir/datafile
+ done
+
+ local fillosd=$(get_primary fillpool obj1)
+ ceph osd erasure-code-profile set ec-profile k=3 m=2 crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1
+
+ nonfillosds="$(osdlist $OSDS $fillosd)"
+
+ for p in $(seq 1 $pools)
+ do
+ ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile
+ ceph osd pg-upmap "$(expr $p + 1).0" $nonfillosds
+ done
+
+ # Can't wait for clean here because we created a stale pg
+ #wait_for_clean || return 1
+ sleep 15
+
+ ceph pg dump pgs
+
+ dd if=/dev/urandom of=$dir/datafile bs=1024 count=12
+ for o in $(seq 1 $ecobjects)
+ do
+ for p in $(seq 1 $pools)
+ do
+ rados -p "${poolprefix}$p" put obj$o-$p $dir/datafile
+ done
+ done
+
+ ceph pg dump pgs
+
+ for p in $(seq 1 $pools)
+ do
+ ceph osd pg-upmap $(expr $p + 1).0 ${nonfillosds% *} $fillosd
+ done
+
+ sleep 30
+
+ wait_for_not_backfilling 1200 || return 1
+ wait_for_not_activating 60 || return 1
+
+ ceph pg dump pgs
+
+ ERRORS=0
+ if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ];
+ then
+ echo "One pool should have been in backfill_toofull"
+ ERRORS="$(expr $ERRORS + 1)"
+ fi
+
+ if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep active+clean | wc -l)" != "1" ];
+ then
+ echo "One didn't finish backfill"
+ ERRORS="$(expr $ERRORS + 1)"
+ fi
+
+ if [ $ERRORS != "0" ];
+ then
+ return 1
+ fi
+
+ delete_pool fillpool
+ for i in $(seq 1 $pools)
+ do
+ delete_pool "${poolprefix}$i"
+ done
+ kill_daemons $dir || return 1
+}
+
+# Similar to TEST_ec_backfill_multi but one of the ec pools
+# already had some data on the target OSD
+
+# Create a pool with size 1 and fill with data so that only 1 EC shard can fit.
+# Write a small amount of data to 1 EC pool that still includes the filled one
+# Take down fillosd with noout set
+# Write data to 2 EC pools mapped to the same OSDs (excluding filled one)
+# Remap the last OSD to partially full OSD on both pools
+# The 2 pools should race to backfill.
+# One pool goes active+clean
+# The other goes active+...+backfill_toofull
+function SKIP_TEST_ec_backfill_multi_partial() {
+ local dir=$1
+ local EC=$2
+ local pools=2
+ local OSDS=5
+ local k=3
+ local m=2
+ local ecobjects=$(expr $objects / $k)
+ local lastosd=$(expr $OSDS - 1)
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ export CEPH_ARGS
+
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ run_osd $dir $osd || return 1
+ done
+
+    # This test requires that a shard from either of 2 different pools
+    # can fit on a given OSD, but both together will not fit. I'm
+    # making the fillosd plus 1 shard use 75% of the space,
+    # leaving not enough room to stay under the 85% ratio set here.
+ ceph osd set-backfillfull-ratio .85
+
+ ceph osd set-require-min-compat-client luminous
+ create_pool fillpool 1 1
+ ceph osd pool set fillpool size 1 --yes-i-really-mean-it
+ # last osd
+ ceph osd pg-upmap 1.0 $lastosd
+
+ # Partially fill an osd
+    # We have room for 200 18K replicated objects; if we create 9K objects
+    # there is only 3600K - (9K * 200) = 1800K left, which will only hold
+    # one k=3 shard that reserves ((12K / 3) + 4K) * 200 = 1600K.
+    # The actual data will be (12K / 3) * 200 = 800K because the extra
+    # is the reservation padding for chunking.
+ dd if=/dev/urandom of=$dir/datafile bs=1024 count=9
+ for o in $(seq 1 $ecobjects)
+ do
+ rados -p fillpool put obj$o $dir/datafile
+ done
+
+ local fillosd=$(get_primary fillpool obj1)
+ ceph osd erasure-code-profile set ec-profile k=3 m=2 crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1
+
+ nonfillosds="$(osdlist $OSDS $fillosd)"
+
+ for p in $(seq 1 $pools)
+ do
+ ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile
+ ceph osd pg-upmap "$(expr $p + 1).0" $(seq 0 $lastosd)
+ done
+
+ # Can't wait for clean here because we created a stale pg
+ #wait_for_clean || return 1
+ sleep 15
+
+ ceph pg dump pgs
+
+ dd if=/dev/urandom of=$dir/datafile bs=1024 count=1
+ for o in $(seq 1 $ecobjects)
+ do
+ rados -p "${poolprefix}1" put obj$o-1 $dir/datafile
+ done
+
+ for p in $(seq 1 $pools)
+ do
+ ceph osd pg-upmap "$(expr $p + 1).0" $(seq 0 $(expr $lastosd - 1))
+ done
+ ceph pg dump pgs
+
+ #ceph osd set noout
+ #kill_daemons $dir TERM osd.$lastosd || return 1
+
+ dd if=/dev/urandom of=$dir/datafile bs=1024 count=12
+ for o in $(seq 1 $ecobjects)
+ do
+ for p in $(seq 1 $pools)
+ do
+ rados -p "${poolprefix}$p" put obj$o-$p $dir/datafile
+ done
+ done
+
+ ceph pg dump pgs
+
+ # Now backfill lastosd by adding back into the upmap
+ for p in $(seq 1 $pools)
+ do
+ ceph osd pg-upmap "$(expr $p + 1).0" $(seq 0 $lastosd)
+ done
+ #activate_osd $dir $lastosd || return 1
+ #ceph tell osd.0 debug kick_recovery_wq 0
+
+ sleep 30
+ ceph pg dump pgs
+
+ wait_for_not_backfilling 1200 || return 1
+ wait_for_not_activating 60 || return 1
+
+ ceph pg dump pgs
+
+ ERRORS=0
+ if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ];
+ then
+ echo "One pool should have been in backfill_toofull"
+ ERRORS="$(expr $ERRORS + 1)"
+ fi
+
+ if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep active+clean | wc -l)" != "1" ];
+ then
+ echo "One didn't finish backfill"
+ ERRORS="$(expr $ERRORS + 1)"
+ fi
+
+ if [ $ERRORS != "0" ];
+ then
+ return 1
+ fi
+
+ delete_pool fillpool
+ for i in $(seq 1 $pools)
+ do
+ delete_pool "${poolprefix}$i"
+ done
+ kill_daemons $dir || return 1
+}
+
+function SKIP_TEST_ec_backfill_multi_partial() {
+ local dir=$1
+ local EC=$2
+ local pools=2
+ local OSDS=6
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ export CEPH_ARGS
+
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ run_osd $dir $osd || return 1
+ done
+
+ # Below we need to fit 3200K in 3600K which is 88%
+ # so set to 90%
+ ceph osd set-backfillfull-ratio .90
+
+ ceph osd set-require-min-compat-client luminous
+ create_pool fillpool 1 1
+ ceph osd pool set fillpool size 1 --yes-i-really-mean-it
+
+ # Partially fill an osd
+ # We have room for 200 48K ec objects, if we create 4k replicated objects
+ # there is 3600K - (4K * 200) = 2800K which won't hold 2 k=3 shard
+ # of 200 12K objects which takes ((12K / 3) + 4K) * 200 = 1600K each.
+ # On the other OSDs 2 * 1600K = 3200K which is 88% of 3600K.
+ dd if=/dev/urandom of=$dir/datafile bs=1024 count=4
+ for o in $(seq 1 $objects)
+ do
+ rados -p fillpool put obj$o $dir/datafile
+ done
+
+ local fillosd=$(get_primary fillpool obj1)
+ osd=$(expr $fillosd + 1)
+ if [ "$osd" = "$OSDS" ]; then
+ osd="0"
+ fi
+
+ sleep 5
+ kill_daemon $dir/osd.$fillosd.pid TERM
+ ceph osd out osd.$fillosd
+ sleep 2
+ ceph osd erasure-code-profile set ec-profile k=3 m=2 crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1
+
+ for p in $(seq 1 $pools)
+ do
+ ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile
+ done
+
+ # Can't wait for clean here because we created a stale pg
+ #wait_for_clean || return 1
+ sleep 5
+
+ ceph pg dump pgs
+
+ dd if=/dev/urandom of=$dir/datafile bs=1024 count=12
+ for o in $(seq 1 $objects)
+ do
+ for p in $(seq 1 $pools)
+ do
+ rados -p "${poolprefix}$p" put obj$o $dir/datafile
+ done
+ done
+
+ #ceph pg map 2.0 --format=json | jq '.'
+ kill_daemon $dir/osd.$osd.pid TERM
+ ceph osd out osd.$osd
+
+ _objectstore_tool_nodown $dir $osd --op export --pgid 2.0 --file $dir/export.out
+ _objectstore_tool_nodown $dir $fillosd --op import --pgid 2.0 --file $dir/export.out
+
+ activate_osd $dir $fillosd || return 1
+ ceph osd in osd.$fillosd
+ sleep 30
+
+ wait_for_not_backfilling 1200 || return 1
+ wait_for_not_activating 60 || return 1
+
+ ERRORS=0
+ if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ];
+ then
+ echo "One pool should have been in backfill_toofull"
+ ERRORS="$(expr $ERRORS + 1)"
+ fi
+
+ if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep active+clean | wc -l)" != "1" ];
+ then
+ echo "One didn't finish backfill"
+ ERRORS="$(expr $ERRORS + 1)"
+ fi
+
+ ceph pg dump pgs
+
+ if [ $ERRORS != "0" ];
+ then
+ return 1
+ fi
+
+ delete_pool fillpool
+ for i in $(seq 1 $pools)
+ do
+ delete_pool "${poolprefix}$i"
+ done
+ kill_daemons $dir || return 1
+}
+
+# Create 1 EC pool
+# Write 200 12K objects: ((12K / 3) + 4K) * 200 = 1600K
+# Take 1 shard's OSD down (with noout set)
+# Remove 50 objects: ((12K / 3) + 4K) * 50 = 400K
+# Write 150 36K objects (grow 150 objects): ((36K / 3) + 4K) * 150 = 2400K
+# But there is already 1600K of usage, so backfill
+# would be too full if it didn't account for existing data
+# Bring the down OSD back so it must backfill
+# It should go active+clean taking into account data already there
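+# (Rough numbers: the shard already holds ~1600K and the 150 grown objects
+# need ((36K / 3) + 4K) * 150 = 2400K. A naive reservation of 1600K + 2400K =
+# 4000K would exceed the 3600K of fake space, but since most of the existing
+# data is overwritten the end state is ~2400K, about 67% and under the 85%
+# backfillfull ratio.)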
+function TEST_ec_backfill_grow() {
+ local dir=$1
+ local poolname="test"
+ local OSDS=6
+ local k=3
+ local m=2
+ local ecobjects=$(expr $objects / $k)
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ run_osd $dir $osd || return 1
+ done
+
+ ceph osd set-backfillfull-ratio .85
+
+ ceph osd set-require-min-compat-client luminous
+ ceph osd erasure-code-profile set ec-profile k=$k m=$m crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1
+ ceph osd pool create $poolname 1 1 erasure ec-profile
+
+ wait_for_clean || return 1
+
+ dd if=/dev/urandom of=${dir}/12kdata bs=1k count=12
+ for i in $(seq 1 $ecobjects)
+ do
+ rados -p $poolname put obj$i $dir/12kdata
+ done
+
+ local PG=$(get_pg $poolname obj1)
+ # Remember primary during the backfill
+ local primary=$(get_primary $poolname obj1)
+ local otherosd=$(get_not_primary $poolname obj1)
+
+ ceph osd set noout
+ kill_daemons $dir TERM $otherosd || return 1
+
+ rmobjects=$(expr $ecobjects / 4)
+ for i in $(seq 1 $rmobjects)
+ do
+ rados -p $poolname rm obj$i
+ done
+
+ dd if=/dev/urandom of=${dir}/36kdata bs=1k count=36
+ for i in $(seq $(expr $rmobjects + 1) $ecobjects)
+ do
+ rados -p $poolname put obj$i $dir/36kdata
+ done
+
+ activate_osd $dir $otherosd || return 1
+
+ ceph tell osd.$primary debug kick_recovery_wq 0
+
+ sleep 2
+
+ wait_for_clean || return 1
+
+ delete_pool $poolname
+ kill_daemons $dir || return 1
+}
+
+main osd-backfill-space "$@"
+
+# Local Variables:
+# compile-command: "make -j4 && ../qa/run-standalone.sh osd-backfill-space.sh"
+# End:
diff --git a/qa/standalone/osd-backfill/osd-backfill-stats.sh b/qa/standalone/osd-backfill/osd-backfill-stats.sh
new file mode 100755
index 000000000..21b42a4ce
--- /dev/null
+++ b/qa/standalone/osd-backfill/osd-backfill-stats.sh
@@ -0,0 +1,761 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2017 Red Hat <contact@redhat.com>
+#
+# Author: David Zafman <dzafman@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ # Fix port????
+ export CEPH_MON="127.0.0.1:7114" # git grep '\<7114\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+ CEPH_ARGS+="--osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10 "
+ export margin=10
+ export objects=200
+ export poolname=test
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+function below_margin() {
+ local -i check=$1
+ shift
+ local -i target=$1
+
+ return $(( $check <= $target && $check >= $target - $margin ? 0 : 1 ))
+}
+
+function above_margin() {
+ local -i check=$1
+ shift
+ local -i target=$1
+
+ return $(( $check >= $target && $check <= $target + $margin ? 0 : 1 ))
+}
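+# e.g. with margin=10, "below_margin 495 500" and "above_margin 505 500" both
+# succeed (return 0); values outside that 10-object window fail (illustrative).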
+
+FIND_UPACT='grep "pg[[]${PG}.*backfilling.*update_calc_stats " $log | tail -1 | sed "s/.*[)] \([[][^ p]*\).*$/\1/"'
+FIND_FIRST='grep "pg[[]${PG}.*backfilling.*update_calc_stats $which " $log | grep -F " ${UPACT}${addp}" | grep -v est | head -1 | sed "s/.* \([0-9]*\)$/\1/"'
+FIND_LAST='grep "pg[[]${PG}.*backfilling.*update_calc_stats $which " $log | tail -1 | sed "s/.* \([0-9]*\)$/\1/"'
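+# (These patterns are single-quoted here and expanded later via eval inside
+# check(), so $PG, $log, $which, $UPACT and $addp refer to the variables set
+# in that function at eval time.)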
+
+function check() {
+ local dir=$1
+ local PG=$2
+ local primary=$3
+ local type=$4
+ local degraded_start=$5
+ local degraded_end=$6
+ local misplaced_start=$7
+ local misplaced_end=$8
+ local primary_start=${9:-}
+ local primary_end=${10:-}
+ local check_setup=${11:-true}
+
+ local log=$(grep -l +backfilling $dir/osd.$primary.log)
+ if [ $check_setup = "true" ];
+ then
+ local alllogs=$(grep -l +backfilling $dir/osd.*.log)
+ if [ "$(echo "$alllogs" | wc -w)" != "1" ];
+ then
+ echo "Test setup failure, a single OSD should have performed backfill"
+ return 1
+ fi
+ fi
+
+ local addp=" "
+ if [ "$type" = "erasure" ];
+ then
+ addp="p"
+ fi
+
+ UPACT=$(eval $FIND_UPACT)
+ [ -n "$UPACT" ] || return 1
+
+ # Check 3rd line at start because of false recovery starts
+ local which="degraded"
+ FIRST=$(eval $FIND_FIRST)
+ [ -n "$FIRST" ] || return 1
+ below_margin $FIRST $degraded_start || return 1
+ LAST=$(eval $FIND_LAST)
+ [ -n "$LAST" ] || return 1
+ above_margin $LAST $degraded_end || return 1
+
+ # Check 3rd line at start because of false recovery starts
+ which="misplaced"
+ FIRST=$(eval $FIND_FIRST)
+ [ -n "$FIRST" ] || return 1
+ below_margin $FIRST $misplaced_start || return 1
+ LAST=$(eval $FIND_LAST)
+ [ -n "$LAST" ] || return 1
+ above_margin $LAST $misplaced_end || return 1
+
+    # This is the value set into MISSING_ON_PRIMARY
+ if [ -n "$primary_start" ];
+ then
+ which="shard $primary"
+ FIRST=$(eval $FIND_FIRST)
+ [ -n "$FIRST" ] || return 1
+ below_margin $FIRST $primary_start || return 1
+ LAST=$(eval $FIND_LAST)
+ [ -n "$LAST" ] || return 1
+ above_margin $LAST $primary_end || return 1
+ fi
+}
+
+# [1] -> [1, 0, 2]
+# degraded 1000 -> 0
+# state: active+undersized+degraded+remapped+backfilling
+
+# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
+# 1.0 500 0 1000 0 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-10-27 09:44:23.531466 22'500 26:617 [1,0,2] 1 [1] 1 0'0 2017-10-27 09:43:44.654882 0'0 2017-10-27 09:43:44.654882
+function TEST_backfill_sizeup() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ export CEPH_ARGS
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+ run_osd $dir 3 || return 1
+ run_osd $dir 4 || return 1
+ run_osd $dir 5 || return 1
+
+ create_pool $poolname 1 1
+ ceph osd pool set $poolname size 1 --yes-i-really-mean-it
+
+ wait_for_clean || return 1
+
+ for i in $(seq 1 $objects)
+ do
+ rados -p $poolname put obj$i /dev/null
+ done
+
+ ceph osd set nobackfill
+ ceph osd pool set $poolname size 3
+ sleep 2
+ ceph osd unset nobackfill
+
+ wait_for_clean || return 1
+
+ local primary=$(get_primary $poolname obj1)
+ local PG=$(get_pg $poolname obj1)
+
+ local degraded=$(expr $objects \* 2)
+ check $dir $PG $primary replicated $degraded 0 0 0 || return 1
+
+ delete_pool $poolname
+ kill_daemons $dir || return 1
+}
+
+
+
+# [1] -> [0, 2, 4]
+# degraded 1000 -> 0
+# misplaced 500 -> 0
+# state: active+undersized+degraded+remapped+backfilling
+
+# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
+# 1.0 500 0 1000 500 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-10-27 09:48:53.326849 22'500 26:603 [0,2,4] 0 [1] 1 0'0 2017-10-27 09:48:13.236253 0'0 2017-10-27 09:48:13.236253
+function TEST_backfill_sizeup_out() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+ run_osd $dir 3 || return 1
+ run_osd $dir 4 || return 1
+ run_osd $dir 5 || return 1
+
+ create_pool $poolname 1 1
+ ceph osd pool set $poolname size 1 --yes-i-really-mean-it
+
+ wait_for_clean || return 1
+
+ for i in $(seq 1 $objects)
+ do
+ rados -p $poolname put obj$i /dev/null
+ done
+
+ local PG=$(get_pg $poolname obj1)
+ # Remember primary during the backfill
+ local primary=$(get_primary $poolname obj1)
+
+ ceph osd set nobackfill
+ ceph osd out osd.$primary
+ ceph osd pool set $poolname size 3
+ sleep 2
+ ceph osd unset nobackfill
+
+ wait_for_clean || return 1
+
+ local degraded=$(expr $objects \* 2)
+ check $dir $PG $primary replicated $degraded 0 $objects 0 || return 1
+
+ delete_pool $poolname
+ kill_daemons $dir || return 1
+}
+
+
+# [1 0] -> [1,2]/[1,0]
+# misplaced 500 -> 0
+# state: active+remapped+backfilling
+
+# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
+# 1.0 500 0 0 500 0 0 100 100 active+remapped+backfilling 2017-10-27 09:51:18.800517 22'500 25:570 [1,2] 1 [1,0] 1 0'0 2017-10-27 09:50:40.441274 0'0 2017-10-27 09:50:40.441274
+function TEST_backfill_out() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+ run_osd $dir 3 || return 1
+ run_osd $dir 4 || return 1
+ run_osd $dir 5 || return 1
+
+ create_pool $poolname 1 1
+ ceph osd pool set $poolname size 2
+ sleep 5
+
+ wait_for_clean || return 1
+
+ for i in $(seq 1 $objects)
+ do
+ rados -p $poolname put obj$i /dev/null
+ done
+
+ local PG=$(get_pg $poolname obj1)
+ # Remember primary during the backfill
+ local primary=$(get_primary $poolname obj1)
+
+ ceph osd set nobackfill
+ ceph osd out osd.$(get_not_primary $poolname obj1)
+ sleep 2
+ ceph osd unset nobackfill
+
+ wait_for_clean || return 1
+
+ check $dir $PG $primary replicated 0 0 $objects 0 || return 1
+
+ delete_pool $poolname
+ kill_daemons $dir || return 1
+}
+
+
+# [0, 1] -> [0, 2]/[0]
+# osd 1 down/out
+# degraded 500 -> 0
+# state: active+undersized+degraded+remapped+backfilling
+
+# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
+# 1.0 500 0 500 0 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-10-27 09:53:24.051091 22'500 27:719 [0,2] 0 [0] 0 0'0 2017-10-27 09:52:43.188368 0'0 2017-10-27 09:52:43.188368
+function TEST_backfill_down_out() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+ run_osd $dir 3 || return 1
+ run_osd $dir 4 || return 1
+ run_osd $dir 5 || return 1
+
+ create_pool $poolname 1 1
+ ceph osd pool set $poolname size 2
+ sleep 5
+
+ wait_for_clean || return 1
+
+ for i in $(seq 1 $objects)
+ do
+ rados -p $poolname put obj$i /dev/null
+ done
+
+ local PG=$(get_pg $poolname obj1)
+ # Remember primary during the backfill
+ local primary=$(get_primary $poolname obj1)
+ local otherosd=$(get_not_primary $poolname obj1)
+
+ ceph osd set nobackfill
+ kill $(cat $dir/osd.${otherosd}.pid)
+ ceph osd down osd.${otherosd}
+ ceph osd out osd.${otherosd}
+ sleep 2
+ ceph osd unset nobackfill
+
+ wait_for_clean || return 1
+
+ check $dir $PG $primary replicated $objects 0 0 0 || return 1
+
+ delete_pool $poolname
+ kill_daemons $dir || return 1
+}
+
+
+# [1, 0] -> [2, 3, 4]
+# degraded 500 -> 0
+# misplaced 1000 -> 0
+# state: active+undersized+degraded+remapped+backfilling
+
+# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
+# 1.0 500 0 500 1000 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-10-27 09:55:50.375722 23'500 27:553 [2,4,3] 2 [1,0] 1 0'0 2017-10-27 09:55:10.230919 0'0 2017-10-27 09:55:10.230919
+function TEST_backfill_out2() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+ run_osd $dir 3 || return 1
+ run_osd $dir 4 || return 1
+ run_osd $dir 5 || return 1
+
+ create_pool $poolname 1 1
+ ceph osd pool set $poolname size 2
+ sleep 5
+
+ wait_for_clean || return 1
+
+ for i in $(seq 1 $objects)
+ do
+ rados -p $poolname put obj$i /dev/null
+ done
+
+ local PG=$(get_pg $poolname obj1)
+ # Remember primary during the backfill
+ local primary=$(get_primary $poolname obj1)
+ local otherosd=$(get_not_primary $poolname obj1)
+
+ ceph osd set nobackfill
+ ceph osd pool set $poolname size 3
+ ceph osd out osd.${otherosd}
+ ceph osd out osd.${primary}
+ # Primary might change before backfill starts
+ sleep 2
+ primary=$(get_primary $poolname obj1)
+ ceph osd unset nobackfill
+ ceph tell osd.$primary get_latest_osdmap
+ ceph tell osd.$primary debug kick_recovery_wq 0
+ sleep 2
+
+ wait_for_clean || return 1
+
+ local misplaced=$(expr $objects \* 2)
+
+ check $dir $PG $primary replicated $objects 0 $misplaced 0 || return 1
+
+ delete_pool $poolname
+ kill_daemons $dir || return 1
+}
+
+
+# [0,1] -> [2,4,3]/[0,1]
+# degraded 1000 -> 0
+# misplaced 1000 -> 500
+# state ends at active+clean+remapped [2,4,3]/[2,4,3,0]
+# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
+# 1.0 500 0 1000 1000 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-10-30 18:21:45.995149 19'500 23:1817 [2,4,3] 2 [0,1] 0 0'0 2017-10-30 18:21:05.109904 0'0 2017-10-30 18:21:05.109904
+# ENDS:
+# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
+# 1.0 500 0 0 500 0 0 5 5 active+clean+remapped 2017-10-30 18:22:42.293730 19'500 25:2557 [2,4,3] 2 [2,4,3,0] 2 0'0 2017-10-30 18:21:05.109904 0'0 2017-10-30 18:21:05.109904
+function TEST_backfill_sizeup4_allout() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+ run_osd $dir 3 || return 1
+ run_osd $dir 4 || return 1
+
+ create_pool $poolname 1 1
+ ceph osd pool set $poolname size 2
+
+ wait_for_clean || return 1
+
+ for i in $(seq 1 $objects)
+ do
+ rados -p $poolname put obj$i /dev/null
+ done
+
+ local PG=$(get_pg $poolname obj1)
+ # Remember primary during the backfill
+ local primary=$(get_primary $poolname obj1)
+ local otherosd=$(get_not_primary $poolname obj1)
+
+ ceph osd set nobackfill
+ ceph osd out osd.$otherosd
+ ceph osd out osd.$primary
+ ceph osd pool set $poolname size 4
+ # Primary might change before backfill starts
+ sleep 2
+ primary=$(get_primary $poolname obj1)
+ ceph osd unset nobackfill
+ ceph tell osd.$primary get_latest_osdmap
+ ceph tell osd.$primary debug kick_recovery_wq 0
+ sleep 2
+
+ wait_for_clean || return 1
+
+ local misdeg=$(expr $objects \* 2)
+ check $dir $PG $primary replicated $misdeg 0 $misdeg $objects || return 1
+
+ delete_pool $poolname
+ kill_daemons $dir || return 1
+}
+
+
+# [1,2,0] -> [3]/[1,2]
+# misplaced 1000 -> 500
+# state ends at active+clean+remapped [3]/[3,1]
+# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
+# 1.0 500 0 0 1000 0 0 100 100 active+remapped+backfilling 2017-11-28 19:13:56.092439 21'500 31:790 [3] 3 [1,2] 1 0'0 2017-11-28 19:13:28.698661 0'0 2017-11-28 19:13:28.698661
+function TEST_backfill_remapped() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+ run_osd $dir 3 || return 1
+
+ create_pool $poolname 1 1
+ ceph osd pool set $poolname size 3
+ sleep 5
+
+ wait_for_clean || return 1
+
+ for i in $(seq 1 $objects)
+ do
+ rados -p $poolname put obj$i /dev/null
+ done
+
+ local PG=$(get_pg $poolname obj1)
+ # Remember primary during the backfill
+ local primary=$(get_primary $poolname obj1)
+ local otherosd=$(get_not_primary $poolname obj1)
+
+ ceph osd set nobackfill
+ ceph osd out osd.${otherosd}
+ for i in $(get_osds $poolname obj1)
+ do
+ if [ $i = $primary -o $i = $otherosd ];
+ then
+ continue
+ fi
+ ceph osd out osd.$i
+ break
+ done
+ ceph osd out osd.${primary}
+ ceph osd pool set $poolname size 2
+ sleep 2
+
+ # primary may change due to invalidating the old pg_temp, which was [1,2,0],
+ # but up_primary (3) chooses [0,1] for acting.
+ primary=$(get_primary $poolname obj1)
+
+ ceph osd unset nobackfill
+ ceph tell osd.$primary get_latest_osdmap
+ ceph tell osd.$primary debug kick_recovery_wq 0
+
+ sleep 2
+
+ wait_for_clean || return 1
+
+ local misplaced=$(expr $objects \* 2)
+
+ check $dir $PG $primary replicated 0 0 $misplaced $objects "" "" false || return 1
+
+ delete_pool $poolname
+ kill_daemons $dir || return 1
+}
+
+# [1,0,2] -> [4,3,NONE]/[1,0,2]
+# misplaced 1500 -> 500
+# state ends at active+clean+remapped [4,3,NONE]/[4,3,2]
+
+# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
+# 1.0 500 0 0 1500 0 0 100 100 active+degraded+remapped+backfilling 2017-10-31 16:53:39.467126 19'500 23:615 [4,3,NONE] 4 [1,0,2] 1 0'0 2017-10-31 16:52:59.624429 0'0 2017-10-31 16:52:59.624429
+
+
+# ENDS:
+
+# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
+# 1.0 500 0 0 500 0 0 5 5 active+clean+remapped 2017-10-31 16:48:34.414040 19'500 25:2049 [4,3,NONE] 4 [4,3,2] 4 0'0 2017-10-31 16:46:58.203440 0'0 2017-10-31 16:46:58.203440
+function TEST_backfill_ec_all_out() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+ run_osd $dir 3 || return 1
+ run_osd $dir 4 || return 1
+
+ ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd
+ create_pool $poolname 1 1 erasure myprofile
+
+ wait_for_clean || return 1
+
+ for i in $(seq 1 $objects)
+ do
+ rados -p $poolname put obj$i /dev/null
+ done
+
+ local PG=$(get_pg $poolname obj1)
+ # Remember primary during the backfill
+ local primary=$(get_primary $poolname obj1)
+
+ ceph osd set nobackfill
+ for o in $(get_osds $poolname obj1)
+ do
+ ceph osd out osd.$o
+ done
+ # Primary might change before backfill starts
+ sleep 2
+ primary=$(get_primary $poolname obj1)
+ ceph osd unset nobackfill
+ ceph tell osd.$primary get_latest_osdmap
+ ceph tell osd.$primary debug kick_recovery_wq 0
+ sleep 2
+
+ wait_for_clean || return 1
+
+ local misplaced=$(expr $objects \* 3)
+ check $dir $PG $primary erasure 0 0 $misplaced $objects || return 1
+
+ delete_pool $poolname
+ kill_daemons $dir || return 1
+}
+
+
+# [1,0,2] -> [4, 0, 2]
+# misplaced 500 -> 0
+# active+remapped+backfilling
+#
+# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
+# 1.0 500 0 0 500 0 0 100 100 active+remapped+backfilling 2017-11-08 18:05:39.036420 24'500 27:742 [4,0,2] 4 [1,0,2] 1 0'0 2017-11-08 18:04:58.697315 0'0 2017-11-08 18:04:58.697315
+function TEST_backfill_ec_prim_out() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+ run_osd $dir 3 || return 1
+ run_osd $dir 4 || return 1
+
+ ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd
+ create_pool $poolname 1 1 erasure myprofile
+
+ wait_for_clean || return 1
+
+ for i in $(seq 1 $objects)
+ do
+ rados -p $poolname put obj$i /dev/null
+ done
+
+ local PG=$(get_pg $poolname obj1)
+ # Remember primary during the backfill
+ local primary=$(get_primary $poolname obj1)
+
+ ceph osd set nobackfill
+ ceph osd out osd.$primary
+ # Primary might change before backfill starts
+ sleep 2
+ primary=$(get_primary $poolname obj1)
+ ceph osd unset nobackfill
+ ceph tell osd.$primary get_latest_osdmap
+ ceph tell osd.$primary debug kick_recovery_wq 0
+ sleep 2
+
+ wait_for_clean || return 1
+
+ local misplaced=$(expr $objects \* 3)
+ check $dir $PG $primary erasure 0 0 $objects 0 || return 1
+
+ delete_pool $poolname
+ kill_daemons $dir || return 1
+}
+
+# [1,0] -> [1,2]
+# degraded 500 -> 0
+# misplaced 1000 -> 0
+#
+# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
+# 1.0 500 0 500 1000 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-11-06 14:02:29.439105 24'500 29:1020 [4,3,5] 4 [1,NONE,2] 1 0'0 2017-11-06 14:01:46.509963 0'0 2017-11-06 14:01:46.509963
+function TEST_backfill_ec_down_all_out() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+ run_osd $dir 3 || return 1
+ run_osd $dir 4 || return 1
+ run_osd $dir 5 || return 1
+
+ ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd
+ create_pool $poolname 1 1 erasure myprofile
+ ceph osd pool set $poolname min_size 2
+
+ wait_for_clean || return 1
+
+ for i in $(seq 1 $objects)
+ do
+ rados -p $poolname put obj$i /dev/null
+ done
+
+ local PG=$(get_pg $poolname obj1)
+ # Remember primary during the backfill
+ local primary=$(get_primary $poolname obj1)
+ local otherosd=$(get_not_primary $poolname obj1)
+ local allosds=$(get_osds $poolname obj1)
+
+ ceph osd set nobackfill
+ kill $(cat $dir/osd.${otherosd}.pid)
+ ceph osd down osd.${otherosd}
+ for o in $allosds
+ do
+ ceph osd out osd.$o
+ done
+ # Primary might change before backfill starts
+ sleep 2
+ primary=$(get_primary $poolname obj1)
+ ceph osd unset nobackfill
+ ceph tell osd.$primary get_latest_osdmap
+ ceph tell osd.$primary debug kick_recovery_wq 0
+ sleep 2
+ flush_pg_stats
+
+ # Wait for recovery to finish
+ # Can't use wait_for_clean() because state goes from active+undersized+degraded+remapped+backfilling
+ # to active+undersized+remapped
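+    # Instead, poll until no PG reports the "incomplete" state, then wait below
+    # for this PG to leave the backfilling state.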
+ while(true)
+ do
+ if test "$(ceph --format json pg dump pgs |
+ jq '.pg_stats | [.[] | .state | select(. == "incomplete")] | length')" -ne "0"
+ then
+ sleep 2
+ continue
+ fi
+ break
+ done
+ ceph pg dump pgs
+ for i in $(seq 1 240)
+ do
+ if ceph pg dump pgs | grep ^$PG | grep -qv backfilling
+ then
+ break
+ fi
+ if [ $i = "240" ];
+ then
+ echo "Timeout waiting for recovery to finish"
+ return 1
+ fi
+ sleep 1
+ done
+
+ ceph pg dump pgs
+
+ local misplaced=$(expr $objects \* 2)
+ check $dir $PG $primary erasure $objects 0 $misplaced 0 || return 1
+
+ delete_pool $poolname
+ kill_daemons $dir || return 1
+}
+
+
+# [1,0,2] -> [1,3,2]
+# degraded 500 -> 0
+# active+backfilling+degraded
+#
+# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
+# 1.0 500 0 500 0 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-11-06 13:57:25.412322 22'500 28:794 [1,3,2] 1 [1,NONE,2] 1 0'0 2017-11-06 13:54:58.033906 0'0 2017-11-06 13:54:58.033906
+function TEST_backfill_ec_down_out() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+ run_osd $dir 3 || return 1
+ run_osd $dir 4 || return 1
+ run_osd $dir 5 || return 1
+
+ ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd
+ create_pool $poolname 1 1 erasure myprofile
+ ceph osd pool set $poolname min_size 2
+
+ wait_for_clean || return 1
+
+ for i in $(seq 1 $objects)
+ do
+ rados -p $poolname put obj$i /dev/null
+ done
+
+ local PG=$(get_pg $poolname obj1)
+ # Remember primary during the backfill
+ local primary=$(get_primary $poolname obj1)
+ local otherosd=$(get_not_primary $poolname obj1)
+
+ ceph osd set nobackfill
+ kill $(cat $dir/osd.${otherosd}.pid)
+ ceph osd down osd.${otherosd}
+ ceph osd out osd.${otherosd}
+ # Primary might change before backfill starts
+ sleep 2
+ primary=$(get_primary $poolname obj1)
+ ceph osd unset nobackfill
+ ceph tell osd.$primary get_latest_osdmap
+ ceph tell osd.$primary debug kick_recovery_wq 0
+ sleep 2
+
+ wait_for_clean || return 1
+
+ local misplaced=$(expr $objects \* 2)
+ check $dir $PG $primary erasure $objects 0 0 0 || return 1
+
+ delete_pool $poolname
+ kill_daemons $dir || return 1
+}
+
+
+main osd-backfill-stats "$@"
+
+# Local Variables:
+# compile-command: "make -j4 && ../qa/run-standalone.sh osd-backfill-stats.sh"
+# End:
diff --git a/qa/standalone/osd/bad-inc-map.sh b/qa/standalone/osd/bad-inc-map.sh
new file mode 100755
index 000000000..cc3cf27cc
--- /dev/null
+++ b/qa/standalone/osd/bad-inc-map.sh
@@ -0,0 +1,62 @@
+#!/usr/bin/env bash
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+mon_port=$(get_unused_port)
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:$mon_port"
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+ set -e
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+function TEST_bad_inc_map() {
+ local dir=$1
+
+ run_mon $dir a
+ run_mgr $dir x
+ run_osd $dir 0
+ run_osd $dir 1
+ run_osd $dir 2
+
+ ceph config set osd.2 osd_inject_bad_map_crc_probability 1
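+    # With probability 1 a bad CRC is injected into every osdmap osd.2 handles;
+    # the map churn below should then produce the log messages grepped for later.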
+
+ # osd map churn
+ create_pool foo 8
+ ceph osd pool set foo min_size 1
+ ceph osd pool set foo min_size 2
+
+ sleep 5
+
+ # make sure all the OSDs are still up
+ TIMEOUT=10 wait_for_osd up 0
+ TIMEOUT=10 wait_for_osd up 1
+ TIMEOUT=10 wait_for_osd up 2
+
+ # check for the signature in the log
+ grep "injecting map crc failure" $dir/osd.2.log || return 1
+ grep "bailing because last" $dir/osd.2.log || return 1
+
+ echo success
+
+ delete_pool foo
+ kill_daemons $dir || return 1
+}
+
+main bad-inc-map "$@"
+
+# Local Variables:
+# compile-command: "make -j4 && ../qa/run-standalone.sh bad-inc-map.sh"
+# End:
diff --git a/qa/standalone/osd/divergent-priors.sh b/qa/standalone/osd/divergent-priors.sh
new file mode 100755
index 000000000..40d72544d
--- /dev/null
+++ b/qa/standalone/osd/divergent-priors.sh
@@ -0,0 +1,855 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2019 Red Hat <contact@redhat.com>
+#
+# Author: David Zafman <dzafman@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+    # This should be a multiple of 6

+ export loglen=12
+ export divisor=3
+ export trim=$(expr $loglen / 2)
+ export DIVERGENT_WRITE=$(expr $trim / $divisor)
+ export DIVERGENT_REMOVE=$(expr $trim / $divisor)
+ export DIVERGENT_CREATE=$(expr $trim / $divisor)
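+    # e.g. with loglen=12: trim=6 and DIVERGENT_WRITE=DIVERGENT_REMOVE=DIVERGENT_CREATE=2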
+ export poolname=test
+ export testobjects=100
+ # Fix port????
+ export CEPH_MON="127.0.0.1:7115" # git grep '\<7115\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+ # so we will not force auth_log_shard to be acting_primary
+ CEPH_ARGS+="--osd_force_auth_primary_missing_objects=1000000 "
+ CEPH_ARGS+="--osd_debug_pg_log_writeout=true "
+ CEPH_ARGS+="--osd_min_pg_log_entries=$loglen --osd_max_pg_log_entries=$loglen --osd_pg_log_trim_min=$trim "
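+    # Keep the PG log short so the surviving OSDs trim past the soon-to-be
+    # divergent entries while the divergent OSD is down.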
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+
+# Special case divergence test
+# Test handling of divergent entries with prior_version
+# prior to log_tail
+# based on qa/tasks/divergent_prior.py
+function TEST_divergent() {
+ local dir=$1
+
+ # something that is always there
+ local dummyfile='/etc/fstab'
+ local dummyfile2='/etc/resolv.conf'
+
+ local num_osds=3
+ local osds="$(seq 0 $(expr $num_osds - 1))"
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ for i in $osds
+ do
+ run_osd $dir $i || return 1
+ done
+
+ ceph osd set noout
+ ceph osd set noin
+ ceph osd set nodown
+ create_pool $poolname 1 1
+ ceph osd pool set $poolname size 3
+ ceph osd pool set $poolname min_size 2
+
+ flush_pg_stats || return 1
+ wait_for_clean || return 1
+
+ # determine primary
+ local divergent="$(ceph pg dump pgs --format=json | jq '.pg_stats[0].up_primary')"
+ echo "primary and soon to be divergent is $divergent"
+ ceph pg dump pgs
+ local non_divergent=""
+ for i in $osds
+ do
+ if [ "$i" = "$divergent" ]; then
+ continue
+ fi
+ non_divergent="$non_divergent $i"
+ done
+
+ echo "writing initial objects"
+ # write a bunch of objects
+ for i in $(seq 1 $testobjects)
+ do
+ rados -p $poolname put existing_$i $dummyfile
+ done
+
+ WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean
+
+ local pgid=$(get_pg $poolname existing_1)
+
+ # blackhole non_divergent
+ echo "blackholing osds $non_divergent"
+ ceph pg dump pgs
+ for i in $non_divergent
+ do
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) config set objectstore_blackhole 1
+ done
+
+ local case5=$testobjects
+ local case3=$(expr $testobjects - 1)
+ # Write some soon to be divergent
+ echo 'writing divergent object'
+ rados -p $poolname put existing_$case5 $dummyfile &
+ echo 'create missing divergent object'
+ inject_eio rep data $poolname existing_$case3 $dir 0 || return 1
+ rados -p $poolname get existing_$case3 $dir/existing &
+ sleep 10
+ killall -9 rados
+
+ # kill all the osds but leave divergent in
+ echo 'killing all the osds'
+ ceph pg dump pgs
+ kill_daemons $dir KILL osd || return 1
+ for i in $osds
+ do
+ ceph osd down osd.$i
+ done
+ for i in $non_divergent
+ do
+ ceph osd out osd.$i
+ done
+
+ # bring up non-divergent
+ echo "bringing up non_divergent $non_divergent"
+ ceph pg dump pgs
+ for i in $non_divergent
+ do
+ activate_osd $dir $i || return 1
+ done
+ for i in $non_divergent
+ do
+ ceph osd in osd.$i
+ done
+
+ WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean
+
+ # write 1 non-divergent object (ensure that old divergent one is divergent)
+ objname="existing_$(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)"
+ echo "writing non-divergent object $objname"
+ ceph pg dump pgs
+ rados -p $poolname put $objname $dummyfile2
+
+ # ensure no recovery of up osds first
+ echo 'delay recovery'
+ ceph pg dump pgs
+ for i in $non_divergent
+ do
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) set_recovery_delay 100000
+ done
+
+ # bring in our divergent friend
+ echo "revive divergent $divergent"
+ ceph pg dump pgs
+ ceph osd set noup
+ activate_osd $dir $divergent
+ sleep 5
+
+ echo 'delay recovery divergent'
+ ceph pg dump pgs
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) set_recovery_delay 100000
+
+ ceph osd unset noup
+
+ wait_for_osd up 0
+ wait_for_osd up 1
+ wait_for_osd up 2
+
+ ceph pg dump pgs
+ echo 'wait for peering'
+ ceph pg dump pgs
+ rados -p $poolname put foo $dummyfile
+
+ echo "killing divergent $divergent"
+ ceph pg dump pgs
+ kill_daemons $dir KILL osd.$divergent
+ #_objectstore_tool_nodown $dir $divergent --op log --pgid $pgid
+ echo "reviving divergent $divergent"
+ ceph pg dump pgs
+ activate_osd $dir $divergent
+
+ sleep 20
+
+ echo "allowing recovery"
+ ceph pg dump pgs
+ # Set osd_recovery_delay_start back to 0 and kick the queue
+ for i in $osds
+ do
+ ceph tell osd.$i debug kick_recovery_wq 0
+ done
+
+ echo 'reading divergent objects'
+ ceph pg dump pgs
+ for i in $(seq 1 $(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE))
+ do
+ rados -p $poolname get existing_$i $dir/existing || return 1
+ done
+ rm -f $dir/existing
+
+ grep _merge_object_divergent_entries $(find $dir -name '*osd*log')
+ # Check for _merge_object_divergent_entries for case #5
+ if ! grep -q "_merge_object_divergent_entries.*cannot roll back, removing and adding to missing" $(find $dir -name '*osd*log')
+ then
+ echo failure
+ return 1
+ fi
+ echo "success"
+
+ delete_pool $poolname
+ kill_daemons $dir || return 1
+}
+
+function TEST_divergent_ec() {
+ local dir=$1
+
+ # something that is always there
+ local dummyfile='/etc/fstab'
+ local dummyfile2='/etc/resolv.conf'
+
+ local num_osds=3
+ local osds="$(seq 0 $(expr $num_osds - 1))"
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ for i in $osds
+ do
+ run_osd $dir $i || return 1
+ done
+
+ ceph osd set noout
+ ceph osd set noin
+ ceph osd set nodown
+ create_ec_pool $poolname true k=2 m=1 || return 1
+
+ flush_pg_stats || return 1
+ wait_for_clean || return 1
+
+ # determine primary
+ local divergent="$(ceph pg dump pgs --format=json | jq '.pg_stats[0].up_primary')"
+ echo "primary and soon to be divergent is $divergent"
+ ceph pg dump pgs
+ local non_divergent=""
+ for i in $osds
+ do
+ if [ "$i" = "$divergent" ]; then
+ continue
+ fi
+ non_divergent="$non_divergent $i"
+ done
+
+ echo "writing initial objects"
+ # write a bunch of objects
+ for i in $(seq 1 $testobjects)
+ do
+ rados -p $poolname put existing_$i $dummyfile
+ done
+
+ WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean
+
+ local pgid=$(get_pg $poolname existing_1)
+
+ # blackhole non_divergent
+ echo "blackholing osds $non_divergent"
+ ceph pg dump pgs
+ for i in $non_divergent
+ do
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) config set objectstore_blackhole 1
+ done
+
+ # Write some soon to be divergent
+ echo 'writing divergent object'
+ rados -p $poolname put existing_$testobjects $dummyfile2 &
+ sleep 1
+ rados -p $poolname put existing_$testobjects $dummyfile &
+ rados -p $poolname mksnap snap1
+ rados -p $poolname put existing_$(expr $testobjects - 1) $dummyfile &
+ sleep 10
+ killall -9 rados
+
+ # kill all the osds but leave divergent in
+ echo 'killing all the osds'
+ ceph pg dump pgs
+ kill_daemons $dir KILL osd || return 1
+ for i in $osds
+ do
+ ceph osd down osd.$i
+ done
+ for i in $non_divergent
+ do
+ ceph osd out osd.$i
+ done
+
+ # bring up non-divergent
+ echo "bringing up non_divergent $non_divergent"
+ ceph pg dump pgs
+ for i in $non_divergent
+ do
+ activate_osd $dir $i || return 1
+ done
+ for i in $non_divergent
+ do
+ ceph osd in osd.$i
+ done
+
+ sleep 5
+ #WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean
+
+ # write 1 non-divergent object (ensure that old divergent one is divergent)
+ objname="existing_$(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)"
+ echo "writing non-divergent object $objname"
+ ceph pg dump pgs
+ rados -p $poolname put $objname $dummyfile2
+
+ WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean
+
+ # Dump logs
+ for i in $non_divergent
+ do
+ kill_daemons $dir KILL osd.$i || return 1
+ _objectstore_tool_nodown $dir $i --op log --pgid $pgid
+ activate_osd $dir $i || return 1
+ done
+ _objectstore_tool_nodown $dir $divergent --op log --pgid $pgid
+
+ WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean
+
+ # ensure no recovery of up osds first
+ echo 'delay recovery'
+ ceph pg dump pgs
+ for i in $non_divergent
+ do
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) set_recovery_delay 100000
+ done
+
+ # bring in our divergent friend
+ echo "revive divergent $divergent"
+ ceph pg dump pgs
+ ceph osd set noup
+ activate_osd $dir $divergent
+ sleep 5
+
+ echo 'delay recovery divergent'
+ ceph pg dump pgs
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) set_recovery_delay 100000
+
+ ceph osd unset noup
+
+ wait_for_osd up 0
+ wait_for_osd up 1
+ wait_for_osd up 2
+
+ ceph pg dump pgs
+ echo 'wait for peering'
+ ceph pg dump pgs
+ rados -p $poolname put foo $dummyfile
+
+ echo "killing divergent $divergent"
+ ceph pg dump pgs
+ kill_daemons $dir KILL osd.$divergent
+ #_objectstore_tool_nodown $dir $divergent --op log --pgid $pgid
+ echo "reviving divergent $divergent"
+ ceph pg dump pgs
+ activate_osd $dir $divergent
+
+ sleep 20
+
+ echo "allowing recovery"
+ ceph pg dump pgs
+ # Set osd_recovery_delay_start back to 0 and kick the queue
+ for i in $osds
+ do
+ ceph tell osd.$i debug kick_recovery_wq 0
+ done
+
+ echo 'reading divergent objects'
+ ceph pg dump pgs
+ for i in $(seq 1 $(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE))
+ do
+ rados -p $poolname get existing_$i $dir/existing || return 1
+ done
+ rm -f $dir/existing
+
+ grep _merge_object_divergent_entries $(find $dir -name '*osd*log')
+ # Check for _merge_object_divergent_entries for case #3
+ # XXX: Not reproducing this case
+# if ! grep -q "_merge_object_divergent_entries.* missing, .* adjusting" $(find $dir -name '*osd*log')
+# then
+# echo failure
+# return 1
+# fi
+ # Check for _merge_object_divergent_entries for case #4
+ if ! grep -q "_merge_object_divergent_entries.*rolled back" $(find $dir -name '*osd*log')
+ then
+ echo failure
+ return 1
+ fi
+ echo "success"
+
+ delete_pool $poolname
+ kill_daemons $dir || return 1
+}
+
+# Special case divergence test with ceph-objectstore-tool export/remove/import
+# Test handling of divergent entries with prior_version
+# prior to log_tail and a ceph-objectstore-tool export/import
+# based on qa/tasks/divergent_prior2.py
+function TEST_divergent_2() {
+ local dir=$1
+
+ # something that is always there
+ local dummyfile='/etc/fstab'
+ local dummyfile2='/etc/resolv.conf'
+
+ local num_osds=3
+ local osds="$(seq 0 $(expr $num_osds - 1))"
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ for i in $osds
+ do
+ run_osd $dir $i || return 1
+ done
+
+ ceph osd set noout
+ ceph osd set noin
+ ceph osd set nodown
+ create_pool $poolname 1 1
+ ceph osd pool set $poolname size 3
+ ceph osd pool set $poolname min_size 2
+
+ flush_pg_stats || return 1
+ wait_for_clean || return 1
+
+ # determine primary
+ local divergent="$(ceph pg dump pgs --format=json | jq '.pg_stats[0].up_primary')"
+ echo "primary and soon to be divergent is $divergent"
+ ceph pg dump pgs
+ local non_divergent=""
+ for i in $osds
+ do
+ if [ "$i" = "$divergent" ]; then
+ continue
+ fi
+ non_divergent="$non_divergent $i"
+ done
+
+ echo "writing initial objects"
+ # write a bunch of objects
+ for i in $(seq 1 $testobjects)
+ do
+ rados -p $poolname put existing_$i $dummyfile
+ done
+
+ WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean
+
+ local pgid=$(get_pg $poolname existing_1)
+
+ # blackhole non_divergent
+ echo "blackholing osds $non_divergent"
+ ceph pg dump pgs
+ for i in $non_divergent
+ do
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) config set objectstore_blackhole 1
+ done
+
+ # Do some creates to hit case 2
+ echo 'create new divergent objects'
+ for i in $(seq 1 $DIVERGENT_CREATE)
+ do
+ rados -p $poolname create newobject_$i &
+ done
+ # Write some soon to be divergent
+ echo 'writing divergent objects'
+ for i in $(seq 1 $DIVERGENT_WRITE)
+ do
+ rados -p $poolname put existing_$i $dummyfile2 &
+ done
+ # Remove some soon to be divergent
+ echo 'remove divergent objects'
+ for i in $(seq 1 $DIVERGENT_REMOVE)
+ do
+ rmi=$(expr $i + $DIVERGENT_WRITE)
+ rados -p $poolname rm existing_$rmi &
+ done
+ sleep 10
+ killall -9 rados
+
+ # kill all the osds but leave divergent in
+ echo 'killing all the osds'
+ ceph pg dump pgs
+ kill_daemons $dir KILL osd || return 1
+ for i in $osds
+ do
+ ceph osd down osd.$i
+ done
+ for i in $non_divergent
+ do
+ ceph osd out osd.$i
+ done
+
+ # bring up non-divergent
+ echo "bringing up non_divergent $non_divergent"
+ ceph pg dump pgs
+ for i in $non_divergent
+ do
+ activate_osd $dir $i || return 1
+ done
+ for i in $non_divergent
+ do
+ ceph osd in osd.$i
+ done
+
+ WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean
+
+ # write 1 non-divergent object (ensure that old divergent one is divergent)
+ objname="existing_$(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)"
+ echo "writing non-divergent object $objname"
+ ceph pg dump pgs
+ rados -p $poolname put $objname $dummyfile2
+
+ WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean
+
+ # ensure no recovery of up osds first
+ echo 'delay recovery'
+ ceph pg dump pgs
+ for i in $non_divergent
+ do
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) set_recovery_delay 100000
+ done
+
+ # bring in our divergent friend
+ echo "revive divergent $divergent"
+ ceph pg dump pgs
+ ceph osd set noup
+ activate_osd $dir $divergent
+ sleep 5
+
+ echo 'delay recovery divergent'
+ ceph pg dump pgs
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) set_recovery_delay 100000
+
+ ceph osd unset noup
+
+ wait_for_osd up 0
+ wait_for_osd up 1
+ wait_for_osd up 2
+
+ ceph pg dump pgs
+ echo 'wait for peering'
+ ceph pg dump pgs
+ rados -p $poolname put foo $dummyfile
+
+ # At this point the divergent_priors should have been detected
+
+ echo "killing divergent $divergent"
+ ceph pg dump pgs
+ kill_daemons $dir KILL osd.$divergent
+
+ # export a pg
+ expfile=$dir/exp.$$.out
+ _objectstore_tool_nodown $dir $divergent --op export-remove --pgid $pgid --file $expfile
+ _objectstore_tool_nodown $dir $divergent --op import --file $expfile
+
+ echo "reviving divergent $divergent"
+ ceph pg dump pgs
+ activate_osd $dir $divergent
+ wait_for_osd up $divergent
+
+ sleep 20
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) dump_ops_in_flight
+
+ echo "allowing recovery"
+ ceph pg dump pgs
+ # Set osd_recovery_delay_start back to 0 and kick the queue
+ for i in $osds
+ do
+ ceph tell osd.$i debug kick_recovery_wq 0
+ done
+
+ echo 'reading divergent objects'
+ ceph pg dump pgs
+ for i in $(seq 1 $(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE))
+ do
+ rados -p $poolname get existing_$i $dir/existing || return 1
+ done
+ for i in $(seq 1 $DIVERGENT_CREATE)
+ do
+ rados -p $poolname get newobject_$i $dir/existing
+ done
+ rm -f $dir/existing
+
+ grep _merge_object_divergent_entries $(find $dir -name '*osd*log')
+ # Check for _merge_object_divergent_entries for case #1
+ if ! grep -q "_merge_object_divergent_entries: more recent entry found:" $(find $dir -name '*osd*log')
+ then
+ echo failure
+ return 1
+ fi
+ # Check for _merge_object_divergent_entries for case #2
+ if ! grep -q "_merge_object_divergent_entries.*prior_version or op type indicates creation" $(find $dir -name '*osd*log')
+ then
+ echo failure
+ return 1
+ fi
+ echo "success"
+
+    rm -f $expfile
+
+ delete_pool $poolname
+ kill_daemons $dir || return 1
+}
+
+# This is the same as TEST_divergent_2 above, except that pg autoscaling is
+# enabled in order to reproduce https://tracker.ceph.com/issues/41816
+function TEST_divergent_3() {
+ local dir=$1
+
+ # something that is always there
+ local dummyfile='/etc/fstab'
+ local dummyfile2='/etc/resolv.conf'
+
+ local num_osds=3
+ local osds="$(seq 0 $(expr $num_osds - 1))"
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ for i in $osds
+ do
+ run_osd $dir $i || return 1
+ done
+
+ ceph osd set noout
+ ceph osd set noin
+ ceph osd set nodown
+ create_pool $poolname 1 1
+ ceph osd pool set $poolname size 3
+ ceph osd pool set $poolname min_size 2
+
+ # reproduce https://tracker.ceph.com/issues/41816
+ ceph osd pool set $poolname pg_autoscale_mode on
+
+ divergent=-1
+ start_time=$(date +%s)
+ max_duration=300
+
+ while [ "$divergent" -le -1 ]
+ do
+ flush_pg_stats || return 1
+ wait_for_clean || return 1
+
+ # determine primary
+ divergent="$(ceph pg dump pgs --format=json | jq '.pg_stats[0].up_primary')"
+ echo "primary and soon to be divergent is $divergent"
+ ceph pg dump pgs
+
+ current_time=$(date +%s)
+ elapsed_time=$(expr $current_time - $start_time)
+ if [ "$elapsed_time" -gt "$max_duration" ]; then
+ echo "timed out waiting for divergent"
+ return 1
+ fi
+ done
+
+ local non_divergent=""
+ for i in $osds
+ do
+ if [ "$i" = "$divergent" ]; then
+ continue
+ fi
+ non_divergent="$non_divergent $i"
+ done
+
+ echo "writing initial objects"
+ # write a bunch of objects
+ for i in $(seq 1 $testobjects)
+ do
+ rados -p $poolname put existing_$i $dummyfile
+ done
+
+ WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean
+
+ local pgid=$(get_pg $poolname existing_1)
+
+ # blackhole non_divergent
+ echo "blackholing osds $non_divergent"
+ ceph pg dump pgs
+ for i in $non_divergent
+ do
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) config set objectstore_blackhole 1
+ done
+
+ # Do some creates to hit case 2
+ echo 'create new divergent objects'
+ for i in $(seq 1 $DIVERGENT_CREATE)
+ do
+ rados -p $poolname create newobject_$i &
+ done
+ # Write some soon to be divergent
+ echo 'writing divergent objects'
+ for i in $(seq 1 $DIVERGENT_WRITE)
+ do
+ rados -p $poolname put existing_$i $dummyfile2 &
+ done
+ # Remove some soon to be divergent
+ echo 'remove divergent objects'
+ for i in $(seq 1 $DIVERGENT_REMOVE)
+ do
+ rmi=$(expr $i + $DIVERGENT_WRITE)
+ rados -p $poolname rm existing_$rmi &
+ done
+ sleep 10
+ killall -9 rados
+
+ # kill all the osds but leave divergent in
+ echo 'killing all the osds'
+ ceph pg dump pgs
+ kill_daemons $dir KILL osd || return 1
+ for i in $osds
+ do
+ ceph osd down osd.$i
+ done
+ for i in $non_divergent
+ do
+ ceph osd out osd.$i
+ done
+
+ # bring up non-divergent
+ echo "bringing up non_divergent $non_divergent"
+ ceph pg dump pgs
+ for i in $non_divergent
+ do
+ activate_osd $dir $i || return 1
+ done
+ for i in $non_divergent
+ do
+ ceph osd in osd.$i
+ done
+
+ WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean
+
+ # write 1 non-divergent object (ensure that old divergent one is divergent)
+ objname="existing_$(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)"
+ echo "writing non-divergent object $objname"
+ ceph pg dump pgs
+ rados -p $poolname put $objname $dummyfile2
+
+ WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean
+
+ # ensure no recovery of up osds first
+ echo 'delay recovery'
+ ceph pg dump pgs
+ for i in $non_divergent
+ do
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) set_recovery_delay 100000
+ done
+
+ # bring in our divergent friend
+ echo "revive divergent $divergent"
+ ceph pg dump pgs
+ ceph osd set noup
+ activate_osd $dir $divergent
+ sleep 5
+
+ echo 'delay recovery divergent'
+ ceph pg dump pgs
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) set_recovery_delay 100000
+
+ ceph osd unset noup
+
+ wait_for_osd up 0
+ wait_for_osd up 1
+ wait_for_osd up 2
+
+ ceph pg dump pgs
+ echo 'wait for peering'
+ ceph pg dump pgs
+ rados -p $poolname put foo $dummyfile
+
+ # At this point the divergent_priors should have been detected
+
+ echo "killing divergent $divergent"
+ ceph pg dump pgs
+ kill_daemons $dir KILL osd.$divergent
+
+ # export a pg
+ expfile=$dir/exp.$$.out
+ _objectstore_tool_nodown $dir $divergent --op export-remove --pgid $pgid --file $expfile
+ _objectstore_tool_nodown $dir $divergent --op import --file $expfile
+
+ echo "reviving divergent $divergent"
+ ceph pg dump pgs
+ activate_osd $dir $divergent
+ wait_for_osd up $divergent
+
+ sleep 20
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) dump_ops_in_flight
+
+ echo "allowing recovery"
+ ceph pg dump pgs
+ # Set osd_recovery_delay_start back to 0 and kick the queue
+ for i in $osds
+ do
+ ceph tell osd.$i debug kick_recovery_wq 0
+ done
+
+ echo 'reading divergent objects'
+ ceph pg dump pgs
+ for i in $(seq 1 $(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE))
+ do
+ rados -p $poolname get existing_$i $dir/existing || return 1
+ done
+ for i in $(seq 1 $DIVERGENT_CREATE)
+ do
+ rados -p $poolname get newobject_$i $dir/existing
+ done
+ rm -f $dir/existing
+
+ grep _merge_object_divergent_entries $(find $dir -name '*osd*log')
+ # Check for _merge_object_divergent_entries for case #1
+ if ! grep -q "_merge_object_divergent_entries: more recent entry found:" $(find $dir -name '*osd*log')
+ then
+ echo failure
+ return 1
+ fi
+ # Check for _merge_object_divergent_entries for case #2
+ if ! grep -q "_merge_object_divergent_entries.*prior_version or op type indicates creation" $(find $dir -name '*osd*log')
+ then
+ echo failure
+ return 1
+ fi
+ echo "success"
+
+    rm -f $expfile
+
+ delete_pool $poolname
+ kill_daemons $dir || return 1
+}
+
+
+main divergent-priors "$@"
+
+# Local Variables:
+# compile-command: "make -j4 && ../qa/run-standalone.sh divergent-priors.sh"
+# End:
diff --git a/qa/standalone/osd/ec-error-rollforward.sh b/qa/standalone/osd/ec-error-rollforward.sh
new file mode 100755
index 000000000..621e6b13f
--- /dev/null
+++ b/qa/standalone/osd/ec-error-rollforward.sh
@@ -0,0 +1,66 @@
+#!/usr/bin/env bash
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ # Fix port????
+ export CEPH_MON="127.0.0.1:7132" # git grep '\<7132\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+ export margin=10
+ export objects=200
+ export poolname=test
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
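+# Freeze one shard (osd.2) so the EC deletes issued below cannot complete,
+# then SIGKILL every osd while those ops are still in flight.  After restart
+# the cluster is expected to roll the interrupted ops forward and go clean.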
+function TEST_ec_error_rollforward() {
+ local dir=$1
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+ run_osd $dir 3 || return 1
+
+ ceph osd erasure-code-profile set ec-profile m=2 k=2 crush-failure-domain=osd
+ ceph osd pool create ec 1 1 erasure ec-profile
+
+ rados -p ec put foo /etc/passwd
+
+ kill -STOP $(cat $dir/osd.2.pid)
+
+ rados -p ec rm foo &
+ pids="$!"
+ sleep 1
+ rados -p ec rm a &
+ pids+=" $!"
+ rados -p ec rm b &
+ pids+=" $!"
+ rados -p ec rm c &
+ pids+=" $!"
+ sleep 1
+ # Use SIGKILL so stopped osd.2 will terminate
+ # and kill_daemons waits for daemons to die
+ kill_daemons $dir KILL osd
+ kill $pids
+ wait
+
+ activate_osd $dir 0 || return 1
+ activate_osd $dir 1 || return 1
+ activate_osd $dir 2 || return 1
+ activate_osd $dir 3 || return 1
+
+ wait_for_clean || return 1
+}
+
+main ec-error-rollforward "$@"
diff --git a/qa/standalone/osd/osd-bench.sh b/qa/standalone/osd/osd-bench.sh
new file mode 100755
index 000000000..eb1a6a440
--- /dev/null
+++ b/qa/standalone/osd/osd-bench.sh
@@ -0,0 +1,97 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
+# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com>
+#
+# Author: Loic Dachary <loic@dachary.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7106" # git grep '\<7106\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+ CEPH_ARGS+="--debug-bluestore 20 "
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+function TEST_bench() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+
+ local osd_bench_small_size_max_iops=$(CEPH_ARGS='' ceph-conf \
+ --show-config-value osd_bench_small_size_max_iops)
+ local osd_bench_large_size_max_throughput=$(CEPH_ARGS='' ceph-conf \
+ --show-config-value osd_bench_large_size_max_throughput)
+ local osd_bench_max_block_size=$(CEPH_ARGS='' ceph-conf \
+ --show-config-value osd_bench_max_block_size)
+ local osd_bench_duration=$(CEPH_ARGS='' ceph-conf \
+ --show-config-value osd_bench_duration)
+
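+    # The osd rejects bench requests that exceed its limits: for small (<1MB)
+    # blocks the byte count is capped at bsize * duration * small_size_max_iops,
+    # for large (>=1MB) blocks at large_size_max_throughput * duration.  E.g. if
+    # osd_bench_duration were 30 and the small-size cap 100 iops, a 1024-byte
+    # bench would be limited to 1024 * 30 * 100 = 3072000 bytes; each check
+    # below asks for one byte more than its cap.
+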
+ #
+ # block size too high
+ #
+ expect_failure $dir osd_bench_max_block_size \
+ ceph tell osd.0 bench 1024 $((osd_bench_max_block_size + 1)) || return 1
+
+ #
+ # count too high for small (< 1MB) block sizes
+ #
+ local bsize=1024
+ local max_count=$(($bsize * $osd_bench_duration * $osd_bench_small_size_max_iops))
+ expect_failure $dir bench_small_size_max_iops \
+ ceph tell osd.0 bench $(($max_count + 1)) $bsize || return 1
+
+ #
+ # count too high for large (>= 1MB) block sizes
+ #
+ local bsize=$((1024 * 1024 + 1))
+ local max_count=$(($osd_bench_large_size_max_throughput * $osd_bench_duration))
+ expect_failure $dir osd_bench_large_size_max_throughput \
+ ceph tell osd.0 bench $(($max_count + 1)) $bsize || return 1
+
+ #
+ # default values should work
+ #
+ ceph tell osd.0 bench || return 1
+
+ #
+    # test object_size < block_size
+    #
+    ceph tell osd.0 bench 10 14456 4444 3
+
+ #
+    # test object_size < block_size & object_size = 0 (default value)
+ #
+ ceph tell osd.0 bench 1 14456
+}
+
+main osd-bench "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && test/osd/osd-bench.sh"
+# End:
diff --git a/qa/standalone/osd/osd-bluefs-volume-ops.sh b/qa/standalone/osd/osd-bluefs-volume-ops.sh
new file mode 100755
index 000000000..aedfbc9b5
--- /dev/null
+++ b/qa/standalone/osd/osd-bluefs-volume-ops.sh
@@ -0,0 +1,497 @@
+#!/usr/bin/env bash
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+[ `uname` = FreeBSD ] && exit 0
+
+function run() {
+ local dir=$1
+ shift
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+function TEST_bluestore() {
+ local dir=$1
+
+ local flimit=$(ulimit -n)
+ if [ $flimit -lt 1536 ]; then
+ echo "Low open file limit ($flimit), test may fail. Increase to 1536 or higher and retry if that happens."
+ fi
+ export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+ CEPH_ARGS+="--bluestore_block_size=2147483648 "
+ CEPH_ARGS+="--bluestore_block_db_create=true "
+ CEPH_ARGS+="--bluestore_block_db_size=1073741824 "
+ CEPH_ARGS+="--bluestore_block_wal_size=536870912 "
+ CEPH_ARGS+="--bluestore_block_wal_create=true "
+ CEPH_ARGS+="--bluestore_fsck_on_mount=true "
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ osd_pid0=$(cat $dir/osd.0.pid)
+ run_osd $dir 1 || return 1
+ osd_pid1=$(cat $dir/osd.1.pid)
+ run_osd $dir 2 || return 1
+ osd_pid2=$(cat $dir/osd.2.pid)
+ run_osd $dir 3 || return 1
+ osd_pid3=$(cat $dir/osd.3.pid)
+
+ sleep 5
+
+ create_pool foo 16
+
+ # write some objects
+ timeout 60 rados bench -p foo 30 write -b 4096 --no-cleanup #|| return 1
+
+ echo "after bench"
+
+ # kill
+ while kill $osd_pid0; do sleep 1 ; done
+ ceph osd down 0
+ while kill $osd_pid1; do sleep 1 ; done
+ ceph osd down 1
+ while kill $osd_pid2; do sleep 1 ; done
+ ceph osd down 2
+ while kill $osd_pid3; do sleep 1 ; done
+ ceph osd down 3
+
+ # expand slow devices
+ ceph-bluestore-tool --path $dir/0 fsck || return 1
+ ceph-bluestore-tool --path $dir/1 fsck || return 1
+ ceph-bluestore-tool --path $dir/2 fsck || return 1
+ ceph-bluestore-tool --path $dir/3 fsck || return 1
+
+ truncate $dir/0/block -s 4294967296 # 4GB
+ ceph-bluestore-tool --path $dir/0 bluefs-bdev-expand || return 1
+ truncate $dir/1/block -s 4311744512 # 4GB + 16MB
+ ceph-bluestore-tool --path $dir/1 bluefs-bdev-expand || return 1
+ truncate $dir/2/block -s 4295099392 # 4GB + 129KB
+ ceph-bluestore-tool --path $dir/2 bluefs-bdev-expand || return 1
+ truncate $dir/3/block -s 4293918720 # 4GB - 1MB
+ ceph-bluestore-tool --path $dir/3 bluefs-bdev-expand || return 1
+
+ # slow, DB, WAL -> slow, DB
+ ceph-bluestore-tool --path $dir/0 fsck || return 1
+ ceph-bluestore-tool --path $dir/1 fsck || return 1
+ ceph-bluestore-tool --path $dir/2 fsck || return 1
+ ceph-bluestore-tool --path $dir/3 fsck || return 1
+
+ ceph-bluestore-tool --path $dir/0 bluefs-bdev-sizes
+
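+    # bluefs-bdev-migrate moves BlueFS data off each --devs-source device onto
+    # the --dev-target; here the standalone WAL is folded into the DB volume.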
+ ceph-bluestore-tool --path $dir/0 \
+ --devs-source $dir/0/block.wal \
+ --dev-target $dir/0/block.db \
+ --command bluefs-bdev-migrate || return 1
+
+ ceph-bluestore-tool --path $dir/0 fsck || return 1
+
+ # slow, DB, WAL -> slow, WAL
+ ceph-bluestore-tool --path $dir/1 \
+ --devs-source $dir/1/block.db \
+ --dev-target $dir/1/block \
+ --command bluefs-bdev-migrate || return 1
+
+ ceph-bluestore-tool --path $dir/1 fsck || return 1
+
+ # slow, DB, WAL -> slow
+ ceph-bluestore-tool --path $dir/2 \
+ --devs-source $dir/2/block.wal \
+ --devs-source $dir/2/block.db \
+ --dev-target $dir/2/block \
+ --command bluefs-bdev-migrate || return 1
+
+ ceph-bluestore-tool --path $dir/2 fsck || return 1
+
+ # slow, DB, WAL -> slow, WAL (negative case)
+ ceph-bluestore-tool --path $dir/3 \
+ --devs-source $dir/3/block.db \
+ --dev-target $dir/3/block.wal \
+ --command bluefs-bdev-migrate
+
+ # Migration to WAL is unsupported
+ if [ $? -eq 0 ]; then
+ return 1
+ fi
+ ceph-bluestore-tool --path $dir/3 fsck || return 1
+
+ # slow, DB, WAL -> slow, DB (WAL to slow then slow to DB)
+ ceph-bluestore-tool --path $dir/3 \
+ --devs-source $dir/3/block.wal \
+ --dev-target $dir/3/block \
+ --command bluefs-bdev-migrate || return 1
+
+ ceph-bluestore-tool --path $dir/3 fsck || return 1
+
+ ceph-bluestore-tool --path $dir/3 \
+ --devs-source $dir/3/block \
+ --dev-target $dir/3/block.db \
+ --command bluefs-bdev-migrate || return 1
+
+ ceph-bluestore-tool --path $dir/3 fsck || return 1
+
+ activate_osd $dir 0 || return 1
+ osd_pid0=$(cat $dir/osd.0.pid)
+ activate_osd $dir 1 || return 1
+ osd_pid1=$(cat $dir/osd.1.pid)
+ activate_osd $dir 2 || return 1
+ osd_pid2=$(cat $dir/osd.2.pid)
+ activate_osd $dir 3 || return 1
+ osd_pid3=$(cat $dir/osd.3.pid)
+
+ wait_for_clean || return 1
+
+ # write some objects
+ timeout 60 rados bench -p foo 30 write -b 4096 --no-cleanup #|| return 1
+
+ # kill
+ while kill $osd_pid0; do sleep 1 ; done
+ ceph osd down 0
+ while kill $osd_pid1; do sleep 1 ; done
+ ceph osd down 1
+ while kill $osd_pid2; do sleep 1 ; done
+ ceph osd down 2
+ while kill $osd_pid3; do sleep 1 ; done
+ ceph osd down 3
+
+ # slow, DB -> slow, DB, WAL
+ ceph-bluestore-tool --path $dir/0 fsck || return 1
+
+ dd if=/dev/zero of=$dir/0/wal count=512 bs=1M
+ ceph-bluestore-tool --path $dir/0 \
+ --dev-target $dir/0/wal \
+ --command bluefs-bdev-new-wal || return 1
+
+ ceph-bluestore-tool --path $dir/0 fsck || return 1
+
+ # slow, WAL -> slow, DB, WAL
+ ceph-bluestore-tool --path $dir/1 fsck || return 1
+
+ dd if=/dev/zero of=$dir/1/db count=1024 bs=1M
+ ceph-bluestore-tool --path $dir/1 \
+ --dev-target $dir/1/db \
+ --command bluefs-bdev-new-db || return 1
+
+ ceph-bluestore-tool --path $dir/1 \
+ --devs-source $dir/1/block \
+ --dev-target $dir/1/block.db \
+ --command bluefs-bdev-migrate || return 1
+
+ ceph-bluestore-tool --path $dir/1 fsck || return 1
+
+ # slow -> slow, DB, WAL
+ ceph-bluestore-tool --path $dir/2 fsck || return 1
+
+ ceph-bluestore-tool --path $dir/2 \
+ --command bluefs-bdev-new-db || return 1
+
+ ceph-bluestore-tool --path $dir/2 \
+ --command bluefs-bdev-new-wal || return 1
+
+ ceph-bluestore-tool --path $dir/2 \
+ --devs-source $dir/2/block \
+ --dev-target $dir/2/block.db \
+ --command bluefs-bdev-migrate || return 1
+
+ ceph-bluestore-tool --path $dir/2 fsck || return 1
+
+ # slow, DB -> slow, WAL
+ ceph-bluestore-tool --path $dir/3 fsck || return 1
+
+ ceph-bluestore-tool --path $dir/3 \
+ --command bluefs-bdev-new-wal || return 1
+
+ ceph-bluestore-tool --path $dir/3 \
+ --devs-source $dir/3/block.db \
+ --dev-target $dir/3/block \
+ --command bluefs-bdev-migrate || return 1
+
+ ceph-bluestore-tool --path $dir/3 fsck || return 1
+
+ activate_osd $dir 0 || return 1
+ osd_pid0=$(cat $dir/osd.0.pid)
+ activate_osd $dir 1 || return 1
+ osd_pid1=$(cat $dir/osd.1.pid)
+ activate_osd $dir 2 || return 1
+ osd_pid2=$(cat $dir/osd.2.pid)
+ activate_osd $dir 3 || return 1
+ osd_pid3=$(cat $dir/osd.3.pid)
+
+ # write some objects
+ timeout 60 rados bench -p foo 30 write -b 4096 --no-cleanup #|| return 1
+
+ # kill
+ while kill $osd_pid0; do sleep 1 ; done
+ ceph osd down 0
+ while kill $osd_pid1; do sleep 1 ; done
+ ceph osd down 1
+ while kill $osd_pid2; do sleep 1 ; done
+ ceph osd down 2
+ while kill $osd_pid3; do sleep 1 ; done
+ ceph osd down 3
+
+ # slow, DB1, WAL -> slow, DB2, WAL
+ ceph-bluestore-tool --path $dir/0 fsck || return 1
+
+ dd if=/dev/zero of=$dir/0/db2 count=1024 bs=1M
+ ceph-bluestore-tool --path $dir/0 \
+ --devs-source $dir/0/block.db \
+ --dev-target $dir/0/db2 \
+ --command bluefs-bdev-migrate || return 1
+
+ ceph-bluestore-tool --path $dir/0 fsck || return 1
+
+ # slow, DB, WAL1 -> slow, DB, WAL2
+
+ dd if=/dev/zero of=$dir/0/wal2 count=512 bs=1M
+ ceph-bluestore-tool --path $dir/0 \
+ --devs-source $dir/0/block.wal \
+ --dev-target $dir/0/wal2 \
+ --command bluefs-bdev-migrate || return 1
+ rm -rf $dir/0/wal
+
+ ceph-bluestore-tool --path $dir/0 fsck || return 1
+
+ # slow, DB + WAL -> slow, DB2 -> slow
+ ceph-bluestore-tool --path $dir/1 fsck || return 1
+
+ dd if=/dev/zero of=$dir/1/db2 count=1024 bs=1M
+ ceph-bluestore-tool --path $dir/1 \
+ --devs-source $dir/1/block.db \
+ --devs-source $dir/1/block.wal \
+ --dev-target $dir/1/db2 \
+ --command bluefs-bdev-migrate || return 1
+
+ rm -rf $dir/1/db
+
+ ceph-bluestore-tool --path $dir/1 fsck || return 1
+
+ ceph-bluestore-tool --path $dir/1 \
+ --devs-source $dir/1/block.db \
+ --dev-target $dir/1/block \
+ --command bluefs-bdev-migrate || return 1
+
+ rm -rf $dir/1/db2
+
+ ceph-bluestore-tool --path $dir/1 fsck || return 1
+
+ # slow -> slow, DB (negative case)
+ ceph-objectstore-tool --type bluestore --data-path $dir/2 \
+ --op fsck --no-mon-config || return 1
+
+ dd if=/dev/zero of=$dir/2/db2 count=1024 bs=1M
+ ceph-bluestore-tool --path $dir/2 \
+ --devs-source $dir/2/block \
+ --dev-target $dir/2/db2 \
+ --command bluefs-bdev-migrate
+
+ # Migration from slow-only to new device is unsupported
+ if [ $? -eq 0 ]; then
+ return 1
+ fi
+ ceph-bluestore-tool --path $dir/2 fsck || return 1
+
+ # slow + DB + WAL -> slow, DB2
+ dd if=/dev/zero of=$dir/2/db2 count=1024 bs=1M
+
+ ceph-bluestore-tool --path $dir/2 \
+ --devs-source $dir/2/block \
+ --devs-source $dir/2/block.db \
+ --devs-source $dir/2/block.wal \
+ --dev-target $dir/2/db2 \
+ --command bluefs-bdev-migrate || return 1
+
+ ceph-bluestore-tool --path $dir/2 fsck || return 1
+
+ # slow + WAL -> slow2, WAL2
+ dd if=/dev/zero of=$dir/3/wal2 count=1024 bs=1M
+
+ ceph-bluestore-tool --path $dir/3 \
+ --devs-source $dir/3/block \
+ --devs-source $dir/3/block.wal \
+ --dev-target $dir/3/wal2 \
+ --command bluefs-bdev-migrate || return 1
+
+ ceph-bluestore-tool --path $dir/3 fsck || return 1
+
+ activate_osd $dir 0 || return 1
+ osd_pid0=$(cat $dir/osd.0.pid)
+ activate_osd $dir 1 || return 1
+ osd_pid1=$(cat $dir/osd.1.pid)
+ activate_osd $dir 2 || return 1
+ osd_pid2=$(cat $dir/osd.2.pid)
+ activate_osd $dir 3 || return 1
+ osd_pid3=$(cat $dir/osd.3.pid)
+
+ # write some objects
+ timeout 60 rados bench -p foo 30 write -b 4096 --no-cleanup #|| return 1
+
+ wait_for_clean || return 1
+}
+
+function TEST_bluestore2() {
+ local dir=$1
+
+ local flimit=$(ulimit -n)
+ if [ $flimit -lt 1536 ]; then
+ echo "Low open file limit ($flimit), test may fail. Increase to 1536 or higher and retry if that happens."
+ fi
+ export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+ CEPH_ARGS+="--bluestore_block_size=4294967296 "
+ CEPH_ARGS+="--bluestore_block_db_create=true "
+ CEPH_ARGS+="--bluestore_block_db_size=1073741824 "
+ CEPH_ARGS+="--bluestore_block_wal_create=false "
+ CEPH_ARGS+="--bluestore_fsck_on_mount=true "
+ CEPH_ARGS+="--osd_pool_default_size=1 "
+ CEPH_ARGS+="--osd_pool_default_min_size=1 "
+ CEPH_ARGS+="--bluestore_debug_enforce_settings=ssd "
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ osd_pid0=$(cat $dir/osd.0.pid)
+
+ sleep 5
+ create_pool foo 16
+
+    retry=0
+ while [[ $retry -le 5 ]]; do
+ # write some objects
+ timeout 60 rados bench -p foo 10 write --write-omap --no-cleanup #|| return 1
+
+        # give RocksDB some time to cool down and spill files to the slow level(s)
+ sleep 10
+
+ db_used=$( ceph tell osd.0 perf dump bluefs | jq ".bluefs.db_used_bytes" )
+ spilled_over=$( ceph tell osd.0 perf dump bluefs | jq ".bluefs.slow_used_bytes" )
+ ((retry+=1))
+ test $spilled_over -eq 0 || break
+ done
+ test $spilled_over -gt 0 || return 1
+
+ while kill $osd_pid0; do sleep 1 ; done
+ ceph osd down 0
+
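+    # Fold the DB volume, including the spilled-over data, into the main device
+    # and verify the result with bluefs-bdev-sizes and fsck.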
+ ceph-bluestore-tool --path $dir/0 \
+ --devs-source $dir/0/block.db \
+ --dev-target $dir/0/block \
+ --command bluefs-bdev-migrate || return 1
+
+ ceph-bluestore-tool --path $dir/0 \
+ --command bluefs-bdev-sizes || return 1
+
+ ceph-bluestore-tool --path $dir/0 \
+ --command fsck || return 1
+
+ activate_osd $dir 0 || return 1
+ osd_pid0=$(cat $dir/osd.0.pid)
+
+ wait_for_clean || return 1
+}
+
+function TEST_bluestore_expand() {
+ local dir=$1
+
+ local flimit=$(ulimit -n)
+ if [ $flimit -lt 1536 ]; then
+ echo "Low open file limit ($flimit), test may fail. Increase to 1536 or higher and retry if that happens."
+ fi
+ export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+ CEPH_ARGS+="--bluestore_block_size=4294967296 "
+ CEPH_ARGS+="--bluestore_block_db_create=true "
+ CEPH_ARGS+="--bluestore_block_db_size=1073741824 "
+ CEPH_ARGS+="--bluestore_block_wal_create=false "
+ CEPH_ARGS+="--bluestore_fsck_on_mount=true "
+ CEPH_ARGS+="--osd_pool_default_size=1 "
+ CEPH_ARGS+="--osd_pool_default_min_size=1 "
+ CEPH_ARGS+="--bluestore_debug_enforce_settings=ssd "
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ osd_pid0=$(cat $dir/osd.0.pid)
+
+ sleep 5
+ create_pool foo 16
+
+ # write some objects
+ timeout 60 rados bench -p foo 30 write -b 4096 --no-cleanup #|| return 1
+ sleep 5
+
+ total_space_before=$( ceph tell osd.0 perf dump bluefs | jq ".bluefs.slow_total_bytes" )
+ free_space_before=`ceph tell osd.0 bluestore bluefs device info | grep "BDEV_SLOW" -A 2 | grep free | cut -d':' -f 2 | cut -d"," -f 1 | cut -d' ' -f 2`
+
+ # kill
+ while kill $osd_pid0; do sleep 1 ; done
+ ceph osd down 0
+
+ # destage allocation to file before expand (in case fast-shutdown skipped that step)
+ ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 allocmap || return 1
+
+ # expand slow devices
+ ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 fsck || return 1
+
+ requested_space=4294967296 # 4GB
+ truncate $dir/0/block -s $requested_space
+ ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 bluefs-bdev-expand || return 1
+
+    # verify the store after the expansion
+ ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 fsck || return 1
+
+ # compare allocation-file with RocksDB state
+ ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 qfsck || return 1
+
+ ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 bluefs-bdev-sizes
+
+ activate_osd $dir 0 || return 1
+ osd_pid0=$(cat $dir/osd.0.pid)
+
+ wait_for_clean || return 1
+
+ total_space_after=$( ceph tell osd.0 perf dump bluefs | jq ".bluefs.slow_total_bytes" )
+ free_space_after=`ceph tell osd.0 bluestore bluefs device info | grep "BDEV_SLOW" -A 2 | grep free | cut -d':' -f 2 | cut -d"," -f 1 | cut -d' ' -f 2`
+
+    if [ "$total_space_after" != "$requested_space" ]; then
+ echo "total_space_after = $total_space_after"
+ echo "requested_space = $requested_space"
+ return 1;
+ fi
+
+ total_space_added=$((total_space_after - total_space_before))
+ free_space_added=$((free_space_after - free_space_before))
+
+    new_used_space=$((total_space_added - free_space_added))
+    echo $new_used_space
+    # allow up to 128KB to be consumed
+ if [ $new_used_space -gt 131072 ]; then
+ echo "total_space_added = $total_space_added"
+ echo "free_space_added = $free_space_added"
+ return 1;
+ fi
+
+ # kill
+ while kill $osd_pid0; do sleep 1 ; done
+ ceph osd down 0
+
+ ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 qfsck || return 1
+}
+
+main osd-bluefs-volume-ops "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && test/osd/osd-bluefs-volume-ops.sh"
+# End:
diff --git a/qa/standalone/osd/osd-config.sh b/qa/standalone/osd/osd-config.sh
new file mode 100755
index 000000000..126c2f7de
--- /dev/null
+++ b/qa/standalone/osd/osd-config.sh
@@ -0,0 +1,97 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
+# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com>
+#
+# Author: Loic Dachary <loic@dachary.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7100" # git grep '\<7100\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+function TEST_config_init() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ local stale=1000
+ local cache=500
+ run_osd $dir 0 \
+ --osd-map-cache-size=$cache \
+ --osd-pg-epoch-persisted-max-stale=$stale \
+ || return 1
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log flush || return 1
+ grep 'is not > osd_pg_epoch_persisted_max_stale' $dir/osd.0.log || return 1
+}
+
+function TEST_config_track() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+
+ local osd_map_cache_size=$(CEPH_ARGS='' ceph-conf \
+ --show-config-value osd_map_cache_size)
+ local osd_pg_epoch_persisted_max_stale=$(CEPH_ARGS='' ceph-conf \
+ --show-config-value osd_pg_epoch_persisted_max_stale)
+
+ #
+ # increase the osd_pg_epoch_persisted_max_stale above the default cache_size
+ #
+ ! grep 'is not > osd_pg_epoch_persisted_max_stale' $dir/osd.0.log || return 1
+ local stale=$(($osd_map_cache_size * 2))
+ ceph tell osd.0 injectargs "--osd-pg-epoch-persisted-max-stale $stale" || return 1
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log flush || return 1
+ grep 'is not > osd_pg_epoch_persisted_max_stale' $dir/osd.0.log || return 1
+ rm $dir/osd.0.log
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log reopen || return 1
+}
+
+function TEST_default_adjustment() {
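+    # --default-<option> replaces the compiled-in default, but an explicit
+    # command line option still takes precedence over it.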
+ a=$(ceph-osd --no-mon-config --show-config-value rgw_torrent_origin)
+ b=$(ceph-osd --no-mon-config --show-config-value rgw_torrent_origin --default-rgw-torrent-origin default)
+ c=$(ceph-osd --no-mon-config --show-config-value rgw_torrent_origin --default-rgw-torrent-origin arg)
+ [ "$a" != "default" ] || return 1
+ [ "$b" = "default" ] || return 1
+ [ "$c" = "arg" ] || return 1
+
+ a=$(ceph-osd --no-mon-config --show-config-value log_to_file)
+ b=$(ceph-osd --no-mon-config --show-config-value log_to_file --default-log-to-file=false)
+ c=$(ceph-osd --no-mon-config --show-config-value log_to_file --default-log-to-file=false --log-to-file)
+ [ "$a" = "true" ] || return 1
+ [ "$b" = "false" ] || return 1
+ [ "$c" = "true" ] || return 1
+}
+
+main osd-config "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && test/osd/osd-config.sh"
+# End:
diff --git a/qa/standalone/osd/osd-copy-from.sh b/qa/standalone/osd/osd-copy-from.sh
new file mode 100755
index 000000000..8ac0ab541
--- /dev/null
+++ b/qa/standalone/osd/osd-copy-from.sh
@@ -0,0 +1,68 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
+# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com>
+#
+# Author: Loic Dachary <loic@dachary.org>
+# Author: Sage Weil <sage@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7111" # git grep '\<7111\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+function TEST_copy_from() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ create_rbd_pool || return 1
+
+ # success
+ rados -p rbd put foo $(which rados)
+ rados -p rbd cp foo foo2
+ rados -p rbd stat foo2
+
+ # failure
+ ceph tell osd.\* injectargs -- --osd-debug-inject-copyfrom-error
+ ! rados -p rbd cp foo foo3
+ ! rados -p rbd stat foo3
+
+ # success again
+ ceph tell osd.\* injectargs -- --no-osd-debug-inject-copyfrom-error
+ ! rados -p rbd cp foo foo3
+ rados -p rbd stat foo3
+}
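+
+# Note (illustrative, based on the calls above): boolean debug options can be
+# toggled via injectargs using the --<option> / --no-<option> forms, e.g.
+#
+#   ceph tell osd.\* injectargs -- --osd-debug-inject-copyfrom-error      # enable injection
+#   ceph tell osd.\* injectargs -- --no-osd-debug-inject-copyfrom-error   # disable injection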
+
+main osd-copy-from "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && test/osd/osd-copy-from.sh"
+# End:
diff --git a/qa/standalone/osd/osd-dup.sh b/qa/standalone/osd/osd-dup.sh
new file mode 100755
index 000000000..ab442c538
--- /dev/null
+++ b/qa/standalone/osd/osd-dup.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+[ `uname` = FreeBSD ] && exit 0
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+ # avoid running out of fds in rados bench
+ CEPH_ARGS+="--filestore_wbthrottle_xfs_ios_hard_limit=900 "
+ CEPH_ARGS+="--filestore_wbthrottle_btrfs_ios_hard_limit=900 "
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+main osd-dup "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && test/osd/osd-dup.sh"
+# End:
diff --git a/qa/standalone/osd/osd-fast-mark-down.sh b/qa/standalone/osd/osd-fast-mark-down.sh
new file mode 100755
index 000000000..0ef9d8ce4
--- /dev/null
+++ b/qa/standalone/osd/osd-fast-mark-down.sh
@@ -0,0 +1,111 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2016 Piotr Dałek <git@predictor.org.pl>
+# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com>
+#
+# Author: Piotr Dałek <git@predictor.org.pl>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+MAX_PROPAGATION_TIME=30
+
+function run() {
+ local dir=$1
+ shift
+ rm -f $dir/*.pid
+ export CEPH_MON="127.0.0.1:7126" # git grep '\<7126\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+
+ OLD_ARGS=$CEPH_ARGS
+ CEPH_ARGS+="--osd-fast-fail-on-connection-refused=false "
+    echo "Ensuring the old behavior is still present..."
+    test_fast_kill $dir && { echo "OSDs died too early! Old behavior doesn't work." ; return 1 ; }
+
+ CEPH_ARGS=$OLD_ARGS"--osd-fast-fail-on-connection-refused=true "
+ OLD_ARGS=$CEPH_ARGS
+
+ CEPH_ARGS=$OLD_ARGS"--ms_type=async --mon-host=$CEPH_MON"
+ echo "Testing async msgr..."
+ test_fast_kill $dir || return 1
+
+ return 0
+
+}
+
+function test_fast_kill() {
+ # create cluster with 3 osds
+ setup $dir || return 1
+ run_mon $dir a --osd_pool_default_size=3 || return 1
+ run_mgr $dir x || return 1
+ for oi in {0..2}; do
+ run_osd $dir $oi || return 1
+ pids[$oi]=$(cat $dir/osd.$oi.pid)
+ done
+
+ create_rbd_pool || return 1
+
+    # write some objects to ensure connectivity between the osds
+ timeout 20 rados -p rbd bench 10 write -b 4096 --max-objects 128 --no-cleanup || return 1
+ sleep 1
+
+ killid=0
+ previd=0
+
+    # kill a random osd and check that the down osd count increases within MAX_PROPAGATION_TIME seconds
+ for i in {1..2}; do
+ while [ $killid -eq $previd ]; do
+ killid=${pids[$RANDOM%${#pids[@]}]}
+ done
+ previd=$killid
+
+ kill -9 $killid
+ time_left=$MAX_PROPAGATION_TIME
+ down_osds=0
+
+ while [ $time_left -gt 0 ]; do
+ sleep 1
+ time_left=$[$time_left - 1];
+
+ grep -m 1 -c -F "ms_handle_refused" $dir/osd.*.log > /dev/null
+ if [ $? -ne 0 ]; then
+ continue
+ fi
+
+ down_osds=$(ceph osd tree | grep -c down)
+ if [ $down_osds -lt $i ]; then
+ # osds not marked down yet, try again in a second
+ continue
+ elif [ $down_osds -gt $i ]; then
+ echo Too many \($down_osds\) osds died!
+ return 1
+ else
+ break
+ fi
+ done
+
+ if [ $down_osds -lt $i ]; then
+ echo Killed the OSD, yet it is not marked down
+ ceph osd tree
+ return 1
+ fi
+ done
+ pkill -SIGTERM rados
+ teardown $dir || return 1
+}
+
+main osd-fast-mark-down "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && test/osd/osd-fast-mark-down.sh"
+# End:
diff --git a/qa/standalone/osd/osd-force-create-pg.sh b/qa/standalone/osd/osd-force-create-pg.sh
new file mode 100755
index 000000000..ca4b0239e
--- /dev/null
+++ b/qa/standalone/osd/osd-force-create-pg.sh
@@ -0,0 +1,53 @@
+#!/usr/bin/env bash
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7145" # git grep '\<7145\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+function TEST_force_create_pg() {
+ local dir=$1
+
+ run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+
+ ceph osd pool create foo 50 || return 1
+ wait_for_clean || return 1
+
+ kill_daemons $dir TERM osd.0
+ kill_daemons $dir TERM osd.1
+ kill_daemons $dir TERM osd.2
+ ceph-objectstore-tool --data-path $dir/0 --op remove --pgid 1.0 --force
+ ceph-objectstore-tool --data-path $dir/1 --op remove --pgid 1.0 --force
+ ceph-objectstore-tool --data-path $dir/2 --op remove --pgid 1.0 --force
+ activate_osd $dir 0 || return 1
+ activate_osd $dir 1 || return 1
+ activate_osd $dir 2 || return 1
+ sleep 10
+ ceph pg ls | grep 1.0 | grep stale || return 1
+
+ ceph osd force-create-pg 1.0 --yes-i-really-mean-it || return 1
+ wait_for_clean || return 1
+}
+
+main osd-force-create-pg "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && test/osd/osd-force-create-pg.sh"
+# End:
diff --git a/qa/standalone/osd/osd-markdown.sh b/qa/standalone/osd/osd-markdown.sh
new file mode 100755
index 000000000..5c4a78440
--- /dev/null
+++ b/qa/standalone/osd/osd-markdown.sh
@@ -0,0 +1,149 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2015 Intel <contact@intel.com.com>
+# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com>
+#
+# Author: Xiaoxi Chen <xiaoxi.chen@intel.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7108" # git grep '\<7108\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+function markdown_N_impl() {
+ markdown_times=$1
+ total_time=$2
+ sleeptime=$3
+ for i in `seq 1 $markdown_times`
+ do
+ # check the OSD is UP
+ ceph tell osd.0 get_latest_osdmap || return 1
+ ceph osd tree
+ ceph osd tree | grep osd.0 |grep up || return 1
+ # mark the OSD down.
+ # override any dup setting in the environment to ensure we do this
+ # exactly once (modulo messenger failures, at least; we can't *actually*
+ # provide exactly-once semantics for mon commands).
+ ( unset CEPH_CLI_TEST_DUP_COMMAND ; ceph osd down 0 )
+ sleep $sleeptime
+ done
+}
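+
+# Usage sketch (arguments as used by the tests below; total_time is assigned
+# but not otherwise used inside the loop):
+#
+#   markdown_N_impl <markdown_times> <total_time> <sleeptime>
+#   e.g. markdown_N_impl 4 300 10   # mark osd.0 down 4 times, 10s apart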
+
+
+function TEST_markdown_exceed_maxdown_count() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+
+ create_rbd_pool || return 1
+
+ # 3+1 times within 300s, osd should stay dead on the 4th time
+ local count=3
+ local sleeptime=10
+ local period=300
+ ceph tell osd.0 injectargs '--osd_max_markdown_count '$count'' || return 1
+ ceph tell osd.0 injectargs '--osd_max_markdown_period '$period'' || return 1
+
+ markdown_N_impl $(($count+1)) $period $sleeptime
+    # after N+1 markdowns, osd.0 should stay down
+ ceph osd tree | grep down | grep osd.0 || return 1
+}
+
+function TEST_markdown_boot() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+
+ create_rbd_pool || return 1
+
+ # 3 times within 120s, should stay up
+ local count=3
+ local sleeptime=10
+ local period=120
+ ceph tell osd.0 injectargs '--osd_max_markdown_count '$count'' || return 1
+ ceph tell osd.0 injectargs '--osd_max_markdown_period '$period'' || return 1
+
+ markdown_N_impl $count $period $sleeptime
+    # after N markdowns within the period, osd.0 should still come back up
+ sleep 15 # give osd plenty of time to notice and come back up
+ ceph tell osd.0 get_latest_osdmap || return 1
+ ceph osd tree | grep up | grep osd.0 || return 1
+}
+
+function TEST_markdown_boot_exceed_time() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+
+ create_rbd_pool || return 1
+
+    # 3+1 times, but spread over ~40s, which exceeds the 20s period, so the osd should stay up
+ local count=3
+ local period=20
+ local sleeptime=10
+ ceph tell osd.0 injectargs '--osd_max_markdown_count '$count'' || return 1
+ ceph tell osd.0 injectargs '--osd_max_markdown_period '$period'' || return 1
+
+ markdown_N_impl $(($count+1)) $period $sleeptime
+ sleep 15 # give osd plenty of time to notice and come back up
+ ceph tell osd.0 get_latest_osdmap || return 1
+ ceph osd tree | grep up | grep osd.0 || return 1
+}
+
+function TEST_osd_stop() {
+
+ local dir=$1
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+ osd_0_pid=$(cat $dir/osd.0.pid)
+ ps -p $osd_0_pid || return 1
+
+ ceph osd tree | grep osd.0 | grep up || return 1
+ ceph osd stop osd.0
+ sleep 15 # give osd plenty of time to notice and exit
+ ceph osd tree | grep down | grep osd.0 || return 1
+ ! ps -p $osd_0_pid || return 1
+}
+
+main osd-markdown "$@"
diff --git a/qa/standalone/osd/osd-reactivate.sh b/qa/standalone/osd/osd-reactivate.sh
new file mode 100755
index 000000000..6d6438629
--- /dev/null
+++ b/qa/standalone/osd/osd-reactivate.sh
@@ -0,0 +1,56 @@
+#!/usr/bin/env bash
+#
+# Author: Vicente Cheng <freeze.bilsted@gmail.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7122" # git grep '\<7122\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+function TEST_reactivate() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+
+ kill_daemons $dir TERM osd || return 1
+
+ ready_path=$dir"/0/ready"
+ activate_path=$dir"/0/active"
+ # trigger mkfs again
+ rm -rf $ready_path $activate_path
+ activate_osd $dir 0 || return 1
+
+}
+
+main osd-reactivate "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && test/osd/osd-reactivate.sh"
+# End:
diff --git a/qa/standalone/osd/osd-recovery-prio.sh b/qa/standalone/osd/osd-recovery-prio.sh
new file mode 100755
index 000000000..02b65f67a
--- /dev/null
+++ b/qa/standalone/osd/osd-recovery-prio.sh
@@ -0,0 +1,542 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2019 Red Hat <contact@redhat.com>
+#
+# Author: David Zafman <dzafman@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ # Fix port????
+ export CEPH_MON="127.0.0.1:7114" # git grep '\<7114\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON --osd_max_backfills=1 --debug_reserver=20 "
+ # Set osd op queue = wpq for the tests. Recovery priority is not
+ # considered by mclock_scheduler leading to unexpected results.
+ CEPH_ARGS+="--osd-op-queue=wpq "
+ export objects=200
+ export poolprefix=test
+ export FORCE_PRIO="255" # See OSD_RECOVERY_PRIORITY_FORCED
+ export NORMAL_PRIO="190" # See OSD_RECOVERY_PRIORITY_BASE + 10
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+
+function TEST_recovery_priority() {
+ local dir=$1
+ local pools=10
+ local OSDS=5
+ local max_tries=10
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ export CEPH_ARGS
+
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ run_osd $dir $osd || return 1
+ done
+
+ for p in $(seq 1 $pools)
+ do
+ create_pool "${poolprefix}$p" 1 1
+ ceph osd pool set "${poolprefix}$p" size 2
+ done
+ sleep 5
+
+ wait_for_clean || return 1
+
+ ceph pg dump pgs
+
+    # Find 3 pools whose pg 0 shares the same primary but has its second
+    # replica on a different osd in each case.
+ local PG1
+ local POOLNUM1
+ local pool1
+ local chk_osd1_1
+ local chk_osd1_2
+
+ local PG2
+ local POOLNUM2
+ local pool2
+ local chk_osd2
+
+ local PG3
+ local POOLNUM3
+ local pool3
+
+ for p in $(seq 1 $pools)
+ do
+ ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting
+ local test_osd1=$(head -1 $dir/acting)
+ local test_osd2=$(tail -1 $dir/acting)
+ if [ -z "$PG1" ];
+ then
+ PG1="${p}.0"
+ POOLNUM1=$p
+ pool1="${poolprefix}$p"
+ chk_osd1_1=$test_osd1
+ chk_osd1_2=$test_osd2
+ elif [ -z "$PG2" -a $chk_osd1_1 = $test_osd1 -a $chk_osd1_2 != $test_osd2 ];
+ then
+ PG2="${p}.0"
+ POOLNUM2=$p
+ pool2="${poolprefix}$p"
+ chk_osd2=$test_osd2
+ elif [ -n "$PG2" -a $chk_osd1_1 = $test_osd1 -a $chk_osd1_2 != $test_osd2 -a "$chk_osd2" != $test_osd2 ];
+ then
+ PG3="${p}.0"
+ POOLNUM3=$p
+ pool3="${poolprefix}$p"
+ break
+ fi
+ done
+ rm -f $dir/acting
+
+    if [ "$pool2" = "" -o "$pool3" = "" ];
+    then
+      echo "Failure to find appropriate PGs"
+ return 1
+ fi
+
+ for p in $(seq 1 $pools)
+ do
+ if [ $p != $POOLNUM1 -a $p != $POOLNUM2 -a $p != $POOLNUM3 ];
+ then
+ delete_pool ${poolprefix}$p
+ fi
+ done
+
+ ceph osd pool set $pool2 size 1 --yes-i-really-mean-it
+ ceph osd pool set $pool3 size 1 --yes-i-really-mean-it
+ wait_for_clean || return 1
+
+ dd if=/dev/urandom of=$dir/data bs=1M count=10
+ p=1
+ for pname in $pool1 $pool2 $pool3
+ do
+ for i in $(seq 1 $objects)
+ do
+ rados -p ${pname} put obj${i}-p${p} $dir/data
+ done
+ p=$(expr $p + 1)
+ done
+
+ local otherosd=$(get_not_primary $pool1 obj1-p1)
+
+ ceph pg dump pgs
+ ERRORS=0
+
+ ceph osd set norecover
+ ceph osd set noout
+
+ # Get a pg to want to recover and quickly force it
+ # to be preempted.
+ ceph osd pool set $pool3 size 2
+ sleep 2
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1
+
+ # 3. Item is in progress, adjust priority with no higher priority waiting
+ for i in $(seq 1 $max_tries)
+ do
+ if ! ceph pg force-recovery $PG3 2>&1 | grep -q "doesn't require recovery"; then
+ break
+ fi
+ if [ "$i" = "$max_tries" ]; then
+ echo "ERROR: Didn't appear to be able to force-recovery"
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+ sleep 2
+ done
+ flush_pg_stats || return 1
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1
+
+ ceph osd out osd.$chk_osd1_2
+ sleep 2
+ flush_pg_stats || return 1
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1
+ ceph pg dump pgs
+
+ ceph osd pool set $pool2 size 2
+ sleep 2
+ flush_pg_stats || return 1
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1
+ cat $dir/out
+ ceph pg dump pgs
+
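+    # Illustrative sketch of the dump_recovery_reservations output consumed by
+    # the jq queries below (field names inferred from those queries; the real
+    # output may contain additional fields):
+    #
+    #   { "local_reservations": {
+    #       "queues":      [ { "items": [ { "item": "<pgid>", "prio": <n>, ... } ], ... } ],
+    #       "in_progress": [ { "item": "<pgid>", "prio": <n>, ... } ] },
+    #     "remote_reservations": { ... } }
+    #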
+ PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG1}\")).prio")
+ if [ "$PRIO" != "$NORMAL_PRIO" ];
+ then
+ echo "The normal PG ${PG1} doesn't have prio $NORMAL_PRIO queued waiting"
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+
+ # Using eval will strip double-quotes from item
+ eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item')
+ if [ "$ITEM" != ${PG3} ];
+ then
+ echo "The first force-recovery PG $PG3 didn't become the in progress item"
+ ERRORS=$(expr $ERRORS + 1)
+ else
+ PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio')
+ if [ "$PRIO" != $FORCE_PRIO ];
+ then
+ echo "The first force-recovery PG ${PG3} doesn't have prio $FORCE_PRIO"
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+ fi
+
+ # 1. Item is queued, re-queue with new priority
+ for i in $(seq 1 $max_tries)
+ do
+ if ! ceph pg force-recovery $PG2 2>&1 | grep -q "doesn't require recovery"; then
+ break
+ fi
+ if [ "$i" = "$max_tries" ]; then
+ echo "ERROR: Didn't appear to be able to force-recovery"
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+ sleep 2
+ done
+ sleep 2
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1
+ cat $dir/out
+ PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio")
+ if [ "$PRIO" != "$FORCE_PRIO" ];
+ then
+ echo "The second force-recovery PG ${PG2} doesn't have prio $FORCE_PRIO"
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+ flush_pg_stats || return 1
+
+    # 4. Item is in progress; if higher priority items are waiting, preempt the item
+ #ceph osd unset norecover
+ ceph pg cancel-force-recovery $PG3 || return 1
+ sleep 2
+ #ceph osd set norecover
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1
+ cat $dir/out
+ PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG3}\")).prio")
+ if [ "$PRIO" != "$NORMAL_PRIO" ];
+ then
+ echo "After cancel-recovery PG ${PG3} doesn't have prio $NORMAL_PRIO"
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+
+ eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item')
+ if [ "$ITEM" != ${PG2} ];
+ then
+ echo "The force-recovery PG $PG2 didn't become the in progress item"
+ ERRORS=$(expr $ERRORS + 1)
+ else
+ PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio')
+ if [ "$PRIO" != $FORCE_PRIO ];
+ then
+            echo "The force-recovery PG ${PG2} doesn't have prio $FORCE_PRIO"
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+ fi
+
+ ceph pg cancel-force-recovery $PG2 || return 1
+ sleep 5
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1
+
+    # 2. Item is queued; re-queue and preempt because the new priority is higher than the in-progress item
+ flush_pg_stats || return 1
+ ceph pg force-recovery $PG3 || return 1
+ sleep 2
+
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1
+ cat $dir/out
+ PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio")
+ if [ "$PRIO" != "$NORMAL_PRIO" ];
+ then
+        echo "After cancel-force-recovery PG ${PG2} doesn't have prio $NORMAL_PRIO"
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+
+ eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item')
+ if [ "$ITEM" != ${PG3} ];
+ then
+ echo "The force-recovery PG $PG3 didn't get promoted to an in progress item"
+ ERRORS=$(expr $ERRORS + 1)
+ else
+ PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio')
+ if [ "$PRIO" != $FORCE_PRIO ];
+ then
+            echo "The force-recovery PG ${PG3} doesn't have prio $FORCE_PRIO"
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+ fi
+
+ ceph osd unset noout
+ ceph osd unset norecover
+
+ wait_for_clean "CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations" || return 1
+
+ ceph pg dump pgs
+
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_pgstate_history
+
+ if [ $ERRORS != "0" ];
+ then
+ echo "$ERRORS error(s) found"
+ else
+ echo TEST PASSED
+ fi
+
+ delete_pool $pool1
+ delete_pool $pool2
+ delete_pool $pool3
+ kill_daemons $dir || return 1
+ return $ERRORS
+}
+
+#
+# Show that pool recovery_priority is added to recovery priority
+#
+# Create 2 pools on 2 OSDs with different primaries
+# pool 1 with recovery_priority 1
+# pool 2 with recovery_priority 2
+#
+# Start recovery by changing the pool sizes from 1 to 2
+# Use dump_recovery_reservations to verify priorities
+function TEST_recovery_pool_priority() {
+ local dir=$1
+    local pools=3 # Don't assume the first 2 pools are exactly what we want
+ local OSDS=2
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ export CEPH_ARGS
+
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ run_osd $dir $osd || return 1
+ done
+
+ for p in $(seq 1 $pools)
+ do
+ create_pool "${poolprefix}$p" 1 1
+ ceph osd pool set "${poolprefix}$p" size 2
+ done
+ sleep 5
+
+ wait_for_clean || return 1
+
+ ceph pg dump pgs
+
+ # Find 2 pools with different primaries which
+ # means the replica must be on another osd.
+ local PG1
+ local POOLNUM1
+ local pool1
+ local chk_osd1_1
+ local chk_osd1_2
+
+ local PG2
+ local POOLNUM2
+ local pool2
+ local chk_osd2_1
+ local chk_osd2_2
+
+ for p in $(seq 1 $pools)
+ do
+ ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting
+ local test_osd1=$(head -1 $dir/acting)
+ local test_osd2=$(tail -1 $dir/acting)
+ if [ -z "$PG1" ];
+ then
+ PG1="${p}.0"
+ POOLNUM1=$p
+ pool1="${poolprefix}$p"
+ chk_osd1_1=$test_osd1
+ chk_osd1_2=$test_osd2
+ elif [ $chk_osd1_1 != $test_osd1 ];
+ then
+ PG2="${p}.0"
+ POOLNUM2=$p
+ pool2="${poolprefix}$p"
+ chk_osd2_1=$test_osd1
+ chk_osd2_2=$test_osd2
+ break
+ fi
+ done
+ rm -f $dir/acting
+
+ if [ "$pool2" = "" ];
+ then
+      echo "Failure to find appropriate PGs"
+ return 1
+ fi
+
+ for p in $(seq 1 $pools)
+ do
+ if [ $p != $POOLNUM1 -a $p != $POOLNUM2 ];
+ then
+ delete_pool ${poolprefix}$p
+ fi
+ done
+
+ pool1_extra_prio=1
+ pool2_extra_prio=2
+ pool1_prio=$(expr $NORMAL_PRIO + $pool1_extra_prio)
+ pool2_prio=$(expr $NORMAL_PRIO + $pool2_extra_prio)
+
+ ceph osd pool set $pool1 size 1 --yes-i-really-mean-it
+ ceph osd pool set $pool1 recovery_priority $pool1_extra_prio
+ ceph osd pool set $pool2 size 1 --yes-i-really-mean-it
+ ceph osd pool set $pool2 recovery_priority $pool2_extra_prio
+ wait_for_clean || return 1
+
+ dd if=/dev/urandom of=$dir/data bs=1M count=10
+ p=1
+ for pname in $pool1 $pool2
+ do
+ for i in $(seq 1 $objects)
+ do
+ rados -p ${pname} put obj${i}-p${p} $dir/data
+ done
+ p=$(expr $p + 1)
+ done
+
+ local otherosd=$(get_not_primary $pool1 obj1-p1)
+
+ ceph pg dump pgs
+ ERRORS=0
+
+ ceph osd pool set $pool1 size 2
+ ceph osd pool set $pool2 size 2
+
+ # Wait for both PGs to be in recovering state
+ ceph pg dump pgs
+
+ # Wait for recovery to start
+ set -o pipefail
+ count=0
+ while(true)
+ do
+ if test $(ceph --format json pg dump pgs |
+ jq '.pg_stats | .[] | .state | contains("recovering")' | grep -c true) == "2"
+ then
+ break
+ fi
+ sleep 2
+ if test "$count" -eq "10"
+ then
+ echo "Recovery never started on both PGs"
+ return 1
+ fi
+ count=$(expr $count + 1)
+ done
+ set +o pipefail
+ ceph pg dump pgs
+
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/dump.${chk_osd1_1}.out
+ echo osd.${chk_osd1_1}
+ cat $dir/dump.${chk_osd1_1}.out
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_2}) dump_recovery_reservations > $dir/dump.${chk_osd1_2}.out
+ echo osd.${chk_osd1_2}
+ cat $dir/dump.${chk_osd1_2}.out
+
+ # Using eval will strip double-quotes from item
+ eval ITEM=$(cat $dir/dump.${chk_osd1_1}.out | jq '.local_reservations.in_progress[0].item')
+ if [ "$ITEM" != ${PG1} ];
+ then
+ echo "The primary PG for $pool1 didn't become the in progress item"
+ ERRORS=$(expr $ERRORS + 1)
+ else
+ PRIO=$(cat $dir/dump.${chk_osd1_1}.out | jq '.local_reservations.in_progress[0].prio')
+ if [ "$PRIO" != $pool1_prio ];
+ then
+ echo "The primary PG ${PG1} doesn't have prio $pool1_prio"
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+ fi
+
+ # Using eval will strip double-quotes from item
+ eval ITEM=$(cat $dir/dump.${chk_osd1_2}.out | jq '.remote_reservations.in_progress[0].item')
+ if [ "$ITEM" != ${PG1} ];
+ then
+ echo "The primary PG for $pool1 didn't become the in progress item on remote"
+ ERRORS=$(expr $ERRORS + 1)
+ else
+ PRIO=$(cat $dir/dump.${chk_osd1_2}.out | jq '.remote_reservations.in_progress[0].prio')
+ if [ "$PRIO" != $pool1_prio ];
+ then
+ echo "The primary PG ${PG1} doesn't have prio $pool1_prio on remote"
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+ fi
+
+ # Using eval will strip double-quotes from item
+ eval ITEM=$(cat $dir/dump.${chk_osd2_1}.out | jq '.local_reservations.in_progress[0].item')
+ if [ "$ITEM" != ${PG2} ];
+ then
+ echo "The primary PG for $pool2 didn't become the in progress item"
+ ERRORS=$(expr $ERRORS + 1)
+ else
+ PRIO=$(cat $dir/dump.${chk_osd2_1}.out | jq '.local_reservations.in_progress[0].prio')
+ if [ "$PRIO" != $pool2_prio ];
+ then
+ echo "The primary PG ${PG2} doesn't have prio $pool2_prio"
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+ fi
+
+ # Using eval will strip double-quotes from item
+ eval ITEM=$(cat $dir/dump.${chk_osd2_2}.out | jq '.remote_reservations.in_progress[0].item')
+ if [ "$ITEM" != ${PG2} ];
+ then
+ echo "The primary PG $PG2 didn't become the in progress item on remote"
+ ERRORS=$(expr $ERRORS + 1)
+ else
+ PRIO=$(cat $dir/dump.${chk_osd2_2}.out | jq '.remote_reservations.in_progress[0].prio')
+ if [ "$PRIO" != $pool2_prio ];
+ then
+ echo "The primary PG ${PG2} doesn't have prio $pool2_prio on remote"
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+ fi
+
+ wait_for_clean || return 1
+
+ if [ $ERRORS != "0" ];
+ then
+ echo "$ERRORS error(s) found"
+ else
+ echo TEST PASSED
+ fi
+
+ delete_pool $pool1
+ delete_pool $pool2
+ kill_daemons $dir || return 1
+ return $ERRORS
+}
+
+main osd-recovery-prio "$@"
+
+# Local Variables:
+# compile-command: "make -j4 && ../qa/run-standalone.sh osd-recovery-prio.sh"
+# End:
diff --git a/qa/standalone/osd/osd-recovery-space.sh b/qa/standalone/osd/osd-recovery-space.sh
new file mode 100755
index 000000000..3bafc5138
--- /dev/null
+++ b/qa/standalone/osd/osd-recovery-space.sh
@@ -0,0 +1,176 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2018 Red Hat <contact@redhat.com>
+#
+# Author: David Zafman <dzafman@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7221" # git grep '\<7221\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+ CEPH_ARGS+="--osd_max_backfills=10 "
+ CEPH_ARGS+="--osd_mclock_override_recovery_settings=true "
+ export objects=600
+ export poolprefix=test
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+
+function get_num_in_state() {
+ local state=$1
+ local expression
+ expression+="select(contains(\"${state}\"))"
+ ceph --format json pg dump pgs 2>/dev/null | \
+ jq ".pg_stats | [.[] | .state | $expression] | length"
+}
+
+
+function wait_for_state() {
+ local state=$1
+ local cur_in_state
+ local -a delays=($(get_timeout_delays $2 5))
+ local -i loop=0
+
+ flush_pg_stats || return 1
+ while test $(get_num_pgs) == 0 ; do
+ sleep 1
+ done
+
+ while true ; do
+ cur_in_state=$(get_num_in_state ${state})
+ test $cur_in_state -gt 0 && break
+ if (( $loop >= ${#delays[*]} )) ; then
+ ceph pg dump pgs
+ return 1
+ fi
+ sleep ${delays[$loop]}
+ loop+=1
+ done
+ return 0
+}
+
+
+function wait_for_recovery_toofull() {
+ local timeout=$1
+ wait_for_state recovery_toofull $timeout
+}
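+
+# Example (mirrors the call in the test below): wait roughly 30 seconds for at
+# least one PG to report the recovery_toofull state:
+#
+#   wait_for_recovery_toofull 30 || return 1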
+
+
+# Create 1 pool with size 1
+# Set the full-ratio to 50%
+# Write 600 objects of 5K each (3000K)
+# Inject fake_statfs_for_testing of 3600K (83% full)
+# Increase the pool size to 2
+# The pool shouldn't have room to recover
+function TEST_recovery_test_simple() {
+ local dir=$1
+ local pools=1
+ local OSDS=2
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ export CEPH_ARGS
+
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ run_osd $dir $osd || return 1
+ done
+
+ ceph osd set-nearfull-ratio .40
+ ceph osd set-backfillfull-ratio .45
+ ceph osd set-full-ratio .50
+
+ for p in $(seq 1 $pools)
+ do
+ create_pool "${poolprefix}$p" 1 1
+ ceph osd pool set "${poolprefix}$p" size 1 --yes-i-really-mean-it
+ done
+
+ wait_for_clean || return 1
+
+ dd if=/dev/urandom of=$dir/datafile bs=1024 count=5
+ for o in $(seq 1 $objects)
+ do
+ rados -p "${poolprefix}$p" put obj$o $dir/datafile
+ done
+
+ for o in $(seq 0 $(expr $OSDS - 1))
+ do
+ ceph tell osd.$o injectargs '--fake_statfs_for_testing 3686400' || return 1
+ done
+ sleep 5
+
+ ceph pg dump pgs
+
+ for p in $(seq 1 $pools)
+ do
+ ceph osd pool set "${poolprefix}$p" size 2
+ done
+
+    # If this times out, we'll detect the errors below
+ wait_for_recovery_toofull 30
+
+ ERRORS=0
+ if [ "$(ceph pg dump pgs | grep +recovery_toofull | wc -l)" != "1" ];
+ then
+        echo "One PG should have been in recovery_toofull"
+ ERRORS="$(expr $ERRORS + 1)"
+ fi
+
+ ceph pg dump pgs
+ ceph status
+ ceph status --format=json-pretty > $dir/stat.json
+
+ eval SEV=$(jq '.health.checks.PG_RECOVERY_FULL.severity' $dir/stat.json)
+ if [ "$SEV" != "HEALTH_ERR" ]; then
+ echo "PG_RECOVERY_FULL severity $SEV not HEALTH_ERR"
+ ERRORS="$(expr $ERRORS + 1)"
+ fi
+ eval MSG=$(jq '.health.checks.PG_RECOVERY_FULL.summary.message' $dir/stat.json)
+ if [ "$MSG" != "Full OSDs blocking recovery: 1 pg recovery_toofull" ]; then
+ echo "PG_RECOVERY_FULL message '$MSG' mismatched"
+ ERRORS="$(expr $ERRORS + 1)"
+ fi
+ rm -f $dir/stat.json
+
+ if [ $ERRORS != "0" ];
+ then
+ return 1
+ fi
+
+ for i in $(seq 1 $pools)
+ do
+ delete_pool "${poolprefix}$i"
+ done
+ kill_daemons $dir || return 1
+}
+
+
+main osd-recovery-space "$@"
+
+# Local Variables:
+# compile-command: "make -j4 && ../qa/run-standalone.sh osd-recovery-space.sh"
+# End:
diff --git a/qa/standalone/osd/osd-recovery-stats.sh b/qa/standalone/osd/osd-recovery-stats.sh
new file mode 100755
index 000000000..ad6f810d7
--- /dev/null
+++ b/qa/standalone/osd/osd-recovery-stats.sh
@@ -0,0 +1,512 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2017 Red Hat <contact@redhat.com>
+#
+# Author: David Zafman <dzafman@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ # Fix port????
+ export CEPH_MON="127.0.0.1:7115" # git grep '\<7115\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+ # so we will not force auth_log_shard to be acting_primary
+ CEPH_ARGS+="--osd_force_auth_primary_missing_objects=1000000 "
+ export margin=10
+ export objects=200
+ export poolname=test
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+function below_margin() {
+ local -i check=$1
+ shift
+ local -i target=$1
+
+ return $(( $check <= $target && $check >= $target - $margin ? 0 : 1 ))
+}
+
+function above_margin() {
+ local -i check=$1
+ shift
+ local -i target=$1
+
+ return $(( $check >= $target && $check <= $target + $margin ? 0 : 1 ))
+}
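+
+# Example (assuming the margin=10 exported in run()):
+#   below_margin 495 500   # succeeds: 490 <= 495 <= 500
+#   below_margin 489 500   # fails:    489 < 500 - margin
+#   above_margin 505 500   # succeeds: 500 <= 505 <= 510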
+
+FIND_UPACT='grep "pg[[]${PG}.*recovering.*update_calc_stats " $log | tail -1 | sed "s/.*[)] \([[][^ p]*\).*$/\1/"'
+FIND_FIRST='grep "pg[[]${PG}.*recovering.*update_calc_stats $which " $log | grep -F " ${UPACT}${addp}" | grep -v est | head -1 | sed "s/.* \([0-9]*\)$/\1/"'
+FIND_LAST='grep "pg[[]${PG}.*recovering.*update_calc_stats $which " $log | tail -1 | sed "s/.* \([0-9]*\)$/\1/"'
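+
+# The FIND_* strings above are expanded with eval inside check(), so $log, $PG,
+# $which, $UPACT and $addp refer to the variables set there at call time.
+# Roughly: FIND_UPACT extracts the latest up/acting set logged by
+# update_calc_stats, while FIND_FIRST/FIND_LAST extract the first and last
+# counter values logged for the chosen statistic ("degraded", "misplaced" or
+# "shard <osd>").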
+
+function check() {
+ local dir=$1
+ local PG=$2
+ local primary=$3
+ local type=$4
+ local degraded_start=$5
+ local degraded_end=$6
+ local misplaced_start=$7
+ local misplaced_end=$8
+ local primary_start=${9:-}
+ local primary_end=${10:-}
+
+ local log=$dir/osd.${primary}.log
+
+ local addp=" "
+ if [ "$type" = "erasure" ];
+ then
+ addp="p"
+ fi
+
+ UPACT=$(eval $FIND_UPACT)
+
+ # Check 3rd line at start because of false recovery starts
+ local which="degraded"
+ FIRST=$(eval $FIND_FIRST)
+ below_margin $FIRST $degraded_start || return 1
+ LAST=$(eval $FIND_LAST)
+ above_margin $LAST $degraded_end || return 1
+
+ # Check 3rd line at start because of false recovery starts
+ which="misplaced"
+ FIRST=$(eval $FIND_FIRST)
+ below_margin $FIRST $misplaced_start || return 1
+ LAST=$(eval $FIND_LAST)
+ above_margin $LAST $misplaced_end || return 1
+
+    # This is the value set into MISSING_ON_PRIMARY
+ if [ -n "$primary_start" ];
+ then
+ which="shard $primary"
+ FIRST=$(eval $FIND_FIRST)
+ below_margin $FIRST $primary_start || return 1
+ LAST=$(eval $FIND_LAST)
+ above_margin $LAST $primary_end || return 1
+ fi
+}
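+
+# Example (mirrors the call in TEST_recovery_sizeup below): verify that in the
+# primary's log the degraded and misplaced counts went from 2*objects to 0 and
+# MISSING_ON_PRIMARY went from objects to 0:
+#
+#   check $dir $PG $primary replicated $degraded 0 $misplaced 0 $objects 0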
+
+# [1,0,?] -> [1,2,4]
+# degraded 500 -> 0
+# active+recovering+degraded
+
+# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
+# 1.0 500 0 500 0 0 0 500 500 active+recovering+degraded 2017-11-17 19:27:36.493828 28'500 32:603 [1,2,4] 1 [1,2,4] 1 0'0 2017-11-17 19:27:05.915467 0'0 2017-11-17 19:27:05.915467
+function do_recovery_out1() {
+ local dir=$1
+ shift
+ local type=$1
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+ run_osd $dir 3 || return 1
+ run_osd $dir 4 || return 1
+ run_osd $dir 5 || return 1
+
+ if [ $type = "erasure" ];
+ then
+ ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd
+ create_pool $poolname 1 1 $type myprofile
+ else
+ create_pool $poolname 1 1 $type
+ fi
+
+ wait_for_clean || return 1
+
+ for i in $(seq 1 $objects)
+ do
+ rados -p $poolname put obj$i /dev/null
+ done
+
+ local primary=$(get_primary $poolname obj1)
+ local PG=$(get_pg $poolname obj1)
+ # Only 2 OSDs so only 1 not primary
+ local otherosd=$(get_not_primary $poolname obj1)
+
+ ceph osd set norecover
+ kill $(cat $dir/osd.${otherosd}.pid)
+ ceph osd down osd.${otherosd}
+ ceph osd out osd.${otherosd}
+ ceph osd unset norecover
+ ceph tell osd.$(get_primary $poolname obj1) debug kick_recovery_wq 0
+ sleep 2
+
+ wait_for_clean || return 1
+
+ check $dir $PG $primary $type $objects 0 0 0 || return 1
+
+ delete_pool $poolname
+ kill_daemons $dir || return 1
+}
+
+function TEST_recovery_replicated_out1() {
+ local dir=$1
+
+ do_recovery_out1 $dir replicated || return 1
+}
+
+function TEST_recovery_erasure_out1() {
+ local dir=$1
+
+ do_recovery_out1 $dir erasure || return 1
+}
+
+# [0, 1] -> [2,3,4,5]
+# degraded 1000 -> 0
+# misplaced 1000 -> 0
+# missing on primary 500 -> 0
+
+# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
+# 1.0 500 500 1000 1000 0 0 500 500 active+recovering+degraded 2017-10-27 09:38:37.453438 22'500 25:394 [2,4,3,5] 2 [2,4,3,5] 2 0'0 2017-10-27 09:37:58.046748 0'0 2017-10-27 09:37:58.046748
+function TEST_recovery_sizeup() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+ run_osd $dir 3 || return 1
+ run_osd $dir 4 || return 1
+ run_osd $dir 5 || return 1
+
+ create_pool $poolname 1 1
+ ceph osd pool set $poolname size 2
+
+ wait_for_clean || return 1
+
+ for i in $(seq 1 $objects)
+ do
+ rados -p $poolname put obj$i /dev/null
+ done
+
+ local primary=$(get_primary $poolname obj1)
+ local PG=$(get_pg $poolname obj1)
+ # Only 2 OSDs so only 1 not primary
+ local otherosd=$(get_not_primary $poolname obj1)
+
+ ceph osd set norecover
+ ceph osd out osd.$primary osd.$otherosd
+ ceph osd pool set test size 4
+ ceph osd unset norecover
+ # Get new primary
+ primary=$(get_primary $poolname obj1)
+
+ ceph tell osd.${primary} debug kick_recovery_wq 0
+ sleep 2
+
+ wait_for_clean || return 1
+
+ local degraded=$(expr $objects \* 2)
+ local misplaced=$(expr $objects \* 2)
+ local log=$dir/osd.${primary}.log
+ check $dir $PG $primary replicated $degraded 0 $misplaced 0 $objects 0 || return 1
+
+ delete_pool $poolname
+ kill_daemons $dir || return 1
+}
+
+# [0, 1, 2, 4] -> [3, 5]
+# misplaced 1000 -> 0
+# missing on primary 500 -> 0
+# active+recovering+degraded
+
+# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
+# 1.0 500 500 0 1000 0 0 500 500 active+recovering+degraded 2017-10-27 09:34:50.012261 22'500 27:118 [3,5] 3 [3,5] 3 0'0 2017-10-27 09:34:08.617248 0'0 2017-10-27 09:34:08.617248
+function TEST_recovery_sizedown() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+ run_osd $dir 3 || return 1
+ run_osd $dir 4 || return 1
+ run_osd $dir 5 || return 1
+
+ create_pool $poolname 1 1
+ ceph osd pool set $poolname size 4
+
+ wait_for_clean || return 1
+
+ for i in $(seq 1 $objects)
+ do
+ rados -p $poolname put obj$i /dev/null
+ done
+
+ local primary=$(get_primary $poolname obj1)
+ local PG=$(get_pg $poolname obj1)
+    # Get all osds in the acting set
+ local allosds=$(get_osds $poolname obj1)
+
+ ceph osd set norecover
+ for osd in $allosds
+ do
+ ceph osd out osd.$osd
+ done
+
+ ceph osd pool set test size 2
+ ceph osd unset norecover
+ ceph tell osd.$(get_primary $poolname obj1) debug kick_recovery_wq 0
+ sleep 2
+
+ wait_for_clean || return 1
+
+ # Get new primary
+ primary=$(get_primary $poolname obj1)
+
+ local misplaced=$(expr $objects \* 2)
+ local log=$dir/osd.${primary}.log
+ check $dir $PG $primary replicated 0 0 $misplaced 0 || return 1
+
+ UPACT=$(grep "pg[[]${PG}.*recovering.*update_calc_stats " $log | tail -1 | sed "s/.*[)] \([[][^ p]*\).*$/\1/")
+
+    # This is the value set into MISSING_ON_PRIMARY
+ FIRST=$(grep "pg[[]${PG}.*recovering.*update_calc_stats shard $primary " $log | grep -F " $UPACT " | head -1 | sed "s/.* \([0-9]*\)$/\1/")
+ below_margin $FIRST $objects || return 1
+ LAST=$(grep "pg[[]${PG}.*recovering.*update_calc_stats shard $primary " $log | tail -1 | sed "s/.* \([0-9]*\)$/\1/")
+ above_margin $LAST 0 || return 1
+
+ delete_pool $poolname
+ kill_daemons $dir || return 1
+}
+
+# [1] -> [1,2]
+# degraded 300 -> 200
+# active+recovering+undersized+degraded
+
+# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
+# 1.0 100 0 300 0 0 0 100 100 active+recovering+undersized+degraded 2017-11-17 17:16:15.302943 13'500 16:643 [1,2] 1 [1,2] 1 0'0 2017-11-17 17:15:34.985563 0'0 2017-11-17 17:15:34.985563
+function TEST_recovery_undersized() {
+ local dir=$1
+
+ local osds=3
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ for i in $(seq 0 $(expr $osds - 1))
+ do
+ run_osd $dir $i || return 1
+ done
+
+ create_pool $poolname 1 1
+ ceph osd pool set $poolname size 1 --yes-i-really-mean-it
+
+ wait_for_clean || return 1
+
+ for i in $(seq 1 $objects)
+ do
+ rados -p $poolname put obj$i /dev/null
+ done
+
+ local primary=$(get_primary $poolname obj1)
+ local PG=$(get_pg $poolname obj1)
+
+ ceph osd set norecover
+    # Mark out one osd that is not the primary (pool size is 1, so it holds no replica)
+ for i in $(seq 0 $(expr $osds - 1))
+ do
+ if [ $i = $primary ];
+ then
+ continue
+ fi
+ ceph osd out osd.$i
+ break
+ done
+ ceph osd pool set test size 4
+ ceph osd unset norecover
+ ceph tell osd.$(get_primary $poolname obj1) debug kick_recovery_wq 0
+ # Give extra sleep time because code below doesn't have the sophistication of wait_for_clean()
+ sleep 10
+ flush_pg_stats || return 1
+
+ # Wait for recovery to finish
+ # Can't use wait_for_clean() because state goes from active+recovering+undersized+degraded
+ # to active+undersized+degraded
+ for i in $(seq 1 300)
+ do
+ if ceph pg dump pgs | grep ^$PG | grep -qv recovering
+ then
+ break
+ fi
+ if [ $i = "300" ];
+ then
+ echo "Timeout waiting for recovery to finish"
+ return 1
+ fi
+ sleep 1
+ done
+
+ # Get new primary
+ primary=$(get_primary $poolname obj1)
+ local log=$dir/osd.${primary}.log
+
+ local first_degraded=$(expr $objects \* 3)
+ local last_degraded=$(expr $objects \* 2)
+ check $dir $PG $primary replicated $first_degraded $last_degraded 0 0 || return 1
+
+ delete_pool $poolname
+ kill_daemons $dir || return 1
+}
+
+# [1,0,2] -> [1,3,NONE]/[1,3,2]
+# degraded 100 -> 0
+# misplaced 100 -> 100
+# active+recovering+degraded+remapped
+
+# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
+# 1.0 100 0 100 100 0 0 100 100 active+recovering+degraded+remapped 2017-11-27 21:24:20.851243 18'500 23:618 [1,3,NONE] 1 [1,3,2] 1 0'0 2017-11-27 21:23:39.395242 0'0 2017-11-27 21:23:39.395242
+function TEST_recovery_erasure_remapped() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+ run_osd $dir 3 || return 1
+
+ ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd
+ create_pool $poolname 1 1 erasure myprofile
+ ceph osd pool set $poolname min_size 2
+
+ wait_for_clean || return 1
+
+ for i in $(seq 1 $objects)
+ do
+ rados -p $poolname put obj$i /dev/null
+ done
+
+ local primary=$(get_primary $poolname obj1)
+ local PG=$(get_pg $poolname obj1)
+ local otherosd=$(get_not_primary $poolname obj1)
+
+ ceph osd set norecover
+ kill $(cat $dir/osd.${otherosd}.pid)
+ ceph osd down osd.${otherosd}
+ ceph osd out osd.${otherosd}
+
+    # Mark out one osd that is neither the primary nor the osd already marked down/out
+ for i in 0 1 2 3
+ do
+ if [ $i = $primary ];
+ then
+ continue
+ fi
+ if [ $i = $otherosd ];
+ then
+ continue
+ fi
+ ceph osd out osd.$i
+ break
+ done
+ ceph osd unset norecover
+ ceph tell osd.$(get_primary $poolname obj1) debug kick_recovery_wq 0
+ sleep 2
+
+ wait_for_clean || return 1
+
+ local log=$dir/osd.${primary}.log
+ check $dir $PG $primary erasure $objects 0 $objects $objects || return 1
+
+ delete_pool $poolname
+ kill_daemons $dir || return 1
+}
+
+function TEST_recovery_multi() {
+ local dir=$1
+
+ local osds=6
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ for i in $(seq 0 $(expr $osds - 1))
+ do
+ run_osd $dir $i || return 1
+ done
+
+ create_pool $poolname 1 1
+ ceph osd pool set $poolname size 3
+ ceph osd pool set $poolname min_size 1
+
+ wait_for_clean || return 1
+
+ rados -p $poolname put obj1 /dev/null
+
+ local primary=$(get_primary $poolname obj1)
+ local otherosd=$(get_not_primary $poolname obj1)
+
+ ceph osd set noout
+ ceph osd set norecover
+ kill $(cat $dir/osd.${otherosd}.pid)
+ ceph osd down osd.${otherosd}
+
+ local half=$(expr $objects / 2)
+ for i in $(seq 2 $half)
+ do
+ rados -p $poolname put obj$i /dev/null
+ done
+
+ kill $(cat $dir/osd.${primary}.pid)
+ ceph osd down osd.${primary}
+ activate_osd $dir ${otherosd}
+ sleep 3
+
+ for i in $(seq $(expr $half + 1) $objects)
+ do
+ rados -p $poolname put obj$i /dev/null
+ done
+
+ local PG=$(get_pg $poolname obj1)
+ local otherosd=$(get_not_primary $poolname obj$objects)
+
+ ceph osd unset noout
+ ceph osd out osd.$primary osd.$otherosd
+ activate_osd $dir ${primary}
+ sleep 3
+
+ ceph osd pool set test size 4
+ ceph osd unset norecover
+ ceph tell osd.$(get_primary $poolname obj1) debug kick_recovery_wq 0
+ sleep 2
+
+ wait_for_clean || return 1
+
+ # Get new primary
+ primary=$(get_primary $poolname obj1)
+
+ local log=$dir/osd.${primary}.log
+ check $dir $PG $primary replicated 399 0 300 0 99 0 || return 1
+
+ delete_pool $poolname
+ kill_daemons $dir || return 1
+}
+
+main osd-recovery-stats "$@"
+
+# Local Variables:
+# compile-command: "make -j4 && ../qa/run-standalone.sh osd-recovery-stats.sh"
+# End:
diff --git a/qa/standalone/osd/osd-rep-recov-eio.sh b/qa/standalone/osd/osd-rep-recov-eio.sh
new file mode 100755
index 000000000..6fea441b3
--- /dev/null
+++ b/qa/standalone/osd/osd-rep-recov-eio.sh
@@ -0,0 +1,422 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2017 Red Hat <contact@redhat.com>
+#
+#
+# Author: Kefu Chai <kchai@redhat.com>
+# Author: David Zafman <dzafman@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+warnings=10
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7140" # git grep '\<7140\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ # set warning amount in case default changes
+ run_mon $dir a --mon_osd_warn_num_repaired=$warnings || return 1
+ run_mgr $dir x || return 1
+ ceph osd pool create foo 8 || return 1
+
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+function setup_osds() {
+ local count=$1
+ shift
+ local type=$1
+
+ for id in $(seq 0 $(expr $count - 1)) ; do
+ run_osd${type} $dir $id || return 1
+ done
+ wait_for_clean || return 1
+}
+
+function get_state() {
+ local pgid=$1
+ local sname=state
+ ceph --format json pg dump pgs 2>/dev/null | \
+ jq -r ".pg_stats | .[] | select(.pgid==\"$pgid\") | .$sname"
+}
+
+function rados_put() {
+ local dir=$1
+ local poolname=$2
+ local objname=${3:-SOMETHING}
+
+ for marker in AAA BBB CCCC DDDD ; do
+ printf "%*s" 1024 $marker
+ done > $dir/ORIGINAL
+ #
+    # put an object; rados_get later fetches it and compares against ORIGINAL
+ #
+ rados --pool $poolname put $objname $dir/ORIGINAL || return 1
+}
+
+function rados_get() {
+ local dir=$1
+ local poolname=$2
+ local objname=${3:-SOMETHING}
+ local expect=${4:-ok}
+
+ #
+ # Expect a failure to get object
+ #
+ if [ $expect = "fail" ];
+ then
+ ! rados --pool $poolname get $objname $dir/COPY
+ return
+ fi
+ #
+ # Expect hang trying to get object
+ #
+ if [ $expect = "hang" ];
+ then
+ timeout 5 rados --pool $poolname get $objname $dir/COPY
+ test "$?" = "124"
+ return
+ fi
+ #
+ # get an object, compare with $dir/ORIGINAL
+ #
+ rados --pool $poolname get $objname $dir/COPY || return 1
+ diff $dir/ORIGINAL $dir/COPY || return 1
+ rm $dir/COPY
+}
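+
+# Examples of the three modes used by the tests below:
+#   rados_get $dir $poolname $objname          # expect success, data matches ORIGINAL
+#   rados_get $dir $poolname $objname fail     # expect the read to fail
+#   rados_get $dir $poolname $objname hang     # expect the read to time out (rc 124)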
+
+function rados_get_data() {
+ local inject=$1
+ shift
+ local dir=$1
+
+ local poolname=pool-rep
+ local objname=obj-$inject-$$
+ local pgid=$(get_pg $poolname $objname)
+
+ rados_put $dir $poolname $objname || return 1
+ inject_$inject rep data $poolname $objname $dir 0 || return 1
+ rados_get $dir $poolname $objname || return 1
+
+ wait_for_clean
+ COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
+ test "$COUNT" = "1" || return 1
+ flush_pg_stats
+ COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired")
+ test "$COUNT" = "1" || return 1
+
+ local object_osds=($(get_osds $poolname $objname))
+ local primary=${object_osds[0]}
+ local bad_peer=${object_osds[1]}
+ inject_$inject rep data $poolname $objname $dir 0 || return 1
+ inject_$inject rep data $poolname $objname $dir 1 || return 1
+ # Force primary to pull from the bad peer, so we can repair it too!
+ set_config osd $primary osd_debug_feed_pullee $bad_peer || return 1
+ rados_get $dir $poolname $objname || return 1
+
+ # Wait until automatic repair of bad peer is done
+ wait_for_clean || return 1
+
+ inject_$inject rep data $poolname $objname $dir 0 || return 1
+ inject_$inject rep data $poolname $objname $dir 2 || return 1
+ rados_get $dir $poolname $objname || return 1
+
+ wait_for_clean
+ COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
+ test "$COUNT" = "3" || return 1
+ flush_pg_stats
+ COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired")
+ test "$COUNT" = "4" || return 1
+
+ inject_$inject rep data $poolname $objname $dir 0 || return 1
+ inject_$inject rep data $poolname $objname $dir 1 || return 1
+ inject_$inject rep data $poolname $objname $dir 2 || return 1
+ rados_get $dir $poolname $objname hang || return 1
+
+ wait_for_clean
+ # After hang another repair couldn't happen, so count stays the same
+ COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
+ test "$COUNT" = "3" || return 1
+ flush_pg_stats
+ COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired")
+ test "$COUNT" = "4" || return 1
+}
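+
+# Note on the counters checked above (inferred from the expected values in this
+# test): num_objects_repaired is an object-level count in the PG stats, while
+# num_shards_repaired in the summed osd stats counts individual repaired
+# shards, so the two totals can diverge once more than one replica of the same
+# object is repaired.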
+
+function TEST_rados_get_with_eio() {
+ local dir=$1
+
+ setup_osds 4 || return 1
+
+ local poolname=pool-rep
+ create_pool $poolname 1 1 || return 1
+ wait_for_clean || return 1
+ rados_get_data eio $dir || return 1
+
+ delete_pool $poolname
+}
+
+function TEST_rados_repair_warning() {
+ local dir=$1
+ local OBJS=$(expr $warnings + 1)
+
+ setup_osds 4 || return 1
+
+ local poolname=pool-rep
+ create_pool $poolname 1 1 || return 1
+ wait_for_clean || return 1
+
+ local poolname=pool-rep
+ local objbase=obj-warn
+ local inject=eio
+
+ for i in $(seq 1 $OBJS)
+ do
+ rados_put $dir $poolname ${objbase}-$i || return 1
+ inject_$inject rep data $poolname ${objbase}-$i $dir 0 || return 1
+ rados_get $dir $poolname ${objbase}-$i || return 1
+ done
+ local pgid=$(get_pg $poolname ${objbase}-1)
+
+ local object_osds=($(get_osds $poolname ${objbase}-1))
+ local primary=${object_osds[0]}
+ local bad_peer=${object_osds[1]}
+
+ wait_for_clean
+ COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
+ test "$COUNT" = "$OBJS" || return 1
+ flush_pg_stats
+ COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired")
+ test "$COUNT" = "$OBJS" || return 1
+
+ ceph health | grep -q "Too many repaired reads on 1 OSDs" || return 1
+ ceph health detail | grep -q "osd.$primary had $OBJS reads repaired" || return 1
+
+ ceph health mute OSD_TOO_MANY_REPAIRS
+ set -o pipefail
+ # Should mute this
+ ceph health | $(! grep -q "Too many repaired reads on 1 OSDs") || return 1
+ set +o pipefail
+
+ for i in $(seq 1 $OBJS)
+ do
+ inject_$inject rep data $poolname ${objbase}-$i $dir 0 || return 1
+ inject_$inject rep data $poolname ${objbase}-$i $dir 1 || return 1
+ # Force primary to pull from the bad peer, so we can repair it too!
+ set_config osd $primary osd_debug_feed_pullee $bad_peer || return 1
+ rados_get $dir $poolname ${objbase}-$i || return 1
+ done
+
+ wait_for_clean
+ COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
+ test "$COUNT" = "$(expr $OBJS \* 2)" || return 1
+ flush_pg_stats
+ COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired")
+ test "$COUNT" = "$(expr $OBJS \* 3)" || return 1
+
+    # Give the mon a chance to notice the additional repaired OSD and clear the mute
+    # The default tick time is 5 seconds
+ CHECKTIME=10
+ LOOPS=0
+ while(true)
+ do
+ sleep 1
+ if ceph health | grep -q "Too many repaired reads on 2 OSDs"
+ then
+ break
+ fi
+ LOOPS=$(expr $LOOPS + 1)
+ if test "$LOOPS" = "$CHECKTIME"
+ then
+ echo "Too many repaired reads not seen after $CHECKTIME seconds"
+ return 1
+ fi
+ done
+ ceph health detail | grep -q "osd.$primary had $(expr $OBJS \* 2) reads repaired" || return 1
+ ceph health detail | grep -q "osd.$bad_peer had $OBJS reads repaired" || return 1
+
+ delete_pool $poolname
+}
+
+# Test backfill with unfound object
+function TEST_rep_backfill_unfound() {
+ local dir=$1
+ local objname=myobject
+ local lastobj=300
+ # Must be between 1 and $lastobj
+ local testobj=obj250
+
+ export CEPH_ARGS
+ CEPH_ARGS+=' --osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10'
+ setup_osds 3 || return 1
+
+ local poolname=test-pool
+ create_pool $poolname 1 1 || return 1
+ wait_for_clean || return 1
+
+ ceph pg dump pgs
+
+ rados_put $dir $poolname $objname || return 1
+
+ local -a initial_osds=($(get_osds $poolname $objname))
+ local last_osd=${initial_osds[-1]}
+ kill_daemons $dir TERM osd.${last_osd} 2>&2 < /dev/null || return 1
+ ceph osd down ${last_osd} || return 1
+ ceph osd out ${last_osd} || return 1
+
+ ceph pg dump pgs
+
+ dd if=/dev/urandom of=${dir}/ORIGINAL bs=1024 count=4
+ for i in $(seq 1 $lastobj)
+ do
+ rados --pool $poolname put obj${i} $dir/ORIGINAL || return 1
+ done
+
+ inject_eio rep data $poolname $testobj $dir 0 || return 1
+ inject_eio rep data $poolname $testobj $dir 1 || return 1
+
+ activate_osd $dir ${last_osd} || return 1
+ ceph osd in ${last_osd} || return 1
+
+ sleep 15
+
+ for tmp in $(seq 1 360); do
+ state=$(get_state 2.0)
+ echo $state | grep backfill_unfound
+ if [ "$?" = "0" ]; then
+ break
+ fi
+ echo "$state "
+ sleep 1
+ done
+
+ ceph pg dump pgs
+ ceph pg 2.0 list_unfound | grep -q $testobj || return 1
+
+ # Command should hang because object is unfound
+ timeout 5 rados -p $poolname get $testobj $dir/CHECK
+ test $? = "124" || return 1
+
+ ceph pg 2.0 mark_unfound_lost delete
+
+ wait_for_clean || return 1
+
+ for i in $(seq 1 $lastobj)
+ do
+ if [ obj${i} = "$testobj" ]; then
+ # Doesn't exist anymore
+ ! rados -p $poolname get $testobj $dir/CHECK || return 1
+ else
+ rados --pool $poolname get obj${i} $dir/CHECK || return 1
+ diff -q $dir/ORIGINAL $dir/CHECK || return 1
+ fi
+ done
+
+ rm -f ${dir}/ORIGINAL ${dir}/CHECK
+
+ delete_pool $poolname
+}
+
+# Test recovery with unfound object
+function TEST_rep_recovery_unfound() {
+ local dir=$1
+ local objname=myobject
+ local lastobj=100
+ # Must be between 1 and $lastobj
+ local testobj=obj75
+
+ setup_osds 3 || return 1
+
+ local poolname=test-pool
+ create_pool $poolname 1 1 || return 1
+ wait_for_clean || return 1
+
+ ceph pg dump pgs
+
+ rados_put $dir $poolname $objname || return 1
+
+ local -a initial_osds=($(get_osds $poolname $objname))
+ local last_osd=${initial_osds[-1]}
+ kill_daemons $dir TERM osd.${last_osd} 2>&2 < /dev/null || return 1
+ ceph osd down ${last_osd} || return 1
+ ceph osd out ${last_osd} || return 1
+
+ ceph pg dump pgs
+
+ dd if=/dev/urandom of=${dir}/ORIGINAL bs=1024 count=4
+ for i in $(seq 1 $lastobj)
+ do
+ rados --pool $poolname put obj${i} $dir/ORIGINAL || return 1
+ done
+
+ inject_eio rep data $poolname $testobj $dir 0 || return 1
+ inject_eio rep data $poolname $testobj $dir 1 || return 1
+
+ activate_osd $dir ${last_osd} || return 1
+ ceph osd in ${last_osd} || return 1
+
+ sleep 15
+
+ for tmp in $(seq 1 100); do
+ state=$(get_state 2.0)
+ echo $state | grep -v recovering
+ if [ "$?" = "0" ]; then
+ break
+ fi
+ echo "$state "
+ sleep 1
+ done
+
+ ceph pg dump pgs
+ ceph pg 2.0 list_unfound | grep -q $testobj || return 1
+
+ # Command should hang because object is unfound
+ timeout 5 rados -p $poolname get $testobj $dir/CHECK
+ test $? = "124" || return 1
+
+ ceph pg 2.0 mark_unfound_lost delete
+
+ wait_for_clean || return 1
+
+ for i in $(seq 1 $lastobj)
+ do
+ if [ obj${i} = "$testobj" ]; then
+ # Doesn't exist anymore
+ ! rados -p $poolname get $testobj $dir/CHECK || return 1
+ else
+ rados --pool $poolname get obj${i} $dir/CHECK || return 1
+ diff -q $dir/ORIGINAL $dir/CHECK || return 1
+ fi
+ done
+
+ rm -f ${dir}/ORIGINAL ${dir}/CHECK
+
+ delete_pool $poolname
+}
+
+main osd-rep-recov-eio.sh "$@"
+
+# Local Variables:
+# compile-command: "cd ../../../build ; make -j4 && ../qa/run-standalone.sh osd-rep-recov-eio.sh"
+# End:
diff --git a/qa/standalone/osd/osd-reuse-id.sh b/qa/standalone/osd/osd-reuse-id.sh
new file mode 100755
index 000000000..b24b6f2eb
--- /dev/null
+++ b/qa/standalone/osd/osd-reuse-id.sh
@@ -0,0 +1,53 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2015 Red Hat <contact@redhat.com>
+#
+# Author: Loic Dachary <loic@dachary.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7123" # git grep '\<7123\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+function TEST_reuse_id() {
+ local dir=$1
+
+ run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ create_rbd_pool || return 1
+ wait_for_clean || return 1
+ destroy_osd $dir 1 || return 1
+ run_osd $dir 1 || return 1
+}
+
+main osd-reuse-id "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && test/osd/osd-reuse-id.sh"
+# End:
diff --git a/qa/standalone/osd/pg-split-merge.sh b/qa/standalone/osd/pg-split-merge.sh
new file mode 100755
index 000000000..7f2899b60
--- /dev/null
+++ b/qa/standalone/osd/pg-split-merge.sh
@@ -0,0 +1,203 @@
+#!/usr/bin/env bash
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7147" # git grep '\<7147\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON --mon_min_osdmap_epochs=50 --paxos_service_trim_min=10"
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+function TEST_a_merge_empty() {
+ local dir=$1
+
+ run_mon $dir a --osd_pool_default_size=3 || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+
+ ceph osd pool create foo 2 || return 1
+ ceph osd pool set foo pgp_num 1 || return 1
+
+ wait_for_clean || return 1
+
+ # note: we need 1.0 to have at least as many objects as 1.1
+ # 1.1
+ rados -p foo put foo1 /etc/passwd
+ rados -p foo put foo2 /etc/passwd
+ rados -p foo put foo3 /etc/passwd
+ rados -p foo put foo4 /etc/passwd
+ # 1.0
+ rados -p foo put foo5 /etc/passwd
+ rados -p foo put foo6 /etc/passwd
+ rados -p foo put foo8 /etc/passwd
+ rados -p foo put foo10 /etc/passwd
+ rados -p foo put foo11 /etc/passwd
+ rados -p foo put foo12 /etc/passwd
+ rados -p foo put foo16 /etc/passwd
+
+ wait_for_clean || return 1
+
+ ceph tell osd.1 config set osd_debug_no_purge_strays true
+ ceph osd pool set foo size 2 || return 1
+ wait_for_clean || return 1
+
+ kill_daemons $dir TERM osd.2 || return 1
+ ceph-objectstore-tool --data-path $dir/2 --op remove --pgid 1.1 --force || return 1
+ activate_osd $dir 2 || return 1
+
+ wait_for_clean || return 1
+
+ # osd.2: now 1.0 is there but 1.1 is not
+
+ # instantiate 1.1 on osd.2 with last_update=0'0 ('empty'), which is
+ # the problematic state... then let it merge with 1.0
+ ceph tell osd.2 config set osd_debug_no_acting_change true
+ ceph osd out 0 1
+ ceph osd pool set foo pg_num 1
+ sleep 5
+ ceph tell osd.2 config set osd_debug_no_acting_change false
+
+ # go back to osd.1 being primary, and 3x so the osd.2 copy doesn't get
+ # removed
+ ceph osd in 0 1
+ ceph osd pool set foo size 3
+
+ wait_for_clean || return 1
+
+ # scrub to verify that the osd.2 copy of 1.0 was treated as incomplete
+ # (rather than as a valid copy silently missing half of its objects).
+ ceph pg scrub 1.0
+ sleep 10
+ ceph log last debug
+ ceph pg ls
+ ceph pg ls | grep ' active.clean ' || return 1
+}
+
+function TEST_import_after_merge_and_gap() {
+ local dir=$1
+
+ run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+
+ ceph osd pool create foo 2 || return 1
+ wait_for_clean || return 1
+ rados -p foo bench 3 write -b 1024 --no-cleanup || return 1
+
+ kill_daemons $dir TERM osd.0 || return 1
+ ceph-objectstore-tool --data-path $dir/0 --op export --pgid 1.1 --file $dir/1.1 --force || return 1
+ ceph-objectstore-tool --data-path $dir/0 --op export --pgid 1.0 --file $dir/1.0 --force || return 1
+ activate_osd $dir 0 || return 1
+
+ ceph osd pool set foo pg_num 1
+ sleep 5
+ while ceph daemon osd.0 perf dump | jq '.osd.numpg' | grep 2 ; do sleep 1 ; done
+ wait_for_clean || return 1
+
+ #
+ kill_daemons $dir TERM osd.0 || return 1
+ ceph-objectstore-tool --data-path $dir/0 --op remove --pgid 1.0 --force || return 1
+ # this will import both halves of the original pg
+ ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.1 --file $dir/1.1 || return 1
+ ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.0 --file $dir/1.0 || return 1
+ activate_osd $dir 0 || return 1
+
+ wait_for_clean || return 1
+
+ # make a map gap
+ for f in `seq 1 50` ; do
+ ceph osd set nodown
+ ceph osd unset nodown
+ done
+
+ # poke and prod to ensure last_epoch_clean is big, reported to the mon, and
+ # the osd is able to trim old maps
+ rados -p foo bench 1 write -b 1024 --no-cleanup || return 1
+ wait_for_clean || return 1
+ ceph tell osd.0 send_beacon
+ sleep 5
+ ceph osd set nodown
+ ceph osd unset nodown
+ sleep 5
+
+ kill_daemons $dir TERM osd.0 || return 1
+
+ # this should fail.. 1.1 still doesn't exist
+ ! ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.1 --file $dir/1.1 || return 1
+
+ ceph-objectstore-tool --data-path $dir/0 --op export-remove --pgid 1.0 --force --file $dir/1.0.later || return 1
+
+ # this should fail too because of the gap
+ ! ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.1 --file $dir/1.1 || return 1
+ ! ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.0 --file $dir/1.0 || return 1
+
+ # we can force it...
+ ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.1 --file $dir/1.1 --force || return 1
+ ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.0 --file $dir/1.0 --force || return 1
+
+ # ...but the osd won't start, so remove it again.
+ ceph-objectstore-tool --data-path $dir/0 --op remove --pgid 1.0 --force || return 1
+ ceph-objectstore-tool --data-path $dir/0 --op remove --pgid 1.1 --force || return 1
+
+ ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.0 --file $dir/1.0.later --force || return 1
+
+
+ activate_osd $dir 0 || return 1
+
+ wait_for_clean || return 1
+}
+
+function TEST_import_after_split() {
+ local dir=$1
+
+ run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+
+ ceph osd pool create foo 1 || return 1
+ wait_for_clean || return 1
+ rados -p foo bench 3 write -b 1024 --no-cleanup || return 1
+
+ kill_daemons $dir TERM osd.0 || return 1
+ ceph-objectstore-tool --data-path $dir/0 --op export --pgid 1.0 --file $dir/1.0 --force || return 1
+ activate_osd $dir 0 || return 1
+
+ ceph osd pool set foo pg_num 2
+ sleep 5
+ while ceph daemon osd.0 perf dump | jq '.osd.numpg' | grep 1 ; do sleep 1 ; done
+ wait_for_clean || return 1
+
+ kill_daemons $dir TERM osd.0 || return 1
+
+ ceph-objectstore-tool --data-path $dir/0 --op remove --pgid 1.0 --force || return 1
+
+ # this should fail because 1.1 (split child) is there
+ ! ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.0 --file $dir/1.0 || return 1
+
+ ceph-objectstore-tool --data-path $dir/0 --op remove --pgid 1.1 --force || return 1
+ # now it will work (1.1 is gone)
+ ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.0 --file $dir/1.0 || return 1
+
+ activate_osd $dir 0 || return 1
+
+ wait_for_clean || return 1
+}
+
+
+main pg-split-merge "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && test/osd/pg-split-merge.sh"
+# End:
diff --git a/qa/standalone/osd/repeer-on-acting-back.sh b/qa/standalone/osd/repeer-on-acting-back.sh
new file mode 100755
index 000000000..af406ef92
--- /dev/null
+++ b/qa/standalone/osd/repeer-on-acting-back.sh
@@ -0,0 +1,129 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2020 ZTE Corporation <contact@zte.com.cn>
+#
+# Author: xie xingguo <xie.xingguo@zte.com.cn>
+# Author: Yan Jun <yan.jun8@zte.com.cn>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export poolname=test
+ export testobjects=100
+ export loglen=12
+ export trim=$(expr $loglen / 2)
+ export CEPH_MON="127.0.0.1:7115" # git grep '\<7115\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+ # so we will not force auth_log_shard to be acting_primary
+ CEPH_ARGS+="--osd_force_auth_primary_missing_objects=1000000 "
+ # use small pg_log settings, so we always do backfill instead of recovery
+ CEPH_ARGS+="--osd_min_pg_log_entries=$loglen --osd_max_pg_log_entries=$loglen --osd_pg_log_trim_min=$trim "
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+
+function TEST_repeer_on_down_acting_member_coming_back() {
+ local dir=$1
+ local dummyfile='/etc/fstab'
+
+ local num_osds=6
+ local osds="$(seq 0 $(expr $num_osds - 1))"
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ for i in $osds
+ do
+ run_osd $dir $i || return 1
+ done
+
+ create_pool $poolname 1 1
+ ceph osd pool set $poolname size 3
+ ceph osd pool set $poolname min_size 2
+ local poolid=$(ceph pg dump pools -f json | jq '.pool_stats' | jq '.[].poolid')
+ local pgid=$poolid.0
+
+ # enable required feature-bits for upmap
+ ceph osd set-require-min-compat-client luminous
+ # reset up to [1,2,3]
+ ceph osd pg-upmap $pgid 1 2 3 || return 1
+
+ flush_pg_stats || return 1
+ wait_for_clean || return 1
+
+ echo "writing initial objects"
+ # write a bunch of objects
+ for i in $(seq 1 $testobjects)
+ do
+ rados -p $poolname put existing_$i $dummyfile
+ done
+
+ WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean
+
+ # reset up to [1,4,5]
+ ceph osd pg-upmap $pgid 1 4 5 || return 1
+
+ # wait for peering to complete
+ sleep 2
+
+ # make sure osd.2 belongs to current acting set
+ ceph pg $pgid query | jq '.acting' | grep 2 || return 1
+
+ # kill osd.2
+ kill_daemons $dir KILL osd.2 || return 1
+ ceph osd down osd.2
+
+ # again, wait for peering to complete
+ sleep 2
+
+ # osd.2 should have been moved out from acting set
+ ceph pg $pgid query | jq '.acting' | grep 2 && return 1
+
+ # bring up osd.2
+ activate_osd $dir 2 || return 1
+ wait_for_osd up 2
+
+ # again, wait for peering to complete
+ sleep 2
+
+ # primary should be able to re-add osd.2 into acting
+ ceph pg $pgid query | jq '.acting' | grep 2 || return 1
+
+ WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean
+
+ if ! grep -q "Active: got notify from previous acting member.*, requesting pg_temp change" $(find $dir -name '*osd*log')
+ then
+ echo failure
+ return 1
+ fi
+ echo "success"
+
+ delete_pool $poolname
+ kill_daemons $dir || return 1
+}
+
+main repeer-on-acting-back "$@"
+
+# Local Variables:
+# compile-command: "make -j4 && ../qa/run-standalone.sh repeer-on-acting-back.sh"
+# End:
diff --git a/qa/standalone/osd/repro_long_log.sh b/qa/standalone/osd/repro_long_log.sh
new file mode 100755
index 000000000..fa27d7017
--- /dev/null
+++ b/qa/standalone/osd/repro_long_log.sh
@@ -0,0 +1,197 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
+# Copyright (C) 2018 Red Hat <contact@redhat.com>
+#
+# Author: Josh Durgin <jdurgin@redhat.com>
+# Author: David Zafman <dzafman@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7100" # git grep '\<7100\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+PGID=
+
+function test_log_size()
+{
+ local PGID=$1
+ local EXPECTED=$2
+ local DUPS_EXPECTED=${3:-0}
+ ceph tell osd.\* flush_pg_stats
+ sleep 3
+ ceph pg $PGID query | jq .info.stats.log_size
+ ceph pg $PGID query | jq .info.stats.log_size | grep "${EXPECTED}"
+ ceph pg $PGID query | jq .info.stats.log_dups_size
+ ceph pg $PGID query | jq .info.stats.log_dups_size | grep "${DUPS_EXPECTED}"
+}
+
+function setup_log_test() {
+ local dir=$1
+ local which=$2
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+
+ ceph osd pool create test 1 1 || true
+ POOL_ID=$(ceph osd dump --format json | jq '.pools[] | select(.pool_name == "test") | .pool')
+ PGID="${POOL_ID}.0"
+
+ # With 1 PG, setting osd_target_pg_log_entries_per_osd to 20 results in a target log of 20
+ ceph tell osd.\* injectargs -- --osd_target_pg_log_entries_per_osd 20 || return 1
+ ceph tell osd.\* injectargs -- --osd-min-pg-log-entries 20 || return 1
+ ceph tell osd.\* injectargs -- --osd-max-pg-log-entries 30 || return 1
+ ceph tell osd.\* injectargs -- --osd-pg-log-trim-min 10 || return 1
+ ceph tell osd.\* injectargs -- --osd_pg_log_dups_tracked 20 || return 1
+
+ touch $dir/foo
+ for i in $(seq 1 20)
+ do
+ rados -p test put foo $dir/foo || return 1
+ done
+
+ test_log_size $PGID 20 || return 1
+
+ rados -p test rm foo || return 1
+
+ # generate error entries
+ for i in $(seq 1 20)
+ do
+ rados -p test rm foo
+ done
+
+ # the log should have been trimmed down to min_entries plus one extra entry
+ test_log_size $PGID 21 || return 1
+}
+
+function TEST_repro_long_log1()
+{
+ local dir=$1
+
+ setup_log_test $dir || return 1
+ # regular write should trim the log
+ rados -p test put foo $dir/foo || return 1
+ test_log_size $PGID 22 || return 1
+}
+
+function TEST_repro_long_log2()
+{
+ local dir=$1
+
+ setup_log_test $dir || return 1
+ local PRIMARY=$(ceph pg $PGID query | jq '.info.stats.up_primary')
+ kill_daemons $dir TERM osd.$PRIMARY || return 1
+ CEPH_ARGS="--osd-max-pg-log-entries=2 --osd-pg-log-dups-tracked=3 --no-mon-config" ceph-objectstore-tool --data-path $dir/$PRIMARY --pgid $PGID --op trim-pg-log || return 1
+ activate_osd $dir $PRIMARY || return 1
+ wait_for_clean || return 1
+ test_log_size $PGID 21 18 || return 1
+}
+
+function TEST_trim_max_entries()
+{
+ local dir=$1
+
+ setup_log_test $dir || return 1
+
+ ceph tell osd.\* injectargs -- --osd_target_pg_log_entries_per_osd 2 || return 1
+ ceph tell osd.\* injectargs -- --osd-min-pg-log-entries 2
+ ceph tell osd.\* injectargs -- --osd-pg-log-trim-min 2
+ ceph tell osd.\* injectargs -- --osd-pg-log-trim-max 4
+ ceph tell osd.\* injectargs -- --osd_pg_log_dups_tracked 0
+
+ # adding log entries; each op should trim at most 4 entries and add one
+ rados -p test rm foo
+ test_log_size $PGID 18 || return 1
+ rados -p test rm foo
+ test_log_size $PGID 15 || return 1
+ rados -p test rm foo
+ test_log_size $PGID 12 || return 1
+ rados -p test rm foo
+ test_log_size $PGID 9 || return 1
+ rados -p test rm foo
+ test_log_size $PGID 6 || return 1
+ rados -p test rm foo
+ test_log_size $PGID 3 || return 1
+
+ # below trim_min
+ rados -p test rm foo
+ test_log_size $PGID 4 || return 1
+ rados -p test rm foo
+ test_log_size $PGID 3 || return 1
+ rados -p test rm foo
+ test_log_size $PGID 4 || return 1
+ rados -p test rm foo
+ test_log_size $PGID 3 || return 1
+}
+
+function TEST_trim_max_entries_with_dups()
+{
+ local dir=$1
+
+ setup_log_test $dir || return 1
+
+ ceph tell osd.\* injectargs -- --osd_target_pg_log_entries_per_osd 2 || return 1
+ ceph tell osd.\* injectargs -- --osd-min-pg-log-entries 2
+ ceph tell osd.\* injectargs -- --osd-pg-log-trim-min 2
+ ceph tell osd.\* injectargs -- --osd-pg-log-trim-max 4
+ ceph tell osd.\* injectargs -- --osd_pg_log_dups_tracked 20 || return 1
+
+ # adding log entries; each op should trim at most 4 entries and add one
+ # dups should be trimmed to 1
+ rados -p test rm foo
+ test_log_size $PGID 18 2 || return 1
+ rados -p test rm foo
+ test_log_size $PGID 15 6 || return 1
+ rados -p test rm foo
+ test_log_size $PGID 12 10 || return 1
+ rados -p test rm foo
+ test_log_size $PGID 9 14 || return 1
+ rados -p test rm foo
+ test_log_size $PGID 6 18 || return 1
+ rados -p test rm foo
+ test_log_size $PGID 3 20 || return 1
+
+ # below trim_min
+ rados -p test rm foo
+ test_log_size $PGID 4 20 || return 1
+ rados -p test rm foo
+ test_log_size $PGID 3 20 || return 1
+ rados -p test rm foo
+ test_log_size $PGID 4 20 || return 1
+ rados -p test rm foo
+ test_log_size $PGID 3 20 || return 1
+}
+
+main repro-long-log "$@"
+
+# Local Variables:
+# compile-command: "cd ../.. ; make -j4 && ../qa/run-standalone.sh repro_long_log.sh"
+# End:
diff --git a/qa/standalone/scrub/osd-mapper.sh b/qa/standalone/scrub/osd-mapper.sh
new file mode 100755
index 000000000..ed18f94f1
--- /dev/null
+++ b/qa/standalone/scrub/osd-mapper.sh
@@ -0,0 +1,182 @@
+#!/usr/bin/env bash
+# -*- mode:text; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+# vim: ts=8 sw=2 smarttab
+#
+# test the handling of a corrupted SnapMapper DB by Scrub
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+source $CEPH_ROOT/qa/standalone/scrub/scrub-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7144" # git grep '\<7144\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+
+ export -n CEPH_CLI_TEST_DUP_COMMAND
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+# one clone & multiple snaps (according to the number of parameters)
+function make_a_clone()
+{
+ #turn off '-x' (but remember previous state)
+ local saved_echo_flag=${-//[^x]/}
+ set +x
+ local pool=$1
+ local obj=$2
+ echo $RANDOM | rados -p $pool put $obj - || return 1
+ shift 2
+ for snap in $@ ; do
+ rados -p $pool mksnap $snap || return 1
+ done
+ if [[ -n "$saved_echo_flag" ]]; then set -x; fi
+}
+
+function TEST_truncated_sna_record() {
+ local dir=$1
+ local -A cluster_conf=(
+ ['osds_num']="3"
+ ['pgs_in_pool']="4"
+ ['pool_name']="test"
+ )
+
+ local extr_dbg=3
+ (( extr_dbg > 1 )) && echo "Dir: $dir"
+ standard_scrub_cluster $dir cluster_conf
+ ceph tell osd.* config set osd_stats_update_period_not_scrubbing "1"
+ ceph tell osd.* config set osd_stats_update_period_scrubbing "1"
+
+ local osdn=${cluster_conf['osds_num']}
+ local poolid=${cluster_conf['pool_id']}
+ local poolname=${cluster_conf['pool_name']}
+ local objname="objxxx"
+
+ # create an object and clone it
+ make_a_clone $poolname $objname snap01 snap02 || return 1
+ make_a_clone $poolname $objname snap13 || return 1
+ make_a_clone $poolname $objname snap24 snap25 || return 1
+ echo $RANDOM | rados -p $poolname put $objname - || return 1
+
+ #identify the PG and the primary OSD
+ local pgid=`ceph --format=json-pretty osd map $poolname $objname | jq -r '.pgid'`
+ local osd=`ceph --format=json-pretty osd map $poolname $objname | jq -r '.up[0]'`
+ echo "pgid is $pgid (primary: osd.$osd)"
+ # turn on the publishing of test data in the 'scrubber' section of 'pg query' output
+ set_query_debug $pgid
+
+ # verify the existence of these clones
+ (( extr_dbg >= 1 )) && rados --format json-pretty -p $poolname listsnaps $objname
+
+ # deep-scrub the PG
+ ceph pg $pgid deep_scrub || return 1
+
+ # we aren't just waiting for the scrub to terminate, but also for the
+ # logs to be published
+ sleep 3
+ ceph pg dump pgs
+ until grep -a -q -- "event: --^^^^---- ScrubFinished" $dir/osd.$osd.log ; do
+ sleep 0.2
+ done
+
+ ceph pg dump pgs
+ ceph osd set noscrub || return 1
+ ceph osd set nodeep-scrub || return 1
+ sleep 5
+ grep -a -q -v "ERR" $dir/osd.$osd.log || return 1
+
+ # kill the OSDs
+ kill_daemons $dir TERM osd || return 1
+
+ (( extr_dbg >= 2 )) && ceph-kvstore-tool bluestore-kv $dir/0 dump "p"
+ (( extr_dbg >= 2 )) && ceph-kvstore-tool bluestore-kv $dir/2 dump "p" | grep -a SNA_
+ (( extr_dbg >= 2 )) && grep -a SNA_ /tmp/oo2.dump
+ (( extr_dbg >= 2 )) && ceph-kvstore-tool bluestore-kv $dir/2 dump p 2> /dev/null
+ local num_sna_b4=`ceph-kvstore-tool bluestore-kv $dir/$osd dump p 2> /dev/null | grep -a -e 'SNA_[0-9]_000000000000000[0-9]_000000000000000' \
+ | awk -e '{print $2;}' | wc -l`
+
+ for sdn in $(seq 0 $(expr $osdn - 1))
+ do
+ kvdir=$dir/$sdn
+ echo "corrupting the SnapMapper DB of osd.$sdn (db: $kvdir)"
+ (( extr_dbg >= 3 )) && ceph-kvstore-tool bluestore-kv $kvdir dump "p"
+
+ # truncate the 'mapping' (SNA_) entry corresponding to the snap13 clone
+ KY=`ceph-kvstore-tool bluestore-kv $kvdir dump p 2> /dev/null | grep -a -e 'SNA_[0-9]_0000000000000003_000000000000000' \
+ | awk -e '{print $2;}'`
+ (( extr_dbg >= 1 )) && echo "SNA key: $KY" | cat -v
+
+ tmp_fn1=`mktemp -p /tmp --suffix="_the_val"`
+ (( extr_dbg >= 1 )) && echo "Value dumped in: $tmp_fn1"
+ ceph-kvstore-tool bluestore-kv $kvdir get p "$KY" out $tmp_fn1 2> /dev/null
+ (( extr_dbg >= 2 )) && od -xc $tmp_fn1
+
+ NKY=${KY:0:-30}
+ ceph-kvstore-tool bluestore-kv $kvdir rm "p" "$KY" 2> /dev/null
+ ceph-kvstore-tool bluestore-kv $kvdir set "p" "$NKY" in $tmp_fn1 2> /dev/null
+
+ (( extr_dbg >= 1 )) || rm $tmp_fn1
+ done
+
+ orig_osd_args=" ${cluster_conf['osd_args']}"
+ orig_osd_args=" $(echo $orig_osd_args)"
+ (( extr_dbg >= 2 )) && echo "Copied OSD args: /$orig_osd_args/ /${orig_osd_args:1}/"
+ for sdn in $(seq 0 $(expr $osdn - 1))
+ do
+ CEPH_ARGS="$CEPH_ARGS $orig_osd_args" activate_osd $dir $sdn
+ done
+ sleep 1
+
+ for sdn in $(seq 0 $(expr $osdn - 1))
+ do
+ timeout 60 ceph tell osd.$sdn version
+ done
+ rados --format json-pretty -p $poolname listsnaps $objname
+
+ # when scrubbing now - we expect the scrub to emit a cluster log ERR message regarding SnapMapper internal inconsistency
+ ceph osd unset nodeep-scrub || return 1
+ ceph osd unset noscrub || return 1
+
+ # what is the primary now?
+ local cur_prim=`ceph --format=json-pretty osd map $poolname $objname | jq -r '.up[0]'`
+ ceph pg dump pgs
+ sleep 2
+ ceph pg $pgid deep_scrub || return 1
+ sleep 5
+ ceph pg dump pgs
+ (( extr_dbg >= 1 )) && grep -a "ERR" $dir/osd.$cur_prim.log
+ grep -a -q "ERR" $dir/osd.$cur_prim.log || return 1
+
+ # but did we fix the snap issue? let's try scrubbing again
+
+ local prev_err_cnt=`grep -a "ERR" $dir/osd.$cur_prim.log | wc -l`
+ echo "prev count: $prev_err_cnt"
+
+ # scrub again. No errors expected this time
+ ceph pg $pgid deep_scrub || return 1
+ sleep 5
+ ceph pg dump pgs
+ (( extr_dbg >= 1 )) && grep -a "ERR" $dir/osd.$cur_prim.log
+ local current_err_cnt=`grep -a "ERR" $dir/osd.$cur_prim.log | wc -l`
+ (( extr_dbg >= 1 )) && echo "current count: $current_err_cnt"
+ (( current_err_cnt == prev_err_cnt )) || return 1
+ kill_daemons $dir TERM osd || return 1
+ kvdir=$dir/$cur_prim
+ (( extr_dbg >= 2 )) && ceph-kvstore-tool bluestore-kv $kvdir dump p 2> /dev/null | grep -a -e 'SNA_[0-9]_' \
+ | awk -e '{print $2;}'
+ local num_sna_full=`ceph-kvstore-tool bluestore-kv $kvdir dump p 2> /dev/null | grep -a -e 'SNA_[0-9]_000000000000000[0-9]_000000000000000' \
+ | awk -e '{print $2;}' | wc -l`
+ (( num_sna_full == num_sna_b4 )) || return 1
+ return 0
+}
+
+
+main osd-mapper "$@"
diff --git a/qa/standalone/scrub/osd-recovery-scrub.sh b/qa/standalone/scrub/osd-recovery-scrub.sh
new file mode 100755
index 000000000..9541852c7
--- /dev/null
+++ b/qa/standalone/scrub/osd-recovery-scrub.sh
@@ -0,0 +1,352 @@
+#! /usr/bin/env bash
+#
+# Copyright (C) 2017 Red Hat <contact@redhat.com>
+#
+# Author: David Zafman <dzafman@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7124" # git grep '\<7124\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+
+ export -n CEPH_CLI_TEST_DUP_COMMAND
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ $func $dir || return 1
+ done
+}
+
+# Simple test for "not scheduling scrubs due to active recovery"
+# OSD::sched_scrub() called on all OSDs during ticks
+function TEST_recovery_scrub_1() {
+ local dir=$1
+ local poolname=test
+
+ TESTDATA="testdata.$$"
+ OSDS=4
+ PGS=1
+ OBJECTS=100
+ ERRORS=0
+
+ setup $dir || return 1
+ run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true \
+ --osd_scrub_interval_randomize_ratio=0.0 || return 1
+ run_mgr $dir x || return 1
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ run_osd $dir $osd --osd_scrub_during_recovery=false || return 1
+ done
+
+ # Create a pool with $PGS pgs
+ create_pool $poolname $PGS $PGS
+ wait_for_clean || return 1
+ poolid=$(ceph osd dump | grep "^pool.*[']test[']" | awk '{ print $2 }')
+
+ ceph pg dump pgs
+
+ dd if=/dev/urandom of=$TESTDATA bs=1M count=50
+ for i in $(seq 1 $OBJECTS)
+ do
+ rados -p $poolname put obj${i} $TESTDATA
+ done
+ rm -f $TESTDATA
+
+ ceph osd pool set $poolname size 4
+
+ # Wait for recovery to start
+ set -o pipefail
+ count=0
+ while(true)
+ do
+ if ceph --format json pg dump pgs |
+ jq '.pg_stats | [.[] | .state | contains("recovering")]' | grep -q true
+ then
+ break
+ fi
+ sleep 2
+ if test "$count" -eq "10"
+ then
+ echo "Recovery never started"
+ return 1
+ fi
+ count=$(expr $count + 1)
+ done
+ set +o pipefail
+ ceph pg dump pgs
+
+ sleep 10
+ # Workaround for http://tracker.ceph.com/issues/38195
+ kill_daemons $dir #|| return 1
+
+ declare -a err_strings
+ err_strings[0]="not scheduling scrubs due to active recovery"
+
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ grep "not scheduling scrubs" $dir/osd.${osd}.log
+ done
+ for err_string in "${err_strings[@]}"
+ do
+ found=false
+ count=0
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ if grep -q "$err_string" $dir/osd.${osd}.log
+ then
+ found=true
+ count=$(expr $count + 1)
+ fi
+ done
+ if [ "$found" = "false" ]; then
+ echo "Missing log message '$err_string'"
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+ [ $count -eq $OSDS ] || return 1
+ done
+
+ teardown $dir || return 1
+
+ if [ $ERRORS != "0" ];
+ then
+ echo "TEST FAILED WITH $ERRORS ERRORS"
+ return 1
+ fi
+
+ echo "TEST PASSED"
+ return 0
+}
+
+##
+# a modified version of wait_for_scrub(), which terminates if the Primary
+# of the to-be-scrubbed PG changes
+#
+# Given the *last_scrub*, wait for scrub to happen on **pgid**. It
+# will fail if scrub does not complete within $TIMEOUT seconds. The
+# scrub is considered complete whenever the **get_last_scrub_stamp**
+# function reports a timestamp different from the one given as argument.
+#
+# @param pgid the id of the PG
+# @param orig_primary the id of the primary OSD when the wait started
+# @param last_scrub timestamp of the last scrub for *pgid*
+# @return 0 on success, 1 on error
+#
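+# Typical call (mirrors the one in pg_scrub_mod below):
+#   wait_for_scrub_mod $pgid $my_primary "$last_scrub" || return 1
+#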
+function wait_for_scrub_mod() {
+ local pgid=$1
+ local orig_primary=$2
+ local last_scrub="$3"
+ local sname=${4:-last_scrub_stamp}
+
+ for ((i=0; i < $TIMEOUT; i++)); do
+ sleep 0.2
+ if test "$(get_last_scrub_stamp $pgid $sname)" '>' "$last_scrub" ; then
+ return 0
+ fi
+ sleep 1
+ # are we still the primary?
+ local current_primary=`bin/ceph pg $pgid query | jq '.acting[0]' `
+ if [ $orig_primary != $current_primary ]; then
+ echo $orig_primary no longer primary for $pgid
+ return 0
+ fi
+ done
+ return 1
+}
+
+##
+# A modified version of pg_scrub()
+#
+# Run scrub on **pgid** and wait until it completes. The function
+# will fail if the scrub does not complete within $TIMEOUT
+# seconds. The scrub is considered complete whenever the
+# **get_last_scrub_stamp** function reports a timestamp different from
+# the one stored before starting the scrub, or whenever the Primary
+# changes.
+#
+# @param pgid the id of the PG
+# @return 0 on success, 1 on error
+#
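+# Typical call (one background job per PG, as in TEST_recovery_scrub_2 below):
+#   run_in_background pids pg_scrub_mod $poolid.$(printf "%x" $pg)
+#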
+function pg_scrub_mod() {
+ local pgid=$1
+ local last_scrub=$(get_last_scrub_stamp $pgid)
+ # locate the primary
+ local my_primary=`bin/ceph pg $pgid query | jq '.acting[0]' `
+ local recovery=false
+ ceph pg scrub $pgid
+ #ceph --format json pg dump pgs | jq ".pg_stats | .[] | select(.pgid == \"$pgid\") | .state"
+ if ceph --format json pg dump pgs | jq ".pg_stats | .[] | select(.pgid == \"$pgid\") | .state" | grep -q recovering
+ then
+ recovery=true
+ fi
+ wait_for_scrub_mod $pgid $my_primary "$last_scrub" || return 1
+ if test $recovery = "true"
+ then
+ return 2
+ fi
+}
+
+# Same as wait_background() except that it checks for exit code 2 and bumps recov_scrub_count
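+# Typical use (as in TEST_recovery_scrub_2 below):
+#   wait_background_check pids
+#   return_code=$?
+#   if [ $return_code -ne 0 ]; then return $return_code; fi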
+function wait_background_check() {
+ # We extract the PIDS from the variable name
+ pids=${!1}
+
+ return_code=0
+ for pid in $pids; do
+ wait $pid
+ retcode=$?
+ if test $retcode -eq 2
+ then
+ recov_scrub_count=$(expr $recov_scrub_count + 1)
+ elif test $retcode -ne 0
+ then
+ # If one process failed then return 1
+ return_code=1
+ fi
+ done
+
+ # We empty the variable, reporting that all processes ended
+ eval "$1=''"
+
+ return $return_code
+}
+
+# osd_scrub_during_recovery=true makes sure scrubbing happens even during recovery
+function TEST_recovery_scrub_2() {
+ local dir=$1
+ local poolname=test
+
+ TESTDATA="testdata.$$"
+ OSDS=8
+ PGS=32
+ OBJECTS=40
+
+ setup $dir || return 1
+ run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true \
+ --osd_scrub_interval_randomize_ratio=0.0 || return 1
+ run_mgr $dir x || return 1
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ run_osd $dir $osd --osd_scrub_during_recovery=true --osd_recovery_sleep=10 || return 1
+ done
+
+ # Create a pool with $PGS pgs
+ create_pool $poolname $PGS $PGS
+ wait_for_clean || return 1
+ poolid=$(ceph osd dump | grep "^pool.*[']test[']" | awk '{ print $2 }')
+
+ dd if=/dev/urandom of=$TESTDATA bs=1M count=50
+ for i in $(seq 1 $OBJECTS)
+ do
+ rados -p $poolname put obj${i} $TESTDATA
+ done
+ rm -f $TESTDATA
+
+ ceph osd pool set $poolname size 3
+
+ ceph pg dump pgs
+
+ # Wait for recovery to start
+ count=0
+ while(true)
+ do
+ #ceph --format json pg dump pgs | jq '.pg_stats | [.[].state]'
+ if test $(ceph --format json pg dump pgs |
+ jq '.pg_stats | [.[].state]'| grep recovering | wc -l) -ge 2
+ then
+ break
+ fi
+ sleep 2
+ if test "$count" -eq "10"
+ then
+ echo "Not enough recovery started simultaneously"
+ return 1
+ fi
+ count=$(expr $count + 1)
+ done
+ ceph pg dump pgs
+
+ pids=""
+ recov_scrub_count=0
+ for pg in $(seq 0 $(expr $PGS - 1))
+ do
+ run_in_background pids pg_scrub_mod $poolid.$(printf "%x" $pg)
+ done
+ wait_background_check pids
+ return_code=$?
+ if [ $return_code -ne 0 ]; then return $return_code; fi
+
+ ERRORS=0
+ if test $recov_scrub_count -eq 0
+ then
+ echo "No scrubs occurred while PG recovering"
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+
+ pidfile=$(find $dir 2>/dev/null | grep $name_prefix'[^/]*\.pid')
+ pid=$(cat $pidfile)
+ if ! kill -0 $pid
+ then
+ echo "OSD crash occurred"
+ #tail -100 $dir/osd.0.log
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+
+ # Workaround for http://tracker.ceph.com/issues/38195
+ kill_daemons $dir #|| return 1
+
+ declare -a err_strings
+ err_strings[0]="not scheduling scrubs due to active recovery"
+
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ grep "not scheduling scrubs" $dir/osd.${osd}.log
+ done
+ for err_string in "${err_strings[@]}"
+ do
+ found=false
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ if grep "$err_string" $dir/osd.${osd}.log > /dev/null;
+ then
+ found=true
+ fi
+ done
+ if [ "$found" = "true" ]; then
+ echo "Found log message not expected '$err_string'"
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+ done
+
+ teardown $dir || return 1
+
+ if [ $ERRORS != "0" ];
+ then
+ echo "TEST FAILED WITH $ERRORS ERRORS"
+ return 1
+ fi
+
+ echo "TEST PASSED"
+ return 0
+}
+
+main osd-recovery-scrub "$@"
+
+# Local Variables:
+# compile-command: "cd build ; make -j4 && \
+# ../qa/run-standalone.sh osd-recovery-scrub.sh"
+# End:
diff --git a/qa/standalone/scrub/osd-scrub-dump.sh b/qa/standalone/scrub/osd-scrub-dump.sh
new file mode 100755
index 000000000..f21ec7801
--- /dev/null
+++ b/qa/standalone/scrub/osd-scrub-dump.sh
@@ -0,0 +1,180 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2019 Red Hat <contact@redhat.com>
+#
+# Author: David Zafman <dzafman@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+MAX_SCRUBS=4
+SCRUB_SLEEP=3
+POOL_SIZE=3
+
+function run() {
+ local dir=$1
+ shift
+ local CHUNK_MAX=5
+
+ export CEPH_MON="127.0.0.1:7184" # git grep '\<7184\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+ CEPH_ARGS+="--osd_max_scrubs=$MAX_SCRUBS "
+ CEPH_ARGS+="--osd_shallow_scrub_chunk_max=$CHUNK_MAX "
+ CEPH_ARGS+="--osd_scrub_sleep=$SCRUB_SLEEP "
+ CEPH_ARGS+="--osd_pool_default_size=$POOL_SIZE "
+ # Set scheduler to "wpq" until there's a reliable way to query scrub states
+ # with "--osd-scrub-sleep" set to 0. The "mclock_scheduler" overrides the
+ # scrub sleep to 0 and as a result the checks in the test fail.
+ CEPH_ARGS+="--osd_op_queue=wpq "
+
+ export -n CEPH_CLI_TEST_DUP_COMMAND
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+function TEST_recover_unexpected() {
+ local dir=$1
+ shift
+ local OSDS=6
+ local PGS=16
+ local POOLS=3
+ local OBJS=1000
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ for o in $(seq 0 $(expr $OSDS - 1))
+ do
+ run_osd $dir $o
+ done
+
+ for i in $(seq 1 $POOLS)
+ do
+ create_pool test$i $PGS $PGS
+ done
+
+ wait_for_clean || return 1
+
+ dd if=/dev/urandom of=datafile bs=4k count=2
+ for i in $(seq 1 $POOLS)
+ do
+ for j in $(seq 1 $OBJS)
+ do
+ rados -p test$i put obj$j datafile
+ done
+ done
+ rm datafile
+
+ ceph osd set noscrub
+ ceph osd set nodeep-scrub
+
+ for qpg in $(ceph pg dump pgs --format=json-pretty | jq '.pg_stats[].pgid')
+ do
+ primary=$(ceph pg dump pgs --format=json | jq ".pg_stats[] | select(.pgid == $qpg) | .acting_primary")
+ eval pg=$qpg # strip quotes around qpg
+ ceph tell $pg scrub
+ done
+
+ ceph pg dump pgs
+
+ max=$(CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_scrub_reservations | jq '.osd_max_scrubs')
+ if [ $max != $MAX_SCRUBS ]; then
+ echo "ERROR: Incorrect osd_max_scrubs from dump_scrub_reservations"
+ return 1
+ fi
+
+ ceph osd unset noscrub
+
+ ok=false
+ for i in $(seq 0 300)
+ do
+ ceph pg dump pgs
+ if ceph pg dump pgs | grep '+scrubbing'; then
+ ok=true
+ break
+ fi
+ sleep 1
+ done
+ if test $ok = "false"; then
+ echo "ERROR: Test set-up failed - no scrubbing started"
+ return 1
+ fi
+
+ local total=0
+ local zerocount=0
+ local maxzerocount=3
+ while(true)
+ do
+ pass=0
+ for o in $(seq 0 $(expr $OSDS - 1))
+ do
+ CEPH_ARGS='' ceph daemon $(get_asok_path osd.$o) dump_scrub_reservations
+ scrubs=$(CEPH_ARGS='' ceph daemon $(get_asok_path osd.$o) dump_scrub_reservations | jq '.scrubs_local + .scrubs_remote')
+ if [ $scrubs -gt $MAX_SCRUBS ]; then
+ echo "ERROR: More than $MAX_SCRUBS currently reserved"
+ return 1
+ fi
+ pass=$(expr $pass + $scrubs)
+ done
+ if [ $pass = "0" ]; then
+ zerocount=$(expr $zerocount + 1)
+ fi
+ if [ $zerocount -gt $maxzerocount ]; then
+ break
+ fi
+ total=$(expr $total + $pass)
+ if [ $total -gt 0 ]; then
+ # already saw some reservations, so wait longer to avoid excessive over-counting.
+ # Note the loop itself takes about 2-3 seconds
+ sleep $(expr $SCRUB_SLEEP - 2)
+ else
+ sleep 0.5
+ fi
+ done
+
+ # Check that there are no more scrubs
+ for i in $(seq 0 5)
+ do
+ if ceph pg dump pgs | grep '+scrubbing'; then
+ echo "ERROR: Extra scrubs after test completion...not expected"
+ return 1
+ fi
+ sleep $SCRUB_SLEEP
+ done
+
+ echo $total total reservations seen
+
+ # Somewhat arbitrary number based on PGS * POOLS * POOL_SIZE as the number of total scrub
+ # reservations that must occur. However, the loop above might see the same reservation more
+ # than once.
+ actual_reservations=$(expr $PGS \* $POOLS \* $POOL_SIZE)
+ if [ $total -lt $actual_reservations ]; then
+ echo "ERROR: Unexpectedly low amount of scrub reservations seen during test"
+ return 1
+ fi
+
+ return 0
+}
+
+
+main osd-scrub-dump "$@"
+
+# Local Variables:
+# compile-command: "cd build ; make check && \
+# ../qa/run-standalone.sh osd-scrub-dump.sh"
+# End:
diff --git a/qa/standalone/scrub/osd-scrub-repair.sh b/qa/standalone/scrub/osd-scrub-repair.sh
new file mode 100755
index 000000000..13b30360c
--- /dev/null
+++ b/qa/standalone/scrub/osd-scrub-repair.sh
@@ -0,0 +1,6255 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2014 Red Hat <contact@redhat.com>
+#
+# Author: Loic Dachary <loic@dachary.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+set -x
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+if [ `uname` = FreeBSD ]; then
+ # erasure coding overwrites are only tested on Bluestore
+ # erasure coding on filestore is unsafe
+ # http://docs.ceph.com/en/latest/rados/operations/erasure-code/#erasure-coding-with-overwrites
+ use_ec_overwrite=false
+else
+ use_ec_overwrite=true
+fi
+
+# Test development and debugging
+# Set to "yes" in order to ignore diff errors and save results to update test
+getjson="no"
+
+# Filter out mtime and local_mtime dates, version, prior_version and last_reqid (client) from any object_info.
+jqfilter='def walk(f):
+ . as $in
+ | if type == "object" then
+ reduce keys[] as $key
+ ( {}; . + { ($key): ($in[$key] | walk(f)) } ) | f
+ elif type == "array" then map( walk(f) ) | f
+ else f
+ end;
+walk(if type == "object" then del(.mtime) else . end)
+| walk(if type == "object" then del(.local_mtime) else . end)
+| walk(if type == "object" then del(.last_reqid) else . end)
+| walk(if type == "object" then del(.version) else . end)
+| walk(if type == "object" then del(.prior_version) else . end)'
+
+sortkeys='import json; import sys ; JSON=sys.stdin.read() ; ud = json.loads(JSON) ; print(json.dumps(ud, sort_keys=True, indent=2))'
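+
+# Typical use (a sketch; $pgid stands for any PG being checked): feed a PG's
+# inconsistency report through $jqfilter to drop the volatile object_info
+# fields, then canonicalize key order with $sortkeys so the output can be
+# diffed against a stored expectation, e.g.
+#   rados list-inconsistent-obj $pgid | jq "$jqfilter" | python3 -c "$sortkeys"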
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7107" # git grep '\<7107\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+ CEPH_ARGS+="--osd-skip-data-digest=false "
+
+ export -n CEPH_CLI_TEST_DUP_COMMAND
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+function add_something() {
+ local dir=$1
+ local poolname=$2
+ local obj=${3:-SOMETHING}
+ local scrub=${4:-noscrub}
+
+ if [ "$scrub" = "noscrub" ];
+ then
+ ceph osd set noscrub || return 1
+ ceph osd set nodeep-scrub || return 1
+ else
+ ceph osd unset noscrub || return 1
+ ceph osd unset nodeep-scrub || return 1
+ fi
+
+ local payload=ABCDEF
+ echo $payload > $dir/ORIGINAL
+ rados --pool $poolname put $obj $dir/ORIGINAL || return 1
+}
+
+#
+# Corrupt one copy of a replicated pool
+#
+function TEST_corrupt_and_repair_replicated() {
+ local dir=$1
+ local poolname=rbd
+
+ run_mon $dir a --osd_pool_default_size=2 || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ create_rbd_pool || return 1
+ wait_for_clean || return 1
+
+ add_something $dir $poolname || return 1
+ corrupt_and_repair_one $dir $poolname $(get_not_primary $poolname SOMETHING) || return 1
+ # Reproduces http://tracker.ceph.com/issues/8914
+ corrupt_and_repair_one $dir $poolname $(get_primary $poolname SOMETHING) || return 1
+}
+
+#
+# Allow repair to be scheduled while recovery is still ongoing on the same OSD
+#
+function TEST_allow_repair_during_recovery() {
+ local dir=$1
+ local poolname=rbd
+
+ run_mon $dir a --osd_pool_default_size=2 || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 --osd_scrub_during_recovery=false \
+ --osd_repair_during_recovery=true \
+ --osd_debug_pretend_recovery_active=true || return 1
+ run_osd $dir 1 --osd_scrub_during_recovery=false \
+ --osd_repair_during_recovery=true \
+ --osd_debug_pretend_recovery_active=true || return 1
+ create_rbd_pool || return 1
+ wait_for_clean || return 1
+
+ add_something $dir $poolname || return 1
+ corrupt_and_repair_one $dir $poolname $(get_not_primary $poolname SOMETHING) || return 1
+}
+
+#
+# Skip non-repair scrub correctly during recovery
+#
+function TEST_skip_non_repair_during_recovery() {
+ local dir=$1
+ local poolname=rbd
+
+ run_mon $dir a --osd_pool_default_size=2 || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 --osd_scrub_during_recovery=false \
+ --osd_repair_during_recovery=true \
+ --osd_debug_pretend_recovery_active=true || return 1
+ run_osd $dir 1 --osd_scrub_during_recovery=false \
+ --osd_repair_during_recovery=true \
+ --osd_debug_pretend_recovery_active=true || return 1
+ create_rbd_pool || return 1
+ wait_for_clean || return 1
+
+ add_something $dir $poolname || return 1
+ scrub_and_not_schedule $dir $poolname $(get_not_primary $poolname SOMETHING) || return 1
+}
+
+function scrub_and_not_schedule() {
+ local dir=$1
+ local poolname=$2
+ local osd=$3
+
+ #
+ # 1) start a non-repair scrub
+ #
+ local pg=$(get_pg $poolname SOMETHING)
+ local last_scrub=$(get_last_scrub_stamp $pg)
+ ceph pg scrub $pg
+
+ #
+ # 2) Ensure the scrub is not scheduled
+ #
+ for ((i=0; i < 3; i++)); do
+ if test "$(get_last_scrub_stamp $pg)" '>' "$last_scrub" ; then
+ return 1
+ fi
+ sleep 1
+ done
+
+ #
+ # 3) Access to the file must be OK
+ #
+ objectstore_tool $dir $osd SOMETHING list-attrs || return 1
+ rados --pool $poolname get SOMETHING $dir/COPY || return 1
+ diff $dir/ORIGINAL $dir/COPY || return 1
+}
+
+function corrupt_and_repair_two() {
+ local dir=$1
+ local poolname=$2
+ local first=$3
+ local second=$4
+
+ #
+ # 1) remove the corresponding file from the OSDs
+ #
+ pids=""
+ run_in_background pids objectstore_tool $dir $first SOMETHING remove
+ run_in_background pids objectstore_tool $dir $second SOMETHING remove
+ wait_background pids
+ return_code=$?
+ if [ $return_code -ne 0 ]; then return $return_code; fi
+
+ #
+ # 2) repair the PG
+ #
+ local pg=$(get_pg $poolname SOMETHING)
+ repair $pg
+ #
+ # 3) The files must be back
+ #
+ pids=""
+ run_in_background pids objectstore_tool $dir $first SOMETHING list-attrs
+ run_in_background pids objectstore_tool $dir $second SOMETHING list-attrs
+ wait_background pids
+ return_code=$?
+ if [ $return_code -ne 0 ]; then return $return_code; fi
+
+ rados --pool $poolname get SOMETHING $dir/COPY || return 1
+ diff $dir/ORIGINAL $dir/COPY || return 1
+}
+
+#
+# 1) add an object
+# 2) remove the corresponding file from a designated OSD
+# 3) repair the PG
+# 4) check that the file has been restored in the designated OSD
+#
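+# Typical call (as in TEST_corrupt_and_repair_replicated above):
+#   add_something $dir $poolname || return 1
+#   corrupt_and_repair_one $dir $poolname $(get_not_primary $poolname SOMETHING) || return 1
+#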
+function corrupt_and_repair_one() {
+ local dir=$1
+ local poolname=$2
+ local osd=$3
+
+ #
+ # 1) remove the corresponding file from the OSD
+ #
+ objectstore_tool $dir $osd SOMETHING remove || return 1
+ #
+ # 2) repair the PG
+ #
+ local pg=$(get_pg $poolname SOMETHING)
+ repair $pg
+ #
+ # 3) The file must be back
+ #
+ objectstore_tool $dir $osd SOMETHING list-attrs || return 1
+ rados --pool $poolname get SOMETHING $dir/COPY || return 1
+ diff $dir/ORIGINAL $dir/COPY || return 1
+}
+
+function corrupt_and_repair_erasure_coded() {
+ local dir=$1
+ local poolname=$2
+
+ add_something $dir $poolname || return 1
+
+ local primary=$(get_primary $poolname SOMETHING)
+ local -a osds=($(get_osds $poolname SOMETHING | sed -e "s/$primary//"))
+ local not_primary_first=${osds[0]}
+ local not_primary_second=${osds[1]}
+
+ # Reproduces http://tracker.ceph.com/issues/10017
+ corrupt_and_repair_one $dir $poolname $primary || return 1
+ # Reproduces http://tracker.ceph.com/issues/10409
+ corrupt_and_repair_one $dir $poolname $not_primary_first || return 1
+ corrupt_and_repair_two $dir $poolname $not_primary_first $not_primary_second || return 1
+ corrupt_and_repair_two $dir $poolname $primary $not_primary_first || return 1
+
+}
+
+function auto_repair_erasure_coded() {
+ local dir=$1
+ local allow_overwrites=$2
+ local poolname=ecpool
+
+ # Launch a cluster with 5 seconds scrub interval
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ local ceph_osd_args="--osd-scrub-auto-repair=true \
+ --osd-deep-scrub-interval=5 \
+ --osd-scrub-max-interval=5 \
+ --osd-scrub-min-interval=5 \
+ --osd-scrub-interval-randomize-ratio=0"
+ for id in $(seq 0 2) ; do
+ run_osd $dir $id $ceph_osd_args || return 1
+ done
+ create_rbd_pool || return 1
+ wait_for_clean || return 1
+
+ # Create an EC pool
+ create_ec_pool $poolname $allow_overwrites k=2 m=1 || return 1
+
+ # Put an object
+ local payload=ABCDEF
+ echo $payload > $dir/ORIGINAL
+ rados --pool $poolname put SOMETHING $dir/ORIGINAL || return 1
+
+ # Remove the object from one shard physically
+ # Restarted osd get $ceph_osd_args passed
+ objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING remove || return 1
+ # Wait for auto repair
+ local pgid=$(get_pg $poolname SOMETHING)
+ wait_for_scrub $pgid "$(get_last_scrub_stamp $pgid)"
+ wait_for_clean || return 1
+ # Verify - the file should be back
+ # Restarted osd get $ceph_osd_args passed
+ objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING list-attrs || return 1
+ rados --pool $poolname get SOMETHING $dir/COPY || return 1
+ diff $dir/ORIGINAL $dir/COPY || return 1
+}
+
+function TEST_auto_repair_erasure_coded_appends() {
+ auto_repair_erasure_coded $1 false
+}
+
+function TEST_auto_repair_erasure_coded_overwrites() {
+ if [ "$use_ec_overwrite" = "true" ]; then
+ auto_repair_erasure_coded $1 true
+ fi
+}
+
+# initiate a scrub, then check for the (expected) 'scrubbing' state and for
+# the 'repair' state (which is not expected until an error has been identified)
+# Arguments: osd#, pg, sleep time
+function initiate_and_fetch_state() {
+ local the_osd="osd.$1"
+ local pgid=$2
+ local last_scrub=$(get_last_scrub_stamp $pgid)
+
+ set_config "osd" "$1" "osd_scrub_sleep" "$3"
+ set_config "osd" "$1" "osd_scrub_auto_repair" "true"
+
+ flush_pg_stats
+ date --rfc-3339=ns
+
+ # note: must initiate a "regular" (periodic) deep scrub - not an operator-initiated one
+ env CEPH_ARGS= ceph --format json daemon $(get_asok_path $the_osd) deep_scrub "$pgid"
+ env CEPH_ARGS= ceph --format json daemon $(get_asok_path $the_osd) scrub "$pgid"
+
+ # wait for 'scrubbing' to appear
+ for ((i=0; i < 80; i++)); do
+
+ st=`ceph pg $pgid query --format json | jq '.state' `
+ echo $i ") state now: " $st
+
+ case "$st" in
+ *scrubbing*repair* ) echo "found scrub+repair"; return 1;; # PR #41258 should have prevented this
+ *scrubbing* ) echo "found scrub"; return 0;;
+ *inconsistent* ) echo "Got here too late. Scrub has already finished"; return 1;;
+ *recovery* ) echo "Got here too late. Scrub has already finished."; return 1;;
+ * ) echo $st;;
+ esac
+
+ if [ $((i % 10)) == 4 ]; then
+ echo "loop --------> " $i
+ fi
+ sleep 0.3
+ done
+
+ echo "Timeout waiting for deep-scrub of " $pgid " on " $the_osd " to start"
+ return 1
+}
+
+function wait_end_of_scrub() { # osd# pg
+ local the_osd="osd.$1"
+ local pgid=$2
+
+ for ((i=0; i < 40; i++)); do
+ st=`ceph pg $pgid query --format json | jq '.state' `
+ echo "wait-scrub-end state now: " $st
+ [[ $st =~ (.*scrubbing.*) ]] || break
+ if [ $((i % 5)) == 4 ] ; then
+ flush_pg_stats
+ fi
+ sleep 0.3
+ done
+
+ if [[ $st =~ (.*scrubbing.*) ]]
+ then
+ # a timeout
+ return 1
+ fi
+ return 0
+}
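+
+# Typical pairing (as used in TEST_auto_repair_bluestore_tag below):
+#   initiate_and_fetch_state $primary $pgid "3.0"
+#   wait_end_of_scrub "$primary" "$pgid" || return 1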
+
+
+function TEST_auto_repair_bluestore_tag() {
+ local dir=$1
+ local poolname=testpool
+
+ # Launch a cluster with 3 seconds scrub interval
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ # Set scheduler to "wpq" until there's a reliable way to query scrub states
+ # with "--osd-scrub-sleep" set to 0. The "mclock_scheduler" overrides the
+ # scrub sleep to 0 and as a result the checks in the test fail.
+ local ceph_osd_args="--osd-scrub-auto-repair=true \
+ --osd_deep_scrub_randomize_ratio=0 \
+ --osd-scrub-interval-randomize-ratio=0 \
+ --osd-op-queue=wpq"
+ for id in $(seq 0 2) ; do
+ run_osd $dir $id $ceph_osd_args || return 1
+ done
+
+ create_pool $poolname 1 1 || return 1
+ ceph osd pool set $poolname size 2
+ wait_for_clean || return 1
+
+ # Put an object
+ local payload=ABCDEF
+ echo $payload > $dir/ORIGINAL
+ rados --pool $poolname put SOMETHING $dir/ORIGINAL || return 1
+
+ # Remove the object from one shard physically
+ # Restarted osd get $ceph_osd_args passed
+ objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING remove || return 1
+
+ local pgid=$(get_pg $poolname SOMETHING)
+ local primary=$(get_primary $poolname SOMETHING)
+ echo "Affected PG " $pgid " w/ primary " $primary
+ local last_scrub_stamp="$(get_last_scrub_stamp $pgid)"
+ initiate_and_fetch_state $primary $pgid "3.0"
+ r=$?
+ echo "initiate_and_fetch_state ret: " $r
+ set_config "osd" "$primary" "osd_scrub_sleep" "0"
+ if [ $r -ne 0 ]; then
+ return 1
+ fi
+
+ wait_end_of_scrub "$primary" "$pgid" || return 1
+ ceph pg dump pgs
+
+ # Verify - the file should be back
+ # Restarted osd get $ceph_osd_args passed
+ objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING list-attrs || return 1
+ objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING get-bytes $dir/COPY || return 1
+ diff $dir/ORIGINAL $dir/COPY || return 1
+ grep scrub_finish $dir/osd.${primary}.log
+}
+
+
+function TEST_auto_repair_bluestore_basic() {
+ local dir=$1
+ local poolname=testpool
+
+ # Launch a cluster with 5 seconds scrub interval
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ local ceph_osd_args="--osd-scrub-auto-repair=true \
+ --osd_deep_scrub_randomize_ratio=0 \
+ --osd-scrub-interval-randomize-ratio=0"
+ for id in $(seq 0 2) ; do
+ run_osd $dir $id $ceph_osd_args || return 1
+ done
+
+ create_pool $poolname 1 1 || return 1
+ ceph osd pool set $poolname size 2
+ wait_for_clean || return 1
+
+ # Put an object
+ local payload=ABCDEF
+ echo $payload > $dir/ORIGINAL
+ rados --pool $poolname put SOMETHING $dir/ORIGINAL || return 1
+
+ # Remove the object from one shard physically
+ # Restarted osd get $ceph_osd_args passed
+ objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING remove || return 1
+
+ local pgid=$(get_pg $poolname SOMETHING)
+ local primary=$(get_primary $poolname SOMETHING)
+ local last_scrub_stamp="$(get_last_scrub_stamp $pgid)"
+ ceph tell $pgid deep_scrub
+ ceph tell $pgid scrub
+
+ # Wait for auto repair
+ wait_for_scrub $pgid "$last_scrub_stamp" || return 1
+ wait_for_clean || return 1
+ ceph pg dump pgs
+ # Verify - the file should be back
+ # Restarted osd get $ceph_osd_args passed
+ objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING list-attrs || return 1
+ objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING get-bytes $dir/COPY || return 1
+ diff $dir/ORIGINAL $dir/COPY || return 1
+ grep scrub_finish $dir/osd.${primary}.log
+}
+
+function TEST_auto_repair_bluestore_scrub() {
+ local dir=$1
+ local poolname=testpool
+
+ # Launch a cluster with 5 seconds scrub interval
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ local ceph_osd_args="--osd-scrub-auto-repair=true \
+ --osd_deep_scrub_randomize_ratio=0 \
+ --osd-scrub-interval-randomize-ratio=0 \
+ --osd-scrub-backoff-ratio=0"
+ for id in $(seq 0 2) ; do
+ run_osd $dir $id $ceph_osd_args || return 1
+ done
+
+ create_pool $poolname 1 1 || return 1
+ ceph osd pool set $poolname size 2
+ wait_for_clean || return 1
+
+ # Put an object
+ local payload=ABCDEF
+ echo $payload > $dir/ORIGINAL
+ rados --pool $poolname put SOMETHING $dir/ORIGINAL || return 1
+
+ # Remove the object from one shard physically
+ # Restarted osd get $ceph_osd_args passed
+ objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING remove || return 1
+
+ local pgid=$(get_pg $poolname SOMETHING)
+ local primary=$(get_primary $poolname SOMETHING)
+ local last_scrub_stamp="$(get_last_scrub_stamp $pgid)"
+ ceph tell $pgid scrub
+
+ # Wait for scrub -> auto repair
+ wait_for_scrub $pgid "$last_scrub_stamp" || return 1
+ ceph pg dump pgs
+ # This actually causes two scrubs, so we'd better wait a little longer
+ sleep 5
+ wait_for_clean || return 1
+ ceph pg dump pgs
+ # Verify - the file should be back
+ # Restarted OSDs get $ceph_osd_args passed
+ objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING list-attrs || return 1
+ rados --pool $poolname get SOMETHING $dir/COPY || return 1
+ diff $dir/ORIGINAL $dir/COPY || return 1
+ grep scrub_finish $dir/osd.${primary}.log
+
+ # This should have caused 1 object to be repaired
+ COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
+ test "$COUNT" = "1" || return 1
+}
+
+function TEST_auto_repair_bluestore_failed() {
+ local dir=$1
+ local poolname=testpool
+
+ # Launch a cluster with a 5-second scrub interval
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ local ceph_osd_args="--osd-scrub-auto-repair=true \
+ --osd_deep_scrub_randomize_ratio=0 \
+ --osd-scrub-interval-randomize-ratio=0"
+ for id in $(seq 0 2) ; do
+ run_osd $dir $id $ceph_osd_args || return 1
+ done
+
+ create_pool $poolname 1 1 || return 1
+ ceph osd pool set $poolname size 2
+ wait_for_clean || return 1
+
+ # Put an object
+ local payload=ABCDEF
+ echo $payload > $dir/ORIGINAL
+ for i in $(seq 1 10)
+ do
+ rados --pool $poolname put obj$i $dir/ORIGINAL || return 1
+ done
+
+ # Remove the objects from one shard physically
+ # Restarted OSDs get $ceph_osd_args passed
+ objectstore_tool $dir $(get_not_primary $poolname SOMETHING) obj1 remove || return 1
+ # obj2 can't be repaired
+ objectstore_tool $dir $(get_not_primary $poolname SOMETHING) obj2 remove || return 1
+ objectstore_tool $dir $(get_primary $poolname SOMETHING) obj2 rm-attr _ || return 1
+
+ local pgid=$(get_pg $poolname obj1)
+ local primary=$(get_primary $poolname obj1)
+ local last_scrub_stamp="$(get_last_scrub_stamp $pgid)"
+ ceph tell $pgid deep_scrub
+ ceph tell $pgid scrub
+
+ # Wait for auto repair
+ wait_for_scrub $pgid "$last_scrub_stamp" || return 1
+ wait_for_clean || return 1
+ flush_pg_stats
+ grep scrub_finish $dir/osd.${primary}.log
+ grep -q "scrub_finish.*still present after re-scrub" $dir/osd.${primary}.log || return 1
+ ceph pg dump pgs
+ ceph pg dump pgs | grep -q "^${pgid}.*+failed_repair" || return 1
+
+ # Verify - obj1 should be back
+ # Restarted OSDs get $ceph_osd_args passed
+ objectstore_tool $dir $(get_not_primary $poolname obj1) obj1 list-attrs || return 1
+ rados --pool $poolname get obj1 $dir/COPY || return 1
+ diff $dir/ORIGINAL $dir/COPY || return 1
+ grep scrub_finish $dir/osd.${primary}.log
+
+ # Make it repairable
+ objectstore_tool $dir $(get_primary $poolname SOMETHING) obj2 remove || return 1
+ repair $pgid
+ sleep 2
+
+ flush_pg_stats
+ ceph pg dump pgs
+ ceph pg dump pgs | grep -q -e "^${pgid}.* active+clean " -e "^${pgid}.* active+clean+wait " || return 1
+ grep scrub_finish $dir/osd.${primary}.log
+}
+
+function TEST_auto_repair_bluestore_failed_norecov() {
+ local dir=$1
+ local poolname=testpool
+
+ # Launch a cluster with a 5-second scrub interval
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ local ceph_osd_args="--osd-scrub-auto-repair=true \
+ --osd_deep_scrub_randomize_ratio=0 \
+ --osd-scrub-interval-randomize-ratio=0"
+ for id in $(seq 0 2) ; do
+ run_osd $dir $id $ceph_osd_args || return 1
+ done
+
+ create_pool $poolname 1 1 || return 1
+ ceph osd pool set $poolname size 2
+ wait_for_clean || return 1
+
+ # Put an object
+ local payload=ABCDEF
+ echo $payload > $dir/ORIGINAL
+ for i in $(seq 1 10)
+ do
+ rados --pool $poolname put obj$i $dir/ORIGINAL || return 1
+ done
+
+ # Remove the objects from one shard physically
+ # Restarted OSDs get $ceph_osd_args passed
+ # obj1 can't be repaired
+ objectstore_tool $dir $(get_not_primary $poolname SOMETHING) obj1 remove || return 1
+ objectstore_tool $dir $(get_primary $poolname SOMETHING) obj1 rm-attr _ || return 1
+ # obj2 can't be repaired
+ objectstore_tool $dir $(get_not_primary $poolname SOMETHING) obj2 remove || return 1
+ objectstore_tool $dir $(get_primary $poolname SOMETHING) obj2 rm-attr _ || return 1
+
+ local pgid=$(get_pg $poolname obj1)
+ local primary=$(get_primary $poolname obj1)
+ local last_scrub_stamp="$(get_last_scrub_stamp $pgid)"
+ ceph tell $pgid deep_scrub
+ ceph tell $pgid scrub
+
+ # Wait for auto repair
+ wait_for_scrub $pgid "$last_scrub_stamp" || return 1
+ wait_for_clean || return 1
+ flush_pg_stats
+ grep -q "scrub_finish.*present with no repair possible" $dir/osd.${primary}.log || return 1
+ ceph pg dump pgs
+ ceph pg dump pgs | grep -q "^${pgid}.*+failed_repair" || return 1
+}
+
+function TEST_repair_stats() {
+ local dir=$1
+ local poolname=testpool
+ local OSDS=2
+ local OBJS=30
+ # This needs to be an even number
+ local REPAIRS=20
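+ # The removal loop below alternates between osd.0 and osd.1 (i % 2), so each
+ # OSD ends up repairing REPAIRS/2 shards, which is what the per-OSD
+ # num_shards_repaired checks at the end expect.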
+
+ # Launch a cluster with a 5-second scrub interval
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ local ceph_osd_args="--osd_deep_scrub_randomize_ratio=0 \
+ --osd-scrub-interval-randomize-ratio=0"
+ for id in $(seq 0 $(expr $OSDS - 1)) ; do
+ run_osd $dir $id $ceph_osd_args || return 1
+ done
+
+ create_pool $poolname 1 1 || return 1
+ ceph osd pool set $poolname size 2
+ wait_for_clean || return 1
+
+ # Put an object
+ local payload=ABCDEF
+ echo $payload > $dir/ORIGINAL
+ for i in $(seq 1 $OBJS)
+ do
+ rados --pool $poolname put obj$i $dir/ORIGINAL || return 1
+ done
+
+ # Remove objects physically, alternating between the two OSDs below
+ # Restarted OSDs get $ceph_osd_args passed
+ local other=$(get_not_primary $poolname obj1)
+ local pgid=$(get_pg $poolname obj1)
+ local primary=$(get_primary $poolname obj1)
+
+ kill_daemons $dir TERM osd.$other >&2 < /dev/null || return 1
+ kill_daemons $dir TERM osd.$primary >&2 < /dev/null || return 1
+ for i in $(seq 1 $REPAIRS)
+ do
+ # Alternate removals between osd.0 and osd.1
+ OSD=$(expr $i % 2)
+ _objectstore_tool_nodown $dir $OSD obj$i remove || return 1
+ done
+ activate_osd $dir $primary $ceph_osd_args || return 1
+ activate_osd $dir $other $ceph_osd_args || return 1
+ wait_for_clean || return 1
+
+ repair $pgid
+ wait_for_clean || return 1
+ ceph pg dump pgs
+ flush_pg_stats
+
+ # This should have caused $REPAIRS objects to be repaired
+ ceph pg $pgid query | jq '.info.stats.stat_sum'
+ COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
+ test "$COUNT" = "$REPAIRS" || return 1
+
+ ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $primary )"
+ COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $primary ).num_shards_repaired")
+ test "$COUNT" = "$(expr $REPAIRS / 2)" || return 1
+
+ ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $other )"
+ COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $other ).num_shards_repaired")
+ test "$COUNT" = "$(expr $REPAIRS / 2)" || return 1
+
+ ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum"
+ COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired")
+ test "$COUNT" = "$REPAIRS" || return 1
+}
+
+function TEST_repair_stats_ec() {
+ local dir=$1
+ local poolname=testpool
+ local OSDS=3
+ local OBJS=30
+ # This needs to be an even number
+ local REPAIRS=26
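+ # Kept even for the same reason as in TEST_repair_stats: removals alternate
+ # between two OSDs, and each of them is expected to show REPAIRS/2 in
+ # num_shards_repaired.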
+ local allow_overwrites=false
+
+ # Launch a cluster with a 5-second scrub interval
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ local ceph_osd_args="--osd_deep_scrub_randomize_ratio=0 \
+ --osd-scrub-interval-randomize-ratio=0"
+ for id in $(seq 0 $(expr $OSDS - 1)) ; do
+ run_osd $dir $id $ceph_osd_args || return 1
+ done
+
+ # Create an EC pool
+ create_ec_pool $poolname $allow_overwrites k=2 m=1 || return 1
+
+ # Put an object
+ local payload=ABCDEF
+ echo $payload > $dir/ORIGINAL
+ for i in $(seq 1 $OBJS)
+ do
+ rados --pool $poolname put obj$i $dir/ORIGINAL || return 1
+ done
+
+ # Remove objects physically, alternating between the two OSDs below
+ # Restarted OSDs get $ceph_osd_args passed
+ local other=$(get_not_primary $poolname obj1)
+ local pgid=$(get_pg $poolname obj1)
+ local primary=$(get_primary $poolname obj1)
+
+ kill_daemons $dir TERM osd.$other >&2 < /dev/null || return 1
+ kill_daemons $dir TERM osd.$primary >&2 < /dev/null || return 1
+ for i in $(seq 1 $REPAIRS)
+ do
+ # Alternate removals between osd.0 and osd.1
+ OSD=$(expr $i % 2)
+ _objectstore_tool_nodown $dir $OSD obj$i remove || return 1
+ done
+ activate_osd $dir $primary $ceph_osd_args || return 1
+ activate_osd $dir $other $ceph_osd_args || return 1
+ wait_for_clean || return 1
+
+ repair $pgid
+ wait_for_clean || return 1
+ ceph pg dump pgs
+ flush_pg_stats
+
+ # This should have caused $REPAIRS objects to be repaired
+ ceph pg $pgid query | jq '.info.stats.stat_sum'
+ COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
+ test "$COUNT" = "$REPAIRS" || return 1
+
+ for osd in $(seq 0 $(expr $OSDS - 1)) ; do
+ if [ $osd = $other -o $osd = $primary ]; then
+ repair=$(expr $REPAIRS / 2)
+ else
+ repair="0"
+ fi
+
+ ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $osd )"
+ COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $osd ).num_shards_repaired")
+ test "$COUNT" = "$repair" || return 1
+ done
+
+ ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum"
+ COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired")
+ test "$COUNT" = "$REPAIRS" || return 1
+}
+
+function corrupt_and_repair_jerasure() {
+ local dir=$1
+ local allow_overwrites=$2
+ local poolname=ecpool
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ for id in $(seq 0 3) ; do
+ run_osd $dir $id || return 1
+ done
+ create_rbd_pool || return 1
+ wait_for_clean || return 1
+
+ create_ec_pool $poolname $allow_overwrites k=2 m=2 || return 1
+ corrupt_and_repair_erasure_coded $dir $poolname || return 1
+}
+
+function TEST_corrupt_and_repair_jerasure_appends() {
+ corrupt_and_repair_jerasure $1 false
+}
+
+function TEST_corrupt_and_repair_jerasure_overwrites() {
+ if [ "$use_ec_overwrite" = "true" ]; then
+ corrupt_and_repair_jerasure $1 true
+ fi
+}
+
+function corrupt_and_repair_lrc() {
+ local dir=$1
+ local allow_overwrites=$2
+ local poolname=ecpool
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ for id in $(seq 0 9) ; do
+ run_osd $dir $id || return 1
+ done
+ create_rbd_pool || return 1
+ wait_for_clean || return 1
+
+ create_ec_pool $poolname $allow_overwrites k=4 m=2 l=3 plugin=lrc || return 1
+ corrupt_and_repair_erasure_coded $dir $poolname || return 1
+}
+
+function TEST_corrupt_and_repair_lrc_appends() {
+ corrupt_and_repair_lrc $1 false
+}
+
+function TEST_corrupt_and_repair_lrc_overwrites() {
+ if [ "$use_ec_overwrite" = "true" ]; then
+ corrupt_and_repair_lrc $1 true
+ fi
+}
+
+function unfound_erasure_coded() {
+ local dir=$1
+ local allow_overwrites=$2
+ local poolname=ecpool
+ local payload=ABCDEF
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ for id in $(seq 0 3) ; do
+ run_osd $dir $id || return 1
+ done
+
+ create_ec_pool $poolname $allow_overwrites k=2 m=2 || return 1
+
+ add_something $dir $poolname || return 1
+
+ local primary=$(get_primary $poolname SOMETHING)
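+ # get_osds lists the acting set; filtering out $primary leaves only the
+ # non-primary shards, which are removed below to leave fewer than k shards
+ # and make SOMETHING unfound.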
+ local -a osds=($(get_osds $poolname SOMETHING | sed -e "s/$primary//"))
+ local not_primary_first=${osds[0]}
+ local not_primary_second=${osds[1]}
+ local not_primary_third=${osds[2]}
+
+ #
+ # 1) remove the corresponding file from the OSDs
+ #
+ pids=""
+ run_in_background pids objectstore_tool $dir $not_primary_first SOMETHING remove
+ run_in_background pids objectstore_tool $dir $not_primary_second SOMETHING remove
+ run_in_background pids objectstore_tool $dir $not_primary_third SOMETHING remove
+ wait_background pids
+ return_code=$?
+ if [ $return_code -ne 0 ]; then return $return_code; fi
+
+ #
+ # 2) repair the PG
+ #
+ local pg=$(get_pg $poolname SOMETHING)
+ repair $pg
+ #
+ # 3) check pg state
+ #
+ # it may take a bit to appear due to mon/mgr asynchrony
+ for f in `seq 1 60`; do
+ ceph -s | grep "1/1 objects unfound" && break
+ sleep 1
+ done
+ ceph -s|grep "4 up" || return 1
+ ceph -s|grep "4 in" || return 1
+ ceph -s|grep "1/1 objects unfound" || return 1
+}
+
+function TEST_unfound_erasure_coded_appends() {
+ unfound_erasure_coded $1 false
+}
+
+function TEST_unfound_erasure_coded_overwrites() {
+ if [ "$use_ec_overwrite" = "true" ]; then
+ unfound_erasure_coded $1 true
+ fi
+}
+
+#
+# list_missing for EC pool
+#
+function list_missing_erasure_coded() {
+ local dir=$1
+ local allow_overwrites=$2
+ local poolname=ecpool
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ for id in $(seq 0 2) ; do
+ run_osd $dir $id || return 1
+ done
+ create_rbd_pool || return 1
+ wait_for_clean || return 1
+
+ create_ec_pool $poolname $allow_overwrites k=2 m=1 || return 1
+
+ # Put an object and remove the two shards (including primary)
+ add_something $dir $poolname MOBJ0 || return 1
+ local -a osds0=($(get_osds $poolname MOBJ0))
+
+ # Put another object and remove two shards (excluding primary)
+ add_something $dir $poolname MOBJ1 || return 1
+ local -a osds1=($(get_osds $poolname MOBJ1))
+
+ # Stop all osd daemons
+ for id in $(seq 0 2) ; do
+ kill_daemons $dir TERM osd.$id >&2 < /dev/null || return 1
+ done
+
+ id=${osds0[0]}
+ ceph-objectstore-tool --data-path $dir/$id \
+ MOBJ0 remove || return 1
+ id=${osds0[1]}
+ ceph-objectstore-tool --data-path $dir/$id \
+ MOBJ0 remove || return 1
+
+ id=${osds1[1]}
+ ceph-objectstore-tool --data-path $dir/$id \
+ MOBJ1 remove || return 1
+ id=${osds1[2]}
+ ceph-objectstore-tool --data-path $dir/$id \
+ MOBJ1 remove || return 1
+
+ for id in $(seq 0 2) ; do
+ activate_osd $dir $id >&2 || return 1
+ done
+ create_rbd_pool || return 1
+ wait_for_clean || return 1
+
+ # Get the PG - both objects should be in the same PG
+ local pg=$(get_pg $poolname MOBJ0)
+
+ # Repair the PG, which triggers recovery
+ # and should mark the objects as unfound
+ repair $pg
+
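+ # Poll list_unfound until both MOBJ0 and MOBJ1 are reported unfound; give
+ # up (and fail) if they have not appeared within 60 iterations.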
+ for i in $(seq 0 120) ; do
+ [ $i -lt 60 ] || return 1
+ matches=$(ceph pg $pg list_unfound | egrep "MOBJ0|MOBJ1" | wc -l)
+ [ $matches -eq 2 ] && break
+ done
+}
+
+function TEST_list_missing_erasure_coded_appends() {
+ list_missing_erasure_coded $1 false
+}
+
+function TEST_list_missing_erasure_coded_overwrites() {
+ if [ "$use_ec_overwrite" = "true" ]; then
+ list_missing_erasure_coded $1 true
+ fi
+}
+
+#
+# Corrupt one copy of a replicated pool
+#
+function TEST_corrupt_scrub_replicated() {
+ local dir=$1
+ local poolname=csr_pool
+ local total_objs=19
+
+ run_mon $dir a --osd_pool_default_size=2 || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ create_rbd_pool || return 1
+ wait_for_clean || return 1
+
+ create_pool foo 1 || return 1
+ create_pool $poolname 1 1 || return 1
+ wait_for_clean || return 1
+
+ for i in $(seq 1 $total_objs) ; do
+ objname=ROBJ${i}
+ add_something $dir $poolname $objname || return 1
+
+ rados --pool $poolname setomapheader $objname hdr-$objname || return 1
+ rados --pool $poolname setomapval $objname key-$objname val-$objname || return 1
+ done
+
+ # Grow the file to 1 MB + 1 KB
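+ # (1025 blocks of 1024 B = 1,049,600 B, just over 1 MiB)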
+ dd if=/dev/zero of=$dir/new.ROBJ19 bs=1024 count=1025
+ rados --pool $poolname put $objname $dir/new.ROBJ19 || return 1
+ rm -f $dir/new.ROBJ19
+
+ local pg=$(get_pg $poolname ROBJ0)
+ local primary=$(get_primary $poolname ROBJ0)
+
+ # Compute an old omap digest and save oi
+ CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) \
+ config set osd_deep_scrub_update_digest_min_age 0
+ CEPH_ARGS='' ceph daemon $(get_asok_path osd.1) \
+ config set osd_deep_scrub_update_digest_min_age 0
+ pg_deep_scrub $pg
+
+ for i in $(seq 1 $total_objs) ; do
+ objname=ROBJ${i}
+
+ # Alternate corruption between osd.0 and osd.1
+ local osd=$(expr $i % 2)
+
+ case $i in
+ 1)
+ # Size (deep scrub data_digest too)
+ local payload=UVWXYZZZ
+ echo $payload > $dir/CORRUPT
+ objectstore_tool $dir $osd $objname set-bytes $dir/CORRUPT || return 1
+ ;;
+
+ 2)
+ # digest (deep scrub only)
+ local payload=UVWXYZ
+ echo $payload > $dir/CORRUPT
+ objectstore_tool $dir $osd $objname set-bytes $dir/CORRUPT || return 1
+ ;;
+
+ 3)
+ # missing
+ objectstore_tool $dir $osd $objname remove || return 1
+ ;;
+
+ 4)
+ # Modify omap value (deep scrub only)
+ objectstore_tool $dir $osd $objname set-omap key-$objname $dir/CORRUPT || return 1
+ ;;
+
+ 5)
+ # Delete omap key (deep scrub only)
+ objectstore_tool $dir $osd $objname rm-omap key-$objname || return 1
+ ;;
+
+ 6)
+ # Add extra omap key (deep scrub only)
+ echo extra > $dir/extra-val
+ objectstore_tool $dir $osd $objname set-omap key2-$objname $dir/extra-val || return 1
+ rm $dir/extra-val
+ ;;
+
+ 7)
+ # Modify omap header (deep scrub only)
+ echo -n newheader > $dir/hdr
+ objectstore_tool $dir $osd $objname set-omaphdr $dir/hdr || return 1
+ rm $dir/hdr
+ ;;
+
+ 8)
+ rados --pool $poolname setxattr $objname key1-$objname val1-$objname || return 1
+ rados --pool $poolname setxattr $objname key2-$objname val2-$objname || return 1
+
+ # Break xattrs
+ echo -n bad-val > $dir/bad-val
+ objectstore_tool $dir $osd $objname set-attr _key1-$objname $dir/bad-val || return 1
+ objectstore_tool $dir $osd $objname rm-attr _key2-$objname || return 1
+ echo -n val3-$objname > $dir/newval
+ objectstore_tool $dir $osd $objname set-attr _key3-$objname $dir/newval || return 1
+ rm $dir/bad-val $dir/newval
+ ;;
+
+ 9)
+ objectstore_tool $dir $osd $objname get-attr _ > $dir/robj9-oi
+ echo -n D > $dir/change
+ rados --pool $poolname put $objname $dir/change
+ objectstore_tool $dir $osd $objname set-attr _ $dir/robj9-oi
+ rm $dir/oi $dir/change
+ ;;
+
+ # ROBJ10 must be handled after digests are re-computed by a deep scrub below
+ # ROBJ11 must be handled with config change before deep scrub
+ # ROBJ12 must be handled with config change before scrubs
+ # ROBJ13 must be handled before scrubs
+
+ 14)
+ echo -n bad-val > $dir/bad-val
+ objectstore_tool $dir 0 $objname set-attr _ $dir/bad-val || return 1
+ objectstore_tool $dir 1 $objname rm-attr _ || return 1
+ rm $dir/bad-val
+ ;;
+
+ 15)
+ objectstore_tool $dir $osd $objname rm-attr _ || return 1
+ ;;
+
+ 16)
+ objectstore_tool $dir 0 $objname rm-attr snapset || return 1
+ echo -n bad-val > $dir/bad-val
+ objectstore_tool $dir 1 $objname set-attr snapset $dir/bad-val || return 1
+ ;;
+
+ 17)
+ # Deep-scrub only (all replicas are different from the object info)
+ local payload=ROBJ17
+ echo $payload > $dir/new.ROBJ17
+ objectstore_tool $dir 0 $objname set-bytes $dir/new.ROBJ17 || return 1
+ objectstore_tool $dir 1 $objname set-bytes $dir/new.ROBJ17 || return 1
+ ;;
+
+ 18)
+ # Deep-scrub only (all replicas are different from the object info)
+ local payload=ROBJ18
+ echo $payload > $dir/new.ROBJ18
+ objectstore_tool $dir 0 $objname set-bytes $dir/new.ROBJ18 || return 1
+ objectstore_tool $dir 1 $objname set-bytes $dir/new.ROBJ18 || return 1
+ # Make one replica have a different object info, so a full repair must happen too
+ objectstore_tool $dir $osd $objname corrupt-info || return 1
+ ;;
+
+ 19)
+ # Set osd-max-object-size smaller than this object's size
+
+ esac
+ done
+
+ local pg=$(get_pg $poolname ROBJ0)
+
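+ # Cap objects at 1 MiB (1,048,576 B); ROBJ19 (1,049,600 B) exceeds it, so
+ # the scrub below is expected to report it as size_too_large.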
+ ceph tell osd.\* injectargs -- --osd-max-object-size=1048576
+
+ inject_eio rep data $poolname ROBJ11 $dir 0 || return 1 # shard 0 of [1, 0], osd.1
+ inject_eio rep mdata $poolname ROBJ12 $dir 1 || return 1 # shard 1 of [1, 0], osd.0
+ inject_eio rep mdata $poolname ROBJ13 $dir 1 || return 1 # shard 1 of [1, 0], osd.0
+ inject_eio rep data $poolname ROBJ13 $dir 0 || return 1 # shard 0 of [1, 0], osd.1
+
+ pg_scrub $pg
+
+ ERRORS=0
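+ # Each entry is a regex that must appear in the primary's OSD log after the
+ # shallow scrub above; any miss is counted in ERRORS.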
+ declare -a err_strings
+ err_strings[0]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:30259878:::ROBJ15:head : candidate had a missing info key"
+ err_strings[1]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:33aca486:::ROBJ18:head : object info inconsistent "
+ err_strings[2]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:5c7b2c47:::ROBJ16:head : candidate had a corrupt snapset"
+ err_strings[3]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:5c7b2c47:::ROBJ16:head : candidate had a missing snapset key"
+ err_strings[4]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:5c7b2c47:::ROBJ16:head : failed to pick suitable object info"
+ err_strings[5]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:86586531:::ROBJ8:head : attr value mismatch '_key1-ROBJ8', attr name mismatch '_key3-ROBJ8', attr name mismatch '_key2-ROBJ8'"
+ err_strings[6]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:bc819597:::ROBJ12:head : candidate had a stat error"
+ err_strings[7]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:c0c86b1d:::ROBJ14:head : candidate had a missing info key"
+ err_strings[8]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:c0c86b1d:::ROBJ14:head : candidate had a corrupt info"
+ err_strings[9]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:c0c86b1d:::ROBJ14:head : failed to pick suitable object info"
+ err_strings[10]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:ce3f1d6a:::ROBJ1:head : candidate size 9 info size 7 mismatch"
+ err_strings[11]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:ce3f1d6a:::ROBJ1:head : size 9 != size 7 from auth oi 3:ce3f1d6a:::ROBJ1:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od f5fba2c6 alloc_hint [[]0 0 0[]][)], size 9 != size 7 from shard 0"
+ err_strings[12]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:d60617f9:::ROBJ13:head : candidate had a stat error"
+ err_strings[13]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 3:f2a5b2a4:::ROBJ3:head : missing"
+ err_strings[14]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:ffdb2004:::ROBJ9:head : candidate size 1 info size 7 mismatch"
+ err_strings[15]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:ffdb2004:::ROBJ9:head : object info inconsistent "
+ err_strings[16]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 3:c0c86b1d:::ROBJ14:head : no '_' attr"
+ err_strings[17]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 3:5c7b2c47:::ROBJ16:head : can't decode 'snapset' attr .* no longer understand old encoding version 3 < 97: Malformed input"
+ err_strings[18]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub : stat mismatch, got 19/19 objects, 0/0 clones, 18/19 dirty, 18/19 omap, 0/0 pinned, 0/0 hit_set_archive, 0/0 whiteouts, 1049713/1049720 bytes, 0/0 manifest objects, 0/0 hit_set_archive bytes."
+ err_strings[19]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 1 missing, 8 inconsistent objects"
+ err_strings[20]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 18 errors"
+ err_strings[21]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:123a5f55:::ROBJ19:head : size 1049600 > 1048576 is too large"
+
+ for err_string in "${err_strings[@]}"
+ do
+ if ! grep -q "$err_string" $dir/osd.${primary}.log
+ then
+ echo "Missing log message '$err_string'"
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+ done
+
+ rados list-inconsistent-pg $poolname > $dir/json || return 1
+ # Check pg count
+ test $(jq '. | length' $dir/json) = "1" || return 1
+ # Check pgid
+ test $(jq -r '.[0]' $dir/json) = $pg || return 1
+
+ rados list-inconsistent-obj $pg > $dir/json || return 1
+ # Get epoch for repair-get requests
+ epoch=$(jq .epoch $dir/json)
+
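+ # Build the expected inconsistency report: the embedded JSON is passed
+ # through the same $jqfilter / $sortkeys normalization used on the live
+ # list-inconsistent-obj output, so multidiff compares canonical forms
+ # ($jqfilter and $sortkeys are presumably defined earlier in this file).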
+ jq "$jqfilter" << EOF | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/checkcsjson
+{
+ "inconsistents": [
+ {
+ "shards": [
+ {
+ "size": 7,
+ "errors": [],
+ "osd": 0,
+ "primary": false
+ },
+ {
+ "object_info": {
+ "oid": {
+ "oid": "ROBJ1",
+ "key": "",
+ "snapid": -2,
+ "hash": 1454963827,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "51'58",
+ "prior_version": "21'3",
+ "last_reqid": "osd.1.0:57",
+ "user_version": 3,
+ "size": 7,
+ "mtime": "",
+ "local_mtime": "",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xf5fba2c6",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "size": 9,
+ "errors": [
+ "size_mismatch_info",
+ "obj_size_info_mismatch"
+ ],
+ "osd": 1,
+ "primary": true
+ }
+ ],
+ "selected_object_info": {
+ "oid": {
+ "oid": "ROBJ1",
+ "key": "",
+ "snapid": -2,
+ "hash": 1454963827,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "51'58",
+ "prior_version": "21'3",
+ "last_reqid": "osd.1.0:57",
+ "user_version": 3,
+ "size": 7,
+ "mtime": "2018-04-05 14:33:19.804040",
+ "local_mtime": "2018-04-05 14:33:19.804839",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xf5fba2c6",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "union_shard_errors": [
+ "size_mismatch_info",
+ "obj_size_info_mismatch"
+ ],
+ "errors": [
+ "size_mismatch"
+ ],
+ "object": {
+ "version": 3,
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "ROBJ1"
+ }
+ },
+ {
+ "shards": [
+ {
+ "errors": [
+ "stat_error"
+ ],
+ "osd": 0,
+ "primary": false
+ },
+ {
+ "size": 7,
+ "errors": [],
+ "osd": 1,
+ "primary": true
+ }
+ ],
+ "selected_object_info": {
+ "oid": {
+ "oid": "ROBJ12",
+ "key": "",
+ "snapid": -2,
+ "hash": 3920199997,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "51'56",
+ "prior_version": "43'36",
+ "last_reqid": "osd.1.0:55",
+ "user_version": 36,
+ "size": 7,
+ "mtime": "",
+ "local_mtime": "",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0x067f306a",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "union_shard_errors": [
+ "stat_error"
+ ],
+ "errors": [],
+ "object": {
+ "version": 36,
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "ROBJ12"
+ }
+ },
+ {
+ "shards": [
+ {
+ "errors": [
+ "stat_error"
+ ],
+ "osd": 0,
+ "primary": false
+ },
+ {
+ "size": 7,
+ "errors": [],
+ "osd": 1,
+ "primary": true
+ }
+ ],
+ "selected_object_info": {
+ "oid": {
+ "oid": "ROBJ13",
+ "key": "",
+ "snapid": -2,
+ "hash": 2682806379,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "51'59",
+ "prior_version": "45'39",
+ "last_reqid": "osd.1.0:58",
+ "user_version": 39,
+ "size": 7,
+ "mtime": "",
+ "local_mtime": "",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0x6441854d",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "union_shard_errors": [
+ "stat_error"
+ ],
+ "errors": [],
+ "object": {
+ "version": 39,
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "ROBJ13"
+ }
+ },
+ {
+ "shards": [
+ {
+ "object_info": "bad-val",
+ "size": 7,
+ "errors": [
+ "info_corrupted"
+ ],
+ "osd": 0,
+ "primary": false
+ },
+ {
+ "size": 7,
+ "errors": [
+ "info_missing"
+ ],
+ "osd": 1,
+ "primary": true
+ }
+ ],
+ "union_shard_errors": [
+ "info_missing",
+ "info_corrupted"
+ ],
+ "errors": [],
+ "object": {
+ "version": 0,
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "ROBJ14"
+ }
+ },
+ {
+ "shards": [
+ {
+ "object_info": {
+ "oid": {
+ "oid": "ROBJ15",
+ "key": "",
+ "snapid": -2,
+ "hash": 504996876,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "51'49",
+ "prior_version": "49'45",
+ "last_reqid": "osd.1.0:48",
+ "user_version": 45,
+ "size": 7,
+ "mtime": "2018-04-05 14:33:29.498969",
+ "local_mtime": "2018-04-05 14:33:29.499890",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0x2d2a4d6e",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "size": 7,
+ "errors": [],
+ "osd": 0,
+ "primary": false
+ },
+ {
+ "size": 7,
+ "errors": [
+ "info_missing"
+ ],
+ "osd": 1,
+ "primary": true
+ }
+ ],
+ "selected_object_info": {
+ "oid": {
+ "oid": "ROBJ15",
+ "key": "",
+ "snapid": -2,
+ "hash": 504996876,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "51'49",
+ "prior_version": "49'45",
+ "last_reqid": "osd.1.0:48",
+ "user_version": 45,
+ "size": 7,
+ "mtime": "",
+ "local_mtime": "",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0x2d2a4d6e",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "union_shard_errors": [
+ "info_missing"
+ ],
+ "errors": [],
+ "object": {
+ "version": 45,
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "ROBJ15"
+ }
+ },
+ {
+ "errors": [],
+ "object": {
+ "locator": "",
+ "name": "ROBJ16",
+ "nspace": "",
+ "snap": "head",
+ "version": 0
+ },
+ "shards": [
+ {
+ "errors": [
+ "snapset_missing"
+ ],
+ "osd": 0,
+ "primary": false,
+ "size": 7
+ },
+ {
+ "errors": [
+ "snapset_corrupted"
+ ],
+ "osd": 1,
+ "primary": true,
+ "snapset": "bad-val",
+ "size": 7
+ }
+ ],
+ "union_shard_errors": [
+ "snapset_missing",
+ "snapset_corrupted"
+ ]
+ },
+ {
+ "errors": [
+ "object_info_inconsistency"
+ ],
+ "object": {
+ "locator": "",
+ "name": "ROBJ18",
+ "nspace": "",
+ "snap": "head"
+ },
+ "selected_object_info": {
+ "alloc_hint_flags": 255,
+ "data_digest": "0x2ddbf8f5",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "lost": 0,
+ "manifest": {
+ "type": 0
+ },
+ "oid": {
+ "hash": 1629828556,
+ "key": "",
+ "max": 0,
+ "namespace": "",
+ "oid": "ROBJ18",
+ "pool": 3,
+ "snapid": -2
+ },
+ "omap_digest": "0xddc3680f",
+ "size": 7,
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "user_version": 54,
+ "watchers": {}
+ },
+ "shards": [
+ {
+ "errors": [],
+ "object_info": {
+ "alloc_hint_flags": 0,
+ "data_digest": "0x2ddbf8f5",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "lost": 0,
+ "manifest": {
+ "type": 0
+ },
+ "oid": {
+ "hash": 1629828556,
+ "key": "",
+ "max": 0,
+ "namespace": "",
+ "oid": "ROBJ18",
+ "pool": 3,
+ "snapid": -2
+ },
+ "omap_digest": "0xddc3680f",
+ "size": 7,
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "user_version": 54,
+ "watchers": {}
+ },
+ "osd": 0,
+ "primary": false,
+ "size": 7
+ },
+ {
+ "errors": [],
+ "object_info": {
+ "alloc_hint_flags": 255,
+ "data_digest": "0x2ddbf8f5",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "lost": 0,
+ "manifest": {
+ "type": 0
+ },
+ "oid": {
+ "hash": 1629828556,
+ "key": "",
+ "max": 0,
+ "namespace": "",
+ "oid": "ROBJ18",
+ "pool": 3,
+ "snapid": -2
+ },
+ "omap_digest": "0xddc3680f",
+ "size": 7,
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "user_version": 54,
+ "watchers": {}
+ },
+ "osd": 1,
+ "primary": true,
+ "size": 7
+ }
+ ],
+ "union_shard_errors": []
+ },
+ {
+ "object": {
+ "name": "ROBJ19",
+ "nspace": "",
+ "locator": "",
+ "snap": "head",
+ "version": 58
+ },
+ "errors": [
+ "size_too_large"
+ ],
+ "union_shard_errors": [],
+ "selected_object_info": {
+ "oid": {
+ "oid": "ROBJ19",
+ "key": "",
+ "snapid": -2,
+ "hash": 2868534344,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "63'59",
+ "prior_version": "63'58",
+ "last_reqid": "osd.1.0:58",
+ "user_version": 58,
+ "size": 1049600,
+ "mtime": "2019-08-09T23:33:58.340709+0000",
+ "local_mtime": "2019-08-09T23:33:58.345676+0000",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x3dde0ef3",
+ "omap_digest": "0xbffddd28",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "shards": [
+ {
+ "osd": 0,
+ "primary": false,
+ "errors": [],
+ "size": 1049600
+ },
+ {
+ "osd": 1,
+ "primary": true,
+ "errors": [],
+ "size": 1049600
+ }
+ ]
+ },
+ {
+ "shards": [
+ {
+ "size": 7,
+ "errors": [],
+ "osd": 0,
+ "primary": false
+ },
+ {
+ "errors": [
+ "missing"
+ ],
+ "osd": 1,
+ "primary": true
+ }
+ ],
+ "selected_object_info": {
+ "oid": {
+ "oid": "ROBJ3",
+ "key": "",
+ "snapid": -2,
+ "hash": 625845583,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "51'61",
+ "prior_version": "25'9",
+ "last_reqid": "osd.1.0:60",
+ "user_version": 9,
+ "size": 7,
+ "mtime": "",
+ "local_mtime": "",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0x00b35dfd",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "union_shard_errors": [
+ "missing"
+ ],
+ "errors": [],
+ "object": {
+ "version": 9,
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "ROBJ3"
+ }
+ },
+ {
+ "shards": [
+ {
+ "attrs": [
+ {
+ "Base64": false,
+ "value": "bad-val",
+ "name": "key1-ROBJ8"
+ },
+ {
+ "Base64": false,
+ "value": "val2-ROBJ8",
+ "name": "key2-ROBJ8"
+ }
+ ],
+ "size": 7,
+ "errors": [],
+ "osd": 0,
+ "primary": false
+ },
+ {
+ "attrs": [
+ {
+ "Base64": false,
+ "value": "val1-ROBJ8",
+ "name": "key1-ROBJ8"
+ },
+ {
+ "Base64": false,
+ "value": "val3-ROBJ8",
+ "name": "key3-ROBJ8"
+ }
+ ],
+ "size": 7,
+ "errors": [],
+ "osd": 1,
+ "primary": true
+ }
+ ],
+ "selected_object_info": {
+ "oid": {
+ "oid": "ROBJ8",
+ "key": "",
+ "snapid": -2,
+ "hash": 2359695969,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "79'66",
+ "prior_version": "79'65",
+ "last_reqid": "client.4554.0:1",
+ "user_version": 79,
+ "size": 7,
+ "mtime": "",
+ "local_mtime": "",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xd6be81dc",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "union_shard_errors": [],
+ "errors": [
+ "attr_value_mismatch",
+ "attr_name_mismatch"
+ ],
+ "object": {
+ "version": 66,
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "ROBJ8"
+ }
+ },
+ {
+ "shards": [
+ {
+ "object_info": {
+ "oid": {
+ "oid": "ROBJ9",
+ "key": "",
+ "snapid": -2,
+ "hash": 537189375,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "95'67",
+ "prior_version": "51'64",
+ "last_reqid": "client.4649.0:1",
+ "user_version": 80,
+ "size": 1,
+ "mtime": "",
+ "local_mtime": "",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2b63260d",
+ "omap_digest": "0x2eecc539",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "size": 1,
+ "errors": [],
+ "osd": 0,
+ "primary": false
+ },
+ {
+ "object_info": {
+ "oid": {
+ "oid": "ROBJ9",
+ "key": "",
+ "snapid": -2,
+ "hash": 537189375,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "51'64",
+ "prior_version": "37'27",
+ "last_reqid": "osd.1.0:63",
+ "user_version": 27,
+ "size": 7,
+ "mtime": "2018-04-05 14:33:25.352485",
+ "local_mtime": "2018-04-05 14:33:25.353746",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0x2eecc539",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "size": 1,
+ "errors": [
+ "obj_size_info_mismatch"
+ ],
+ "osd": 1,
+ "primary": true
+ }
+ ],
+ "selected_object_info": {
+ "oid": {
+ "oid": "ROBJ9",
+ "key": "",
+ "snapid": -2,
+ "hash": 537189375,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "95'67",
+ "prior_version": "51'64",
+ "last_reqid": "client.4649.0:1",
+ "user_version": 80,
+ "size": 1,
+ "mtime": "",
+ "local_mtime": "",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2b63260d",
+ "omap_digest": "0x2eecc539",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "union_shard_errors": [
+ "obj_size_info_mismatch"
+ ],
+ "errors": [
+ "object_info_inconsistency"
+ ],
+ "object": {
+ "version": 67,
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "ROBJ9"
+ }
+ }
+ ],
+ "epoch": 0
+}
+EOF
+
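+ # Compare the canonicalized expected and actual reports; with getjson=yes a
+ # mismatch is tolerated and the raw output is saved to save1.json,
+ # presumably so the expected JSON above can be regenerated.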
+ jq "$jqfilter" $dir/json | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/csjson
+ multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1
+ if test $getjson = "yes"
+ then
+ jq '.' $dir/json > save1.json
+ fi
+
+ if test "$LOCALRUN" = "yes" && which jsonschema > /dev/null;
+ then
+ jsonschema -i $dir/json $CEPH_ROOT/doc/rados/command/list-inconsistent-obj.json || return 1
+ fi
+
+ objname=ROBJ9
+ # Change data and size again because digest was recomputed
+ echo -n ZZZ > $dir/change
+ rados --pool $poolname put $objname $dir/change
+ # Set one to an even older value
+ objectstore_tool $dir 0 $objname set-attr _ $dir/robj9-oi
+ rm $dir/oi $dir/change
+
+ objname=ROBJ10
+ objectstore_tool $dir 1 $objname get-attr _ > $dir/oi
+ rados --pool $poolname setomapval $objname key2-$objname val2-$objname
+ objectstore_tool $dir 0 $objname set-attr _ $dir/oi
+ objectstore_tool $dir 1 $objname set-attr _ $dir/oi
+ rm $dir/oi
+
+ inject_eio rep data $poolname ROBJ11 $dir 0 || return 1 # shard 0 of [1, 0], osd.1
+ inject_eio rep mdata $poolname ROBJ12 $dir 1 || return 1 # shard 1 of [1, 0], osd.0
+ inject_eio rep mdata $poolname ROBJ13 $dir 1 || return 1 # shard 1 of [1, 0], osd.0
+ inject_eio rep data $poolname ROBJ13 $dir 0 || return 1 # shard 0 of [1, 0], osd.1
+
+ # ROBJ19 won't error this time
+ ceph tell osd.\* injectargs -- --osd-max-object-size=134217728
+
+ pg_deep_scrub $pg
+
+ err_strings=()
+ err_strings[0]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:30259878:::ROBJ15:head : candidate had a missing info key"
+ err_strings[1]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:33aca486:::ROBJ18:head : data_digest 0xbd89c912 != data_digest 0x2ddbf8f5 from auth oi 3:33aca486:::ROBJ18:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 54 dd 2ddbf8f5 od ddc3680f alloc_hint [[]0 0 255[]][)], object info inconsistent "
+ err_strings[2]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:33aca486:::ROBJ18:head : data_digest 0xbd89c912 != data_digest 0x2ddbf8f5 from auth oi 3:33aca486:::ROBJ18:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 54 dd 2ddbf8f5 od ddc3680f alloc_hint [[]0 0 255[]][)]"
+ err_strings[3]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:33aca486:::ROBJ18:head : failed to pick suitable auth object"
+ err_strings[4]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:5c7b2c47:::ROBJ16:head : candidate had a corrupt snapset"
+ err_strings[5]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:5c7b2c47:::ROBJ16:head : candidate had a missing snapset key"
+ err_strings[6]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:5c7b2c47:::ROBJ16:head : failed to pick suitable object info"
+ err_strings[7]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:86586531:::ROBJ8:head : attr value mismatch '_key1-ROBJ8', attr name mismatch '_key3-ROBJ8', attr name mismatch '_key2-ROBJ8'"
+ err_strings[8]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:87abbf36:::ROBJ11:head : candidate had a read error"
+ err_strings[9]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:8aa5320e:::ROBJ17:head : data_digest 0x5af0c3ef != data_digest 0x2ddbf8f5 from auth oi 3:8aa5320e:::ROBJ17:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 51 dd 2ddbf8f5 od e9572720 alloc_hint [[]0 0 0[]][)]"
+ err_strings[10]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:8aa5320e:::ROBJ17:head : data_digest 0x5af0c3ef != data_digest 0x2ddbf8f5 from auth oi 3:8aa5320e:::ROBJ17:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 51 dd 2ddbf8f5 od e9572720 alloc_hint [[]0 0 0[]][)]"
+ err_strings[11]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:8aa5320e:::ROBJ17:head : failed to pick suitable auth object"
+ err_strings[12]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:8b55fa4b:::ROBJ7:head : omap_digest 0xefced57a != omap_digest 0x6a73cc07 from shard 1"
+ err_strings[13]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:8b55fa4b:::ROBJ7:head : omap_digest 0x6a73cc07 != omap_digest 0xefced57a from auth oi 3:8b55fa4b:::ROBJ7:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 21 dd 2ddbf8f5 od efced57a alloc_hint [[]0 0 0[]][)]"
+ err_strings[14]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:a53c12e8:::ROBJ6:head : omap_digest 0x689ee887 != omap_digest 0x179c919f from shard 1, omap_digest 0x689ee887 != omap_digest 0x179c919f from auth oi 3:a53c12e8:::ROBJ6:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 18 dd 2ddbf8f5 od 179c919f alloc_hint [[]0 0 0[]][)]"
+ err_strings[15]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:b1f19cbd:::ROBJ10:head : omap_digest 0xa8dd5adc != omap_digest 0xc2025a24 from auth oi 3:b1f19cbd:::ROBJ10:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 30 dd 2ddbf8f5 od c2025a24 alloc_hint [[]0 0 0[]][)]"
+ err_strings[16]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:b1f19cbd:::ROBJ10:head : omap_digest 0xa8dd5adc != omap_digest 0xc2025a24 from auth oi 3:b1f19cbd:::ROBJ10:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 30 dd 2ddbf8f5 od c2025a24 alloc_hint [[]0 0 0[]][)]"
+ err_strings[17]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:b1f19cbd:::ROBJ10:head : failed to pick suitable auth object"
+ err_strings[18]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:bc819597:::ROBJ12:head : candidate had a stat error"
+ err_strings[19]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:c0c86b1d:::ROBJ14:head : candidate had a missing info key"
+ err_strings[20]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:c0c86b1d:::ROBJ14:head : candidate had a corrupt info"
+ err_strings[21]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:c0c86b1d:::ROBJ14:head : failed to pick suitable object info"
+ err_strings[22]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:ce3f1d6a:::ROBJ1:head : candidate size 9 info size 7 mismatch"
+ err_strings[23]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:ce3f1d6a:::ROBJ1:head : data_digest 0x2d4a11c2 != data_digest 0x2ddbf8f5 from shard 0, data_digest 0x2d4a11c2 != data_digest 0x2ddbf8f5 from auth oi 3:ce3f1d6a:::ROBJ1:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od f5fba2c6 alloc_hint [[]0 0 0[]][)], size 9 != size 7 from auth oi 3:ce3f1d6a:::ROBJ1:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od f5fba2c6 alloc_hint [[]0 0 0[]][)], size 9 != size 7 from shard 0"
+ err_strings[24]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:d60617f9:::ROBJ13:head : candidate had a read error"
+ err_strings[25]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:d60617f9:::ROBJ13:head : candidate had a stat error"
+ err_strings[26]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:d60617f9:::ROBJ13:head : failed to pick suitable object info"
+ err_strings[27]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:e97ce31e:::ROBJ2:head : data_digest 0x578a4830 != data_digest 0x2ddbf8f5 from shard 1, data_digest 0x578a4830 != data_digest 0x2ddbf8f5 from auth oi 3:e97ce31e:::ROBJ2:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 6 dd 2ddbf8f5 od f8e11918 alloc_hint [[]0 0 0[]][)]"
+ err_strings[28]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 3:f2a5b2a4:::ROBJ3:head : missing"
+ err_strings[29]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:f4981d31:::ROBJ4:head : omap_digest 0xd7178dfe != omap_digest 0xe2d46ea4 from shard 1, omap_digest 0xd7178dfe != omap_digest 0xe2d46ea4 from auth oi 3:f4981d31:::ROBJ4:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 12 dd 2ddbf8f5 od e2d46ea4 alloc_hint [[]0 0 0[]][)]"
+ err_strings[30]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:f4bfd4d1:::ROBJ5:head : omap_digest 0x1a862a41 != omap_digest 0x6cac8f6 from shard 1"
+ err_strings[31]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:f4bfd4d1:::ROBJ5:head : omap_digest 0x6cac8f6 != omap_digest 0x1a862a41 from auth oi 3:f4bfd4d1:::ROBJ5:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 15 dd 2ddbf8f5 od 1a862a41 alloc_hint [[]0 0 0[]][)]"
+ err_strings[32]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:ffdb2004:::ROBJ9:head : candidate size 3 info size 7 mismatch"
+ err_strings[33]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:ffdb2004:::ROBJ9:head : object info inconsistent "
+ err_strings[34]="log_channel[(]cluster[)] log [[]ERR[]] : deep-scrub [0-9]*[.]0 3:c0c86b1d:::ROBJ14:head : no '_' attr"
+ err_strings[35]="log_channel[(]cluster[)] log [[]ERR[]] : deep-scrub [0-9]*[.]0 3:5c7b2c47:::ROBJ16:head : can't decode 'snapset' attr .* no longer understand old encoding version 3 < 97: Malformed input"
+ err_strings[36]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 deep-scrub : stat mismatch, got 19/19 objects, 0/0 clones, 18/19 dirty, 18/19 omap, 0/0 pinned, 0/0 hit_set_archive, 0/0 whiteouts, 1049715/1049716 bytes, 0/0 manifest objects, 0/0 hit_set_archive bytes."
+ err_strings[37]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 deep-scrub 1 missing, 11 inconsistent objects"
+ err_strings[38]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 deep-scrub 35 errors"
+
+ for err_string in "${err_strings[@]}"
+ do
+ if ! grep -q "$err_string" $dir/osd.${primary}.log
+ then
+ echo "Missing log message '$err_string'"
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+ done
+
+ rados list-inconsistent-pg $poolname > $dir/json || return 1
+ # Check pg count
+ test $(jq '. | length' $dir/json) = "1" || return 1
+ # Check pgid
+ test $(jq -r '.[0]' $dir/json) = $pg || return 1
+
+ rados list-inconsistent-obj $pg > $dir/json || return 1
+ # Get epoch for repair-get requests
+ epoch=$(jq .epoch $dir/json)
+
+ jq "$jqfilter" << EOF | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/checkcsjson
+{
+ "inconsistents": [
+ {
+ "shards": [
+ {
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xf5fba2c6",
+ "size": 7,
+ "errors": [],
+ "osd": 0,
+ "primary": false
+ },
+ {
+ "object_info": {
+ "oid": {
+ "oid": "ROBJ1",
+ "key": "",
+ "snapid": -2,
+ "hash": 1454963827,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "51'58",
+ "prior_version": "21'3",
+ "last_reqid": "osd.1.0:57",
+ "user_version": 3,
+ "size": 7,
+ "mtime": "2018-04-05 14:33:19.804040",
+ "local_mtime": "2018-04-05 14:33:19.804839",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xf5fba2c6",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "data_digest": "0x2d4a11c2",
+ "omap_digest": "0xf5fba2c6",
+ "size": 9,
+ "errors": [
+ "data_digest_mismatch_info",
+ "size_mismatch_info",
+ "obj_size_info_mismatch"
+ ],
+ "osd": 1,
+ "primary": true
+ }
+ ],
+ "selected_object_info": {
+ "oid": {
+ "oid": "ROBJ1",
+ "key": "",
+ "snapid": -2,
+ "hash": 1454963827,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "51'58",
+ "prior_version": "21'3",
+ "last_reqid": "osd.1.0:57",
+ "user_version": 3,
+ "size": 7,
+ "mtime": "2018-04-05 14:33:19.804040",
+ "local_mtime": "2018-04-05 14:33:19.804839",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xf5fba2c6",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "union_shard_errors": [
+ "data_digest_mismatch_info",
+ "size_mismatch_info",
+ "obj_size_info_mismatch"
+ ],
+ "errors": [
+ "data_digest_mismatch",
+ "size_mismatch"
+ ],
+ "object": {
+ "version": 3,
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "ROBJ1"
+ }
+ },
+ {
+ "shards": [
+ {
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xa8dd5adc",
+ "size": 7,
+ "errors": [
+ "omap_digest_mismatch_info"
+ ],
+ "osd": 0,
+ "primary": false
+ },
+ {
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xa8dd5adc",
+ "size": 7,
+ "errors": [
+ "omap_digest_mismatch_info"
+ ],
+ "osd": 1,
+ "primary": true
+ }
+ ],
+ "selected_object_info": {
+ "alloc_hint_flags": 0,
+ "data_digest": "0x2ddbf8f5",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "lost": 0,
+ "manifest": {
+ "type": 0
+ },
+ "oid": {
+ "hash": 3174666125,
+ "key": "",
+ "max": 0,
+ "namespace": "",
+ "oid": "ROBJ10",
+ "pool": 3,
+ "snapid": -2
+ },
+ "omap_digest": "0xc2025a24",
+ "size": 7,
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "user_version": 30,
+ "watchers": {}
+ },
+ "union_shard_errors": [
+ "omap_digest_mismatch_info"
+ ],
+ "errors": [],
+ "object": {
+ "version": 30,
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "ROBJ10"
+ }
+ },
+ {
+ "shards": [
+ {
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xa03cef03",
+ "size": 7,
+ "errors": [],
+ "osd": 0,
+ "primary": false
+ },
+ {
+ "size": 7,
+ "errors": [
+ "read_error"
+ ],
+ "osd": 1,
+ "primary": true
+ }
+ ],
+ "selected_object_info": {
+ "oid": {
+ "oid": "ROBJ11",
+ "key": "",
+ "snapid": -2,
+ "hash": 1828574689,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "51'52",
+ "prior_version": "41'33",
+ "last_reqid": "osd.1.0:51",
+ "user_version": 33,
+ "size": 7,
+ "mtime": "2018-04-05 14:33:26.761286",
+ "local_mtime": "2018-04-05 14:33:26.762368",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xa03cef03",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "union_shard_errors": [
+ "read_error"
+ ],
+ "errors": [],
+ "object": {
+ "version": 33,
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "ROBJ11"
+ }
+ },
+ {
+ "shards": [
+ {
+ "errors": [
+ "stat_error"
+ ],
+ "osd": 0,
+ "primary": false
+ },
+ {
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0x067f306a",
+ "size": 7,
+ "errors": [],
+ "osd": 1,
+ "primary": true
+ }
+ ],
+ "selected_object_info": {
+ "oid": {
+ "oid": "ROBJ12",
+ "key": "",
+ "snapid": -2,
+ "hash": 3920199997,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "51'56",
+ "prior_version": "43'36",
+ "last_reqid": "osd.1.0:55",
+ "user_version": 36,
+ "size": 7,
+ "mtime": "2018-04-05 14:33:27.460958",
+ "local_mtime": "2018-04-05 14:33:27.462109",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0x067f306a",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "union_shard_errors": [
+ "stat_error"
+ ],
+ "errors": [],
+ "object": {
+ "version": 36,
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "ROBJ12"
+ }
+ },
+ {
+ "shards": [
+ {
+ "errors": [
+ "stat_error"
+ ],
+ "osd": 0,
+ "primary": false
+ },
+ {
+ "size": 7,
+ "errors": [
+ "read_error"
+ ],
+ "osd": 1,
+ "primary": true
+ }
+ ],
+ "union_shard_errors": [
+ "stat_error",
+ "read_error"
+ ],
+ "errors": [],
+ "object": {
+ "version": 0,
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "ROBJ13"
+ }
+ },
+ {
+ "shards": [
+ {
+ "object_info": "bad-val",
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0x4f14f849",
+ "size": 7,
+ "errors": [
+ "info_corrupted"
+ ],
+ "osd": 0,
+ "primary": false
+ },
+ {
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0x4f14f849",
+ "size": 7,
+ "errors": [
+ "info_missing"
+ ],
+ "osd": 1,
+ "primary": true
+ }
+ ],
+ "union_shard_errors": [
+ "info_missing",
+ "info_corrupted"
+ ],
+ "errors": [],
+ "object": {
+ "version": 0,
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "ROBJ14"
+ }
+ },
+ {
+ "shards": [
+ {
+ "object_info": {
+ "oid": {
+ "oid": "ROBJ15",
+ "key": "",
+ "snapid": -2,
+ "hash": 504996876,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "51'49",
+ "prior_version": "49'45",
+ "last_reqid": "osd.1.0:48",
+ "user_version": 45,
+ "size": 7,
+ "mtime": "2018-04-05 14:33:29.498969",
+ "local_mtime": "2018-04-05 14:33:29.499890",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0x2d2a4d6e",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0x2d2a4d6e",
+ "size": 7,
+ "errors": [],
+ "osd": 0,
+ "primary": false
+ },
+ {
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0x2d2a4d6e",
+ "size": 7,
+ "errors": [
+ "info_missing"
+ ],
+ "osd": 1,
+ "primary": true
+ }
+ ],
+ "selected_object_info": {
+ "oid": {
+ "oid": "ROBJ15",
+ "key": "",
+ "snapid": -2,
+ "hash": 504996876,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "51'49",
+ "prior_version": "49'45",
+ "last_reqid": "osd.1.0:48",
+ "user_version": 45,
+ "size": 7,
+ "mtime": "2018-04-05 14:33:29.498969",
+ "local_mtime": "2018-04-05 14:33:29.499890",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0x2d2a4d6e",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "union_shard_errors": [
+ "info_missing"
+ ],
+ "errors": [],
+ "object": {
+ "version": 45,
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "ROBJ15"
+ }
+ },
+ {
+ "errors": [],
+ "object": {
+ "locator": "",
+ "name": "ROBJ16",
+ "nspace": "",
+ "snap": "head",
+ "version": 0
+ },
+ "shards": [
+ {
+ "data_digest": "0x2ddbf8f5",
+ "errors": [
+ "snapset_missing"
+ ],
+ "omap_digest": "0x8b699207",
+ "osd": 0,
+ "primary": false,
+ "size": 7
+ },
+ {
+ "snapset": "bad-val",
+ "data_digest": "0x2ddbf8f5",
+ "errors": [
+ "snapset_corrupted"
+ ],
+ "omap_digest": "0x8b699207",
+ "osd": 1,
+ "primary": true,
+ "size": 7
+ }
+ ],
+ "union_shard_errors": [
+ "snapset_missing",
+ "snapset_corrupted"
+ ]
+ },
+ {
+ "errors": [],
+ "object": {
+ "locator": "",
+ "name": "ROBJ17",
+ "nspace": "",
+ "snap": "head"
+ },
+ "selected_object_info": {
+ "alloc_hint_flags": 0,
+ "data_digest": "0x2ddbf8f5",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "lost": 0,
+ "manifest": {
+ "type": 0
+ },
+ "oid": {
+ "hash": 1884071249,
+ "key": "",
+ "max": 0,
+ "namespace": "",
+ "oid": "ROBJ17",
+ "pool": 3,
+ "snapid": -2
+ },
+ "omap_digest": "0xe9572720",
+ "size": 7,
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "user_version": 51,
+ "watchers": {}
+ },
+ "shards": [
+ {
+ "data_digest": "0x5af0c3ef",
+ "errors": [
+ "data_digest_mismatch_info"
+ ],
+ "omap_digest": "0xe9572720",
+ "osd": 0,
+ "primary": false,
+ "size": 7
+ },
+ {
+ "data_digest": "0x5af0c3ef",
+ "errors": [
+ "data_digest_mismatch_info"
+ ],
+ "omap_digest": "0xe9572720",
+ "osd": 1,
+ "primary": true,
+ "size": 7
+ }
+ ],
+ "union_shard_errors": [
+ "data_digest_mismatch_info"
+ ]
+ },
+ {
+ "errors": [
+ "object_info_inconsistency"
+ ],
+ "object": {
+ "locator": "",
+ "name": "ROBJ18",
+ "nspace": "",
+ "snap": "head"
+ },
+ "selected_object_info": {
+ "alloc_hint_flags": 255,
+ "data_digest": "0x2ddbf8f5",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "lost": 0,
+ "manifest": {
+ "type": 0
+ },
+ "oid": {
+ "hash": 1629828556,
+ "key": "",
+ "max": 0,
+ "namespace": "",
+ "oid": "ROBJ18",
+ "pool": 3,
+ "snapid": -2
+ },
+ "omap_digest": "0xddc3680f",
+ "size": 7,
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "user_version": 54,
+ "watchers": {}
+ },
+ "shards": [
+ {
+ "data_digest": "0xbd89c912",
+ "errors": [
+ "data_digest_mismatch_info"
+ ],
+ "object_info": {
+ "alloc_hint_flags": 0,
+ "data_digest": "0x2ddbf8f5",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "lost": 0,
+ "manifest": {
+ "type": 0
+ },
+ "oid": {
+ "hash": 1629828556,
+ "key": "",
+ "max": 0,
+ "namespace": "",
+ "oid": "ROBJ18",
+ "pool": 3,
+ "snapid": -2
+ },
+ "omap_digest": "0xddc3680f",
+ "size": 7,
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "user_version": 54,
+ "watchers": {}
+ },
+ "omap_digest": "0xddc3680f",
+ "osd": 0,
+ "primary": false,
+ "size": 7
+ },
+ {
+ "data_digest": "0xbd89c912",
+ "errors": [
+ "data_digest_mismatch_info"
+ ],
+ "object_info": {
+ "alloc_hint_flags": 255,
+ "data_digest": "0x2ddbf8f5",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "lost": 0,
+ "manifest": {
+ "type": 0
+ },
+ "oid": {
+ "hash": 1629828556,
+ "key": "",
+ "max": 0,
+ "namespace": "",
+ "oid": "ROBJ18",
+ "pool": 3,
+ "snapid": -2
+ },
+ "omap_digest": "0xddc3680f",
+ "size": 7,
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "user_version": 54,
+ "watchers": {}
+ },
+ "omap_digest": "0xddc3680f",
+ "osd": 1,
+ "primary": true,
+ "size": 7
+ }
+ ],
+ "union_shard_errors": [
+ "data_digest_mismatch_info"
+ ]
+ },
+ {
+ "shards": [
+ {
+ "data_digest": "0x578a4830",
+ "omap_digest": "0xf8e11918",
+ "size": 7,
+ "errors": [
+ "data_digest_mismatch_info"
+ ],
+ "osd": 0,
+ "primary": false
+ },
+ {
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xf8e11918",
+ "size": 7,
+ "errors": [],
+ "osd": 1,
+ "primary": true
+ }
+ ],
+ "selected_object_info": {
+ "oid": {
+ "oid": "ROBJ2",
+ "key": "",
+ "snapid": -2,
+ "hash": 2026323607,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "51'60",
+ "prior_version": "23'6",
+ "last_reqid": "osd.1.0:59",
+ "user_version": 6,
+ "size": 7,
+ "mtime": "2018-04-05 14:33:20.498756",
+ "local_mtime": "2018-04-05 14:33:20.499704",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xf8e11918",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "union_shard_errors": [
+ "data_digest_mismatch_info"
+ ],
+ "errors": [
+ "data_digest_mismatch"
+ ],
+ "object": {
+ "version": 6,
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "ROBJ2"
+ }
+ },
+ {
+ "shards": [
+ {
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0x00b35dfd",
+ "size": 7,
+ "errors": [],
+ "osd": 0,
+ "primary": false
+ },
+ {
+ "errors": [
+ "missing"
+ ],
+ "osd": 1,
+ "primary": true
+ }
+ ],
+ "selected_object_info": {
+ "oid": {
+ "oid": "ROBJ3",
+ "key": "",
+ "snapid": -2,
+ "hash": 625845583,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "51'61",
+ "prior_version": "25'9",
+ "last_reqid": "osd.1.0:60",
+ "user_version": 9,
+ "size": 7,
+ "mtime": "2018-04-05 14:33:21.189382",
+ "local_mtime": "2018-04-05 14:33:21.190446",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0x00b35dfd",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "union_shard_errors": [
+ "missing"
+ ],
+ "errors": [],
+ "object": {
+ "version": 9,
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "ROBJ3"
+ }
+ },
+ {
+ "shards": [
+ {
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xd7178dfe",
+ "size": 7,
+ "errors": [
+ "omap_digest_mismatch_info"
+ ],
+ "osd": 0,
+ "primary": false
+ },
+ {
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xe2d46ea4",
+ "size": 7,
+ "errors": [],
+ "osd": 1,
+ "primary": true
+ }
+ ],
+ "selected_object_info": {
+ "oid": {
+ "oid": "ROBJ4",
+ "key": "",
+ "snapid": -2,
+ "hash": 2360875311,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "51'62",
+ "prior_version": "27'12",
+ "last_reqid": "osd.1.0:61",
+ "user_version": 12,
+ "size": 7,
+ "mtime": "2018-04-05 14:33:21.862313",
+ "local_mtime": "2018-04-05 14:33:21.863261",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xe2d46ea4",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "union_shard_errors": [
+ "omap_digest_mismatch_info"
+ ],
+ "errors": [
+ "omap_digest_mismatch"
+ ],
+ "object": {
+ "version": 12,
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "ROBJ4"
+ }
+ },
+ {
+ "shards": [
+ {
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0x1a862a41",
+ "size": 7,
+ "errors": [],
+ "osd": 0,
+ "primary": false
+ },
+ {
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0x06cac8f6",
+ "size": 7,
+ "errors": [
+ "omap_digest_mismatch_info"
+ ],
+ "osd": 1,
+ "primary": true
+ }
+ ],
+ "selected_object_info": {
+ "oid": {
+ "oid": "ROBJ5",
+ "key": "",
+ "snapid": -2,
+ "hash": 2334915887,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "51'63",
+ "prior_version": "29'15",
+ "last_reqid": "osd.1.0:62",
+ "user_version": 15,
+ "size": 7,
+ "mtime": "2018-04-05 14:33:22.589300",
+ "local_mtime": "2018-04-05 14:33:22.590376",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0x1a862a41",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "union_shard_errors": [
+ "omap_digest_mismatch_info"
+ ],
+ "errors": [
+ "omap_digest_mismatch"
+ ],
+ "object": {
+ "version": 15,
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "ROBJ5"
+ }
+ },
+ {
+ "shards": [
+ {
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0x689ee887",
+ "size": 7,
+ "errors": [
+ "omap_digest_mismatch_info"
+ ],
+ "osd": 0,
+ "primary": false
+ },
+ {
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0x179c919f",
+ "size": 7,
+ "errors": [],
+ "osd": 1,
+ "primary": true
+ }
+ ],
+ "selected_object_info": {
+ "oid": {
+ "oid": "ROBJ6",
+ "key": "",
+ "snapid": -2,
+ "hash": 390610085,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "51'54",
+ "prior_version": "31'18",
+ "last_reqid": "osd.1.0:53",
+ "user_version": 18,
+ "size": 7,
+ "mtime": "2018-04-05 14:33:23.289188",
+ "local_mtime": "2018-04-05 14:33:23.290130",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0x179c919f",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "union_shard_errors": [
+ "omap_digest_mismatch_info"
+ ],
+ "errors": [
+ "omap_digest_mismatch"
+ ],
+ "object": {
+ "version": 18,
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "ROBJ6"
+ }
+ },
+ {
+ "shards": [
+ {
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xefced57a",
+ "size": 7,
+ "errors": [],
+ "osd": 0,
+ "primary": false
+ },
+ {
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0x6a73cc07",
+ "size": 7,
+ "errors": [
+ "omap_digest_mismatch_info"
+ ],
+ "osd": 1,
+ "primary": true
+ }
+ ],
+ "selected_object_info": {
+ "oid": {
+ "oid": "ROBJ7",
+ "key": "",
+ "snapid": -2,
+ "hash": 3529485009,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "51'53",
+ "prior_version": "33'21",
+ "last_reqid": "osd.1.0:52",
+ "user_version": 21,
+ "size": 7,
+ "mtime": "2018-04-05 14:33:23.979658",
+ "local_mtime": "2018-04-05 14:33:23.980731",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xefced57a",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "union_shard_errors": [
+ "omap_digest_mismatch_info"
+ ],
+ "errors": [
+ "omap_digest_mismatch"
+ ],
+ "object": {
+ "version": 21,
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "ROBJ7"
+ }
+ },
+ {
+ "shards": [
+ {
+ "attrs": [
+ {
+ "Base64": false,
+ "value": "bad-val",
+ "name": "key1-ROBJ8"
+ },
+ {
+ "Base64": false,
+ "value": "val2-ROBJ8",
+ "name": "key2-ROBJ8"
+ }
+ ],
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xd6be81dc",
+ "size": 7,
+ "errors": [],
+ "osd": 0,
+ "primary": false
+ },
+ {
+ "attrs": [
+ {
+ "Base64": false,
+ "value": "val1-ROBJ8",
+ "name": "key1-ROBJ8"
+ },
+ {
+ "Base64": false,
+ "value": "val3-ROBJ8",
+ "name": "key3-ROBJ8"
+ }
+ ],
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xd6be81dc",
+ "size": 7,
+ "errors": [],
+ "osd": 1,
+ "primary": true
+ }
+ ],
+ "selected_object_info": {
+ "oid": {
+ "oid": "ROBJ8",
+ "key": "",
+ "snapid": -2,
+ "hash": 2359695969,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "79'66",
+ "prior_version": "79'65",
+ "last_reqid": "client.4554.0:1",
+ "user_version": 79,
+ "size": 7,
+ "mtime": "2018-04-05 14:34:05.598688",
+ "local_mtime": "2018-04-05 14:34:05.599698",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xd6be81dc",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "union_shard_errors": [],
+ "errors": [
+ "attr_value_mismatch",
+ "attr_name_mismatch"
+ ],
+ "object": {
+ "version": 66,
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "ROBJ8"
+ }
+ },
+ {
+ "shards": [
+ {
+ "object_info": {
+ "oid": {
+ "oid": "ROBJ9",
+ "key": "",
+ "snapid": -2,
+ "hash": 537189375,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "51'64",
+ "prior_version": "37'27",
+ "last_reqid": "osd.1.0:63",
+ "user_version": 27,
+ "size": 7,
+ "mtime": "2018-04-05 14:33:25.352485",
+ "local_mtime": "2018-04-05 14:33:25.353746",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0x2eecc539",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "data_digest": "0x1f26fb26",
+ "omap_digest": "0x2eecc539",
+ "size": 3,
+ "errors": [
+ "obj_size_info_mismatch"
+ ],
+ "osd": 0,
+ "primary": false
+ },
+ {
+ "object_info": {
+ "oid": {
+ "oid": "ROBJ9",
+ "key": "",
+ "snapid": -2,
+ "hash": 537189375,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "119'68",
+ "prior_version": "51'64",
+ "last_reqid": "client.4834.0:1",
+ "user_version": 81,
+ "size": 3,
+ "mtime": "2018-04-05 14:35:01.500659",
+ "local_mtime": "2018-04-05 14:35:01.502117",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x1f26fb26",
+ "omap_digest": "0x2eecc539",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "data_digest": "0x1f26fb26",
+ "omap_digest": "0x2eecc539",
+ "size": 3,
+ "errors": [],
+ "osd": 1,
+ "primary": true
+ }
+ ],
+ "selected_object_info": {
+ "oid": {
+ "oid": "ROBJ9",
+ "key": "",
+ "snapid": -2,
+ "hash": 537189375,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "119'68",
+ "prior_version": "51'64",
+ "last_reqid": "client.4834.0:1",
+ "user_version": 81,
+ "size": 3,
+ "mtime": "2018-04-05 14:35:01.500659",
+ "local_mtime": "2018-04-05 14:35:01.502117",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x1f26fb26",
+ "omap_digest": "0x2eecc539",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "union_shard_errors": [
+ "obj_size_info_mismatch"
+ ],
+ "errors": [
+ "object_info_inconsistency"
+ ],
+ "object": {
+ "version": 68,
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "ROBJ9"
+ }
+ }
+ ],
+ "epoch": 0
+}
+EOF
+
+ jq "$jqfilter" $dir/json | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/csjson
+ multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1
+ if test $getjson = "yes"
+ then
+ jq '.' $dir/json > save2.json
+ fi
+
+ if test "$LOCALRUN" = "yes" && which jsonschema > /dev/null;
+ then
+ jsonschema -i $dir/json $CEPH_ROOT/doc/rados/command/list-inconsistent-obj.json || return 1
+ fi
+
+ repair $pg
+ wait_for_clean
+
+    # These reads would hang if the repair didn't work, hence the timeouts
+ timeout 30 rados -p $poolname get ROBJ17 $dir/robj17.out || return 1
+ timeout 30 rados -p $poolname get ROBJ18 $dir/robj18.out || return 1
+    # Even though we couldn't repair all of the introduced errors, ROBJ17 and ROBJ18 can be fixed; verify both
+ diff -q $dir/new.ROBJ17 $dir/robj17.out || return 1
+ rm -f $dir/new.ROBJ17 $dir/robj17.out || return 1
+ diff -q $dir/new.ROBJ18 $dir/robj18.out || return 1
+ rm -f $dir/new.ROBJ18 $dir/robj18.out || return 1
+
+ if [ $ERRORS != "0" ];
+ then
+ echo "TEST FAILED WITH $ERRORS ERRORS"
+ return 1
+ fi
+
+ ceph osd pool rm $poolname $poolname --yes-i-really-really-mean-it
+}
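+
+# The expected-vs-actual comparison used throughout these tests pipes both the
+# embedded expectation (here-doc) and the live list-inconsistent-obj output
+# through "$jqfilter" and "$sortkeys" (both defined near the top of this file,
+# outside this hunk) before handing them to multidiff.  The helper below is only
+# an illustrative sketch of that normalize-then-diff idea and is not called by
+# any test: re-serialize both JSON documents with sorted keys so that field
+# ordering cannot cause spurious differences, then diff them.
+function sketch_compare_inconsistents() {
+    local expected=$1   # file holding the expected "inconsistents" JSON array
+    local actual=$2     # file holding raw "rados list-inconsistent-obj" output
+    local norm='import json,sys; print(json.dumps(json.load(sys.stdin), sort_keys=True, indent=2))'
+    diff -u <(python3 -c "$norm" < "$expected") \
+            <(jq '.inconsistents' "$actual" | python3 -c "$norm") || return 1
+}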
+
+
+#
+# Test scrub errors for an erasure coded pool
+#
+function corrupt_scrub_erasure() {
+ local dir=$1
+ local allow_overwrites=$2
+ local poolname=ecpool
+ local total_objs=7
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ for id in $(seq 0 2) ; do
+ run_osd $dir $id || return 1
+ done
+ create_rbd_pool || return 1
+    create_pool foo 1 || return 1
+
+ create_ec_pool $poolname $allow_overwrites k=2 m=1 stripe_unit=2K --force || return 1
+ wait_for_clean || return 1
+
+ for i in $(seq 1 $total_objs) ; do
+ objname=EOBJ${i}
+ add_something $dir $poolname $objname || return 1
+
+ local osd=$(expr $i % 2)
+
+ case $i in
+ 1)
+            # Corrupt the size (a deep scrub would also catch the data_digest mismatch)
+ local payload=UVWXYZZZ
+ echo $payload > $dir/CORRUPT
+ objectstore_tool $dir $osd $objname set-bytes $dir/CORRUPT || return 1
+ ;;
+
+ 2)
+ # Corrupt EC shard
+ dd if=/dev/urandom of=$dir/CORRUPT bs=2048 count=1
+ objectstore_tool $dir $osd $objname set-bytes $dir/CORRUPT || return 1
+ ;;
+
+ 3)
+ # missing
+ objectstore_tool $dir $osd $objname remove || return 1
+ ;;
+
+ 4)
+ rados --pool $poolname setxattr $objname key1-$objname val1-$objname || return 1
+ rados --pool $poolname setxattr $objname key2-$objname val2-$objname || return 1
+
+ # Break xattrs
+ echo -n bad-val > $dir/bad-val
+ objectstore_tool $dir $osd $objname set-attr _key1-$objname $dir/bad-val || return 1
+ objectstore_tool $dir $osd $objname rm-attr _key2-$objname || return 1
+ echo -n val3-$objname > $dir/newval
+ objectstore_tool $dir $osd $objname set-attr _key3-$objname $dir/newval || return 1
+ rm $dir/bad-val $dir/newval
+ ;;
+
+ 5)
+ # Corrupt EC shard
+ dd if=/dev/urandom of=$dir/CORRUPT bs=2048 count=2
+ objectstore_tool $dir $osd $objname set-bytes $dir/CORRUPT || return 1
+ ;;
+
+ 6)
+ objectstore_tool $dir 0 $objname rm-attr hinfo_key || return 1
+ echo -n bad-val > $dir/bad-val
+ objectstore_tool $dir 1 $objname set-attr hinfo_key $dir/bad-val || return 1
+ ;;
+
+ 7)
+ local payload=MAKETHISDIFFERENTFROMOTHEROBJECTS
+ echo $payload > $dir/DIFFERENT
+ rados --pool $poolname put $objname $dir/DIFFERENT || return 1
+
+ # Get hinfo_key from EOBJ1
+ objectstore_tool $dir 0 EOBJ1 get-attr hinfo_key > $dir/hinfo
+ objectstore_tool $dir 0 $objname set-attr hinfo_key $dir/hinfo || return 1
+ rm -f $dir/hinfo
+ ;;
+
+ esac
+ done
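+
+    # All of the corruptions above go through the objectstore_tool helper
+    # (defined in ceph-helpers.sh, outside this hunk), which roughly stops the
+    # target OSD, runs ceph-objectstore-tool against that OSD's data directory,
+    # and restarts the daemon.  A raw invocation would look roughly like the
+    # following (illustrative only; the exact paths come from the helper):
+    #   ceph-objectstore-tool --data-path $dir/$osd EOBJ1 get-attr hinfo_key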
+
+ local pg=$(get_pg $poolname EOBJ0)
+
+ pg_scrub $pg
+
+ rados list-inconsistent-pg $poolname > $dir/json || return 1
+ # Check pg count
+ test $(jq '. | length' $dir/json) = "1" || return 1
+ # Check pgid
+ test $(jq -r '.[0]' $dir/json) = $pg || return 1
+
+ rados list-inconsistent-obj $pg > $dir/json || return 1
+ # Get epoch for repair-get requests
+ epoch=$(jq .epoch $dir/json)
+
+ jq "$jqfilter" << EOF | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/checkcsjson
+{
+ "inconsistents": [
+ {
+ "shards": [
+ {
+ "size": 2048,
+ "errors": [],
+ "shard": 2,
+ "osd": 0,
+ "primary": false
+ },
+ {
+ "object_info": {
+ "oid": {
+ "oid": "EOBJ1",
+ "key": "",
+ "snapid": -2,
+ "hash": 560836233,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "27'1",
+ "prior_version": "0'0",
+ "last_reqid": "client.4184.0:1",
+ "user_version": 1,
+ "size": 7,
+ "mtime": "",
+ "local_mtime": "",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "data_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xffffffff",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "size": 9,
+ "shard": 0,
+ "errors": [
+ "size_mismatch_info",
+ "obj_size_info_mismatch"
+ ],
+ "osd": 1,
+ "primary": true
+ },
+ {
+ "size": 2048,
+ "shard": 1,
+ "errors": [],
+ "osd": 2,
+ "primary": false
+ }
+ ],
+ "selected_object_info": {
+ "oid": {
+ "oid": "EOBJ1",
+ "key": "",
+ "snapid": -2,
+ "hash": 560836233,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "27'1",
+ "prior_version": "0'0",
+ "last_reqid": "client.4184.0:1",
+ "user_version": 1,
+ "size": 7,
+ "mtime": "",
+ "local_mtime": "",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "data_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xffffffff",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "union_shard_errors": [
+ "size_mismatch_info",
+ "obj_size_info_mismatch"
+ ],
+ "errors": [
+ "size_mismatch"
+ ],
+ "object": {
+ "version": 1,
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "EOBJ1"
+ }
+ },
+ {
+ "shards": [
+ {
+ "size": 2048,
+ "errors": [],
+ "shard": 2,
+ "osd": 0,
+ "primary": false
+ },
+ {
+ "shard": 0,
+ "errors": [
+ "missing"
+ ],
+ "osd": 1,
+ "primary": true
+ },
+ {
+ "size": 2048,
+ "shard": 1,
+ "errors": [],
+ "osd": 2,
+ "primary": false
+ }
+ ],
+ "selected_object_info": {
+ "oid": {
+ "oid": "EOBJ3",
+ "key": "",
+ "snapid": -2,
+ "hash": 3125668237,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "39'3",
+ "prior_version": "0'0",
+ "last_reqid": "client.4252.0:1",
+ "user_version": 3,
+ "size": 7,
+ "mtime": "",
+ "local_mtime": "",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "data_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xffffffff",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "union_shard_errors": [
+ "missing"
+ ],
+ "errors": [],
+ "object": {
+ "version": 3,
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "EOBJ3"
+ }
+ },
+ {
+ "shards": [
+ {
+ "attrs": [
+ {
+ "Base64": false,
+ "value": "bad-val",
+ "name": "key1-EOBJ4"
+ },
+ {
+ "Base64": false,
+ "value": "val2-EOBJ4",
+ "name": "key2-EOBJ4"
+ }
+ ],
+ "size": 2048,
+ "errors": [],
+ "shard": 2,
+ "osd": 0,
+ "primary": false
+ },
+ {
+ "osd": 1,
+ "primary": true,
+ "shard": 0,
+ "errors": [],
+ "size": 2048,
+ "attrs": [
+ {
+ "Base64": false,
+ "value": "val1-EOBJ4",
+ "name": "key1-EOBJ4"
+ },
+ {
+ "Base64": false,
+ "value": "val2-EOBJ4",
+ "name": "key2-EOBJ4"
+ }
+ ]
+ },
+ {
+ "osd": 2,
+ "primary": false,
+ "shard": 1,
+ "errors": [],
+ "size": 2048,
+ "attrs": [
+ {
+ "Base64": false,
+ "value": "val1-EOBJ4",
+ "name": "key1-EOBJ4"
+ },
+ {
+ "Base64": false,
+ "value": "val3-EOBJ4",
+ "name": "key3-EOBJ4"
+ }
+ ]
+ }
+ ],
+ "selected_object_info": {
+ "oid": {
+ "oid": "EOBJ4",
+ "key": "",
+ "snapid": -2,
+ "hash": 1618759290,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "45'6",
+ "prior_version": "45'5",
+ "last_reqid": "client.4294.0:1",
+ "user_version": 6,
+ "size": 7,
+ "mtime": "",
+ "local_mtime": "",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "data_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xffffffff",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "union_shard_errors": [],
+ "errors": [
+ "attr_value_mismatch",
+ "attr_name_mismatch"
+ ],
+ "object": {
+ "version": 6,
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "EOBJ4"
+ }
+ },
+ {
+ "shards": [
+ {
+ "size": 2048,
+ "errors": [],
+ "shard": 2,
+ "osd": 0,
+ "primary": false
+ },
+ {
+ "object_info": {
+ "oid": {
+ "oid": "EOBJ5",
+ "key": "",
+ "snapid": -2,
+ "hash": 2918945441,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "59'7",
+ "prior_version": "0'0",
+ "last_reqid": "client.4382.0:1",
+ "user_version": 7,
+ "size": 7,
+ "mtime": "",
+ "local_mtime": "",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "data_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xffffffff",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "size": 4096,
+ "shard": 0,
+ "errors": [
+ "size_mismatch_info",
+ "obj_size_info_mismatch"
+ ],
+ "osd": 1,
+ "primary": true
+ },
+ {
+ "size": 2048,
+ "shard": 1,
+ "errors": [],
+ "osd": 2,
+ "primary": false
+ }
+ ],
+ "selected_object_info": {
+ "oid": {
+ "oid": "EOBJ5",
+ "key": "",
+ "snapid": -2,
+ "hash": 2918945441,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "59'7",
+ "prior_version": "0'0",
+ "last_reqid": "client.4382.0:1",
+ "user_version": 7,
+ "size": 7,
+ "mtime": "",
+ "local_mtime": "",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "data_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xffffffff",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "union_shard_errors": [
+ "size_mismatch_info",
+ "obj_size_info_mismatch"
+ ],
+ "errors": [
+ "size_mismatch"
+ ],
+ "object": {
+ "version": 7,
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "EOBJ5"
+ }
+ },
+ {
+ "errors": [],
+ "object": {
+ "locator": "",
+ "name": "EOBJ6",
+ "nspace": "",
+ "snap": "head",
+ "version": 8
+ },
+ "selected_object_info": {
+ "oid": {
+ "oid": "EOBJ6",
+ "key": "",
+ "snapid": -2,
+ "hash": 3050890866,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "65'8",
+ "prior_version": "0'0",
+ "last_reqid": "client.4418.0:1",
+ "user_version": 8,
+ "size": 7,
+ "mtime": "",
+ "local_mtime": "",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "data_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xffffffff",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "shards": [
+ {
+ "errors": [
+ "hinfo_missing"
+ ],
+ "osd": 0,
+ "primary": false,
+ "shard": 2,
+ "size": 2048
+ },
+ {
+ "errors": [
+ "hinfo_corrupted"
+ ],
+ "osd": 1,
+ "primary": true,
+ "shard": 0,
+ "hashinfo": "bad-val",
+ "size": 2048
+ },
+ {
+ "errors": [],
+ "osd": 2,
+ "primary": false,
+ "shard": 1,
+ "size": 2048,
+ "hashinfo": {
+ "cumulative_shard_hashes": [
+ {
+ "hash": 80717615,
+ "shard": 0
+ },
+ {
+ "hash": 1534491824,
+ "shard": 1
+ },
+ {
+ "hash": 80717615,
+ "shard": 2
+ }
+ ],
+ "total_chunk_size": 2048
+ }
+ }
+ ],
+ "union_shard_errors": [
+ "hinfo_missing",
+ "hinfo_corrupted"
+ ]
+ },
+ {
+ "errors": [
+ "hinfo_inconsistency"
+ ],
+ "object": {
+ "locator": "",
+ "name": "EOBJ7",
+ "nspace": "",
+ "snap": "head",
+ "version": 10
+ },
+ "selected_object_info": {
+ "oid": {
+ "oid": "EOBJ7",
+ "key": "",
+ "snapid": -2,
+ "hash": 3258066308,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "75'10",
+ "prior_version": "75'9",
+ "last_reqid": "client.4482.0:1",
+ "user_version": 10,
+ "size": 34,
+ "mtime": "",
+ "local_mtime": "",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "data_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x136e4e27",
+ "omap_digest": "0xffffffff",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "shards": [
+ {
+ "hashinfo": {
+ "cumulative_shard_hashes": [
+ {
+ "hash": 80717615,
+ "shard": 0
+ },
+ {
+ "hash": 1534491824,
+ "shard": 1
+ },
+ {
+ "hash": 80717615,
+ "shard": 2
+ }
+ ],
+ "total_chunk_size": 2048
+ },
+ "errors": [],
+ "osd": 0,
+ "primary": false,
+ "shard": 2,
+ "size": 2048
+ },
+ {
+ "hashinfo": {
+ "cumulative_shard_hashes": [
+ {
+ "hash": 1534350760,
+ "shard": 0
+ },
+ {
+ "hash": 1534491824,
+ "shard": 1
+ },
+ {
+ "hash": 1534350760,
+ "shard": 2
+ }
+ ],
+ "total_chunk_size": 2048
+ },
+ "errors": [],
+ "osd": 1,
+ "primary": true,
+ "shard": 0,
+ "size": 2048
+ },
+ {
+ "hashinfo": {
+ "cumulative_shard_hashes": [
+ {
+ "hash": 1534350760,
+ "shard": 0
+ },
+ {
+ "hash": 1534491824,
+ "shard": 1
+ },
+ {
+ "hash": 1534350760,
+ "shard": 2
+ }
+ ],
+ "total_chunk_size": 2048
+ },
+ "errors": [],
+ "osd": 2,
+ "primary": false,
+ "shard": 1,
+ "size": 2048
+ }
+ ],
+ "union_shard_errors": []
+ }
+ ],
+ "epoch": 0
+}
+EOF
+
+ jq "$jqfilter" $dir/json | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/csjson
+ multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1
+ if test $getjson = "yes"
+ then
+ jq '.' $dir/json > save3.json
+ fi
+
+ if test "$LOCALRUN" = "yes" && which jsonschema > /dev/null;
+ then
+ jsonschema -i $dir/json $CEPH_ROOT/doc/rados/command/list-inconsistent-obj.json || return 1
+ fi
+
+ pg_deep_scrub $pg
+
+ rados list-inconsistent-pg $poolname > $dir/json || return 1
+ # Check pg count
+ test $(jq '. | length' $dir/json) = "1" || return 1
+ # Check pgid
+ test $(jq -r '.[0]' $dir/json) = $pg || return 1
+
+ rados list-inconsistent-obj $pg > $dir/json || return 1
+ # Get epoch for repair-get requests
+ epoch=$(jq .epoch $dir/json)
+
+ if [ "$allow_overwrites" = "true" ]
+ then
+ jq "$jqfilter" << EOF | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/checkcsjson
+{
+ "inconsistents": [
+ {
+ "shards": [
+ {
+ "data_digest": "0x00000000",
+ "omap_digest": "0xffffffff",
+ "size": 2048,
+ "errors": [],
+ "shard": 2,
+ "osd": 0,
+ "primary": false
+ },
+ {
+ "object_info": {
+ "oid": {
+ "oid": "EOBJ1",
+ "key": "",
+ "snapid": -2,
+ "hash": 560836233,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "27'1",
+ "prior_version": "0'0",
+ "last_reqid": "client.4184.0:1",
+ "user_version": 1,
+ "size": 7,
+ "mtime": "2018-04-05 14:31:33.837147",
+ "local_mtime": "2018-04-05 14:31:33.840763",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "data_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xffffffff",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "size": 9,
+ "shard": 0,
+ "errors": [
+ "read_error",
+ "size_mismatch_info",
+ "obj_size_info_mismatch"
+ ],
+ "osd": 1,
+ "primary": true
+ },
+ {
+ "data_digest": "0x00000000",
+ "omap_digest": "0xffffffff",
+ "size": 2048,
+ "shard": 1,
+ "errors": [],
+ "osd": 2,
+ "primary": false
+ }
+ ],
+ "selected_object_info": {
+ "oid": {
+ "oid": "EOBJ1",
+ "key": "",
+ "snapid": -2,
+ "hash": 560836233,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "27'1",
+ "prior_version": "0'0",
+ "last_reqid": "client.4184.0:1",
+ "user_version": 1,
+ "size": 7,
+ "mtime": "2018-04-05 14:31:33.837147",
+ "local_mtime": "2018-04-05 14:31:33.840763",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "data_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xffffffff",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "union_shard_errors": [
+ "read_error",
+ "size_mismatch_info",
+ "obj_size_info_mismatch"
+ ],
+ "errors": [
+ "size_mismatch"
+ ],
+ "object": {
+ "version": 1,
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "EOBJ1"
+ }
+ },
+ {
+ "shards": [
+ {
+ "data_digest": "0x00000000",
+ "omap_digest": "0xffffffff",
+ "size": 2048,
+ "errors": [],
+ "shard": 2,
+ "osd": 0,
+ "primary": false
+ },
+ {
+ "shard": 0,
+ "errors": [
+ "missing"
+ ],
+ "osd": 1,
+ "primary": true
+ },
+ {
+ "data_digest": "0x00000000",
+ "omap_digest": "0xffffffff",
+ "size": 2048,
+ "shard": 1,
+ "errors": [],
+ "osd": 2,
+ "primary": false
+ }
+ ],
+ "selected_object_info": {
+ "oid": {
+ "oid": "EOBJ3",
+ "key": "",
+ "snapid": -2,
+ "hash": 3125668237,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "39'3",
+ "prior_version": "0'0",
+ "last_reqid": "client.4252.0:1",
+ "user_version": 3,
+ "size": 7,
+ "mtime": "2018-04-05 14:31:46.841145",
+ "local_mtime": "2018-04-05 14:31:46.844996",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "data_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xffffffff",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "union_shard_errors": [
+ "missing"
+ ],
+ "errors": [],
+ "object": {
+ "version": 3,
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "EOBJ3"
+ }
+ },
+ {
+ "shards": [
+ {
+ "attrs": [
+ {
+ "Base64": false,
+ "value": "bad-val",
+ "name": "key1-EOBJ4"
+ },
+ {
+ "Base64": false,
+ "value": "val2-EOBJ4",
+ "name": "key2-EOBJ4"
+ }
+ ],
+ "data_digest": "0x00000000",
+ "omap_digest": "0xffffffff",
+ "size": 2048,
+ "errors": [],
+ "shard": 2,
+ "osd": 0,
+ "primary": false
+ },
+ {
+ "attrs": [
+ {
+ "Base64": false,
+ "value": "val1-EOBJ4",
+ "name": "key1-EOBJ4"
+ },
+ {
+ "Base64": false,
+ "value": "val2-EOBJ4",
+ "name": "key2-EOBJ4"
+ }
+ ],
+ "data_digest": "0x00000000",
+ "omap_digest": "0xffffffff",
+ "size": 2048,
+ "errors": [],
+ "shard": 0,
+ "osd": 1,
+ "primary": true
+ },
+ {
+ "attrs": [
+ {
+ "Base64": false,
+ "value": "val1-EOBJ4",
+ "name": "key1-EOBJ4"
+ },
+ {
+ "Base64": false,
+ "value": "val3-EOBJ4",
+ "name": "key3-EOBJ4"
+ }
+ ],
+ "data_digest": "0x00000000",
+ "omap_digest": "0xffffffff",
+ "size": 2048,
+ "errors": [],
+ "shard": 1,
+ "osd": 2,
+ "primary": false
+ }
+ ],
+ "selected_object_info": {
+ "oid": {
+ "oid": "EOBJ4",
+ "key": "",
+ "snapid": -2,
+ "hash": 1618759290,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "45'6",
+ "prior_version": "45'5",
+ "last_reqid": "client.4294.0:1",
+ "user_version": 6,
+ "size": 7,
+ "mtime": "2018-04-05 14:31:54.663622",
+ "local_mtime": "2018-04-05 14:31:54.664527",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "data_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xffffffff",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "union_shard_errors": [],
+ "errors": [
+ "attr_value_mismatch",
+ "attr_name_mismatch"
+ ],
+ "object": {
+ "version": 6,
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "EOBJ4"
+ }
+ },
+ {
+ "shards": [
+ {
+ "data_digest": "0x00000000",
+ "omap_digest": "0xffffffff",
+ "size": 2048,
+ "errors": [],
+ "shard": 2,
+ "osd": 0,
+ "primary": false
+ },
+ {
+ "object_info": {
+ "oid": {
+ "oid": "EOBJ5",
+ "key": "",
+ "snapid": -2,
+ "hash": 2918945441,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "59'7",
+ "prior_version": "0'0",
+ "last_reqid": "client.4382.0:1",
+ "user_version": 7,
+ "size": 7,
+ "mtime": "2018-04-05 14:32:12.929161",
+ "local_mtime": "2018-04-05 14:32:12.934707",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "data_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xffffffff",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "size": 4096,
+ "errors": [
+ "read_error",
+ "size_mismatch_info",
+ "obj_size_info_mismatch"
+ ],
+ "shard": 0,
+ "osd": 1,
+ "primary": true
+ },
+ {
+ "data_digest": "0x00000000",
+ "omap_digest": "0xffffffff",
+ "size": 2048,
+ "errors": [],
+ "shard": 1,
+ "osd": 2,
+ "primary": false
+ }
+ ],
+ "selected_object_info": {
+ "oid": {
+ "oid": "EOBJ5",
+ "key": "",
+ "snapid": -2,
+ "hash": 2918945441,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "59'7",
+ "prior_version": "0'0",
+ "last_reqid": "client.4382.0:1",
+ "user_version": 7,
+ "size": 7,
+ "mtime": "2018-04-05 14:32:12.929161",
+ "local_mtime": "2018-04-05 14:32:12.934707",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "data_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xffffffff",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "union_shard_errors": [
+ "read_error",
+ "size_mismatch_info",
+ "obj_size_info_mismatch"
+ ],
+ "errors": [
+ "size_mismatch"
+ ],
+ "object": {
+ "version": 7,
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "EOBJ5"
+ }
+ },
+ {
+ "object": {
+ "name": "EOBJ6",
+ "nspace": "",
+ "locator": "",
+ "snap": "head",
+ "version": 8
+ },
+ "errors": [],
+ "union_shard_errors": [
+ "read_error",
+ "hinfo_missing",
+ "hinfo_corrupted"
+ ],
+ "selected_object_info": {
+ "oid": {
+ "oid": "EOBJ6",
+ "key": "",
+ "snapid": -2,
+ "hash": 3050890866,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "65'8",
+ "prior_version": "0'0",
+ "last_reqid": "client.4418.0:1",
+ "user_version": 8,
+ "size": 7,
+ "mtime": "2018-04-05 14:32:20.634116",
+ "local_mtime": "2018-04-05 14:32:20.637999",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "data_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xffffffff",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "shards": [
+ {
+ "osd": 0,
+ "primary": false,
+ "shard": 2,
+ "errors": [
+ "read_error",
+ "hinfo_missing"
+ ],
+ "size": 2048
+ },
+ {
+ "osd": 1,
+ "primary": true,
+ "shard": 0,
+ "errors": [
+ "read_error",
+ "hinfo_corrupted"
+ ],
+ "size": 2048,
+ "hashinfo": "bad-val"
+ },
+ {
+ "osd": 2,
+ "primary": false,
+ "shard": 1,
+ "errors": [],
+ "size": 2048,
+ "omap_digest": "0xffffffff",
+ "data_digest": "0x00000000",
+ "hashinfo": {
+ "cumulative_shard_hashes": [
+ {
+ "hash": 80717615,
+ "shard": 0
+ },
+ {
+ "hash": 1534491824,
+ "shard": 1
+ },
+ {
+ "hash": 80717615,
+ "shard": 2
+ }
+ ],
+ "total_chunk_size": 2048
+ }
+ }
+ ]
+ },
+ {
+ "object": {
+ "name": "EOBJ7",
+ "nspace": "",
+ "locator": "",
+ "snap": "head",
+ "version": 10
+ },
+ "errors": [
+ "hinfo_inconsistency"
+ ],
+ "union_shard_errors": [],
+ "selected_object_info": {
+ "oid": {
+ "oid": "EOBJ7",
+ "key": "",
+ "snapid": -2,
+ "hash": 3258066308,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "75'10",
+ "prior_version": "75'9",
+ "last_reqid": "client.4482.0:1",
+ "user_version": 10,
+ "size": 34,
+ "mtime": "2018-04-05 14:32:33.058782",
+ "local_mtime": "2018-04-05 14:32:33.059679",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "data_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x136e4e27",
+ "omap_digest": "0xffffffff",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "shards": [
+ {
+ "osd": 0,
+ "primary": false,
+ "shard": 2,
+ "errors": [],
+ "size": 2048,
+ "omap_digest": "0xffffffff",
+ "data_digest": "0x00000000",
+ "hashinfo": {
+ "cumulative_shard_hashes": [
+ {
+ "hash": 80717615,
+ "shard": 0
+ },
+ {
+ "hash": 1534491824,
+ "shard": 1
+ },
+ {
+ "hash": 80717615,
+ "shard": 2
+ }
+ ],
+ "total_chunk_size": 2048
+ }
+ },
+ {
+ "osd": 1,
+ "primary": true,
+ "shard": 0,
+ "errors": [],
+ "size": 2048,
+ "omap_digest": "0xffffffff",
+ "data_digest": "0x00000000",
+ "hashinfo": {
+ "cumulative_shard_hashes": [
+ {
+ "hash": 1534350760,
+ "shard": 0
+ },
+ {
+ "hash": 1534491824,
+ "shard": 1
+ },
+ {
+ "hash": 1534350760,
+ "shard": 2
+ }
+ ],
+ "total_chunk_size": 2048
+ }
+ },
+ {
+ "osd": 2,
+ "primary": false,
+ "shard": 1,
+ "errors": [],
+ "size": 2048,
+ "omap_digest": "0xffffffff",
+ "data_digest": "0x00000000",
+ "hashinfo": {
+ "cumulative_shard_hashes": [
+ {
+ "hash": 1534350760,
+ "shard": 0
+ },
+ {
+ "hash": 1534491824,
+ "shard": 1
+ },
+ {
+ "hash": 1534350760,
+ "shard": 2
+ }
+ ],
+ "total_chunk_size": 2048
+ }
+ }
+ ]
+ }
+ ],
+ "epoch": 0
+}
+EOF
+
+ else
+
+ jq "$jqfilter" << EOF | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/checkcsjson
+{
+ "inconsistents": [
+ {
+ "shards": [
+ {
+ "data_digest": "0x04cfa72f",
+ "omap_digest": "0xffffffff",
+ "size": 2048,
+ "errors": [],
+ "shard": 2,
+ "osd": 0,
+ "primary": false
+ },
+ {
+ "object_info": {
+ "oid": {
+ "oid": "EOBJ1",
+ "key": "",
+ "snapid": -2,
+ "hash": 560836233,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "27'1",
+ "prior_version": "0'0",
+ "last_reqid": "client.4192.0:1",
+ "user_version": 1,
+ "size": 7,
+ "mtime": "2018-04-05 14:30:10.688009",
+ "local_mtime": "2018-04-05 14:30:10.691774",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "data_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xffffffff",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "size": 9,
+ "shard": 0,
+ "errors": [
+ "read_error",
+ "size_mismatch_info",
+ "obj_size_info_mismatch"
+ ],
+ "osd": 1,
+ "primary": true
+ },
+ {
+ "data_digest": "0x04cfa72f",
+ "omap_digest": "0xffffffff",
+ "size": 2048,
+ "shard": 1,
+ "errors": [],
+ "osd": 2,
+ "primary": false
+ }
+ ],
+ "selected_object_info": {
+ "oid": {
+ "oid": "EOBJ1",
+ "key": "",
+ "snapid": -2,
+ "hash": 560836233,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "27'1",
+ "prior_version": "0'0",
+ "last_reqid": "client.4192.0:1",
+ "user_version": 1,
+ "size": 7,
+ "mtime": "2018-04-05 14:30:10.688009",
+ "local_mtime": "2018-04-05 14:30:10.691774",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "data_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xffffffff",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "union_shard_errors": [
+ "read_error",
+ "size_mismatch_info",
+ "obj_size_info_mismatch"
+ ],
+ "errors": [
+ "size_mismatch"
+ ],
+ "object": {
+ "version": 1,
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "EOBJ1"
+ }
+ },
+ {
+ "shards": [
+ {
+ "size": 2048,
+ "errors": [
+ "ec_hash_error"
+ ],
+ "shard": 2,
+ "osd": 0,
+ "primary": false
+ },
+ {
+ "data_digest": "0x04cfa72f",
+ "omap_digest": "0xffffffff",
+ "size": 2048,
+ "errors": [],
+ "shard": 0,
+ "osd": 1,
+ "primary": true
+ },
+ {
+ "data_digest": "0x04cfa72f",
+ "omap_digest": "0xffffffff",
+ "size": 2048,
+ "errors": [],
+ "shard": 1,
+ "osd": 2,
+ "primary": false
+ }
+ ],
+ "selected_object_info": {
+ "oid": {
+ "oid": "EOBJ2",
+ "key": "",
+ "snapid": -2,
+ "hash": 562812377,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "33'2",
+ "prior_version": "0'0",
+ "last_reqid": "client.4224.0:1",
+ "user_version": 2,
+ "size": 7,
+ "mtime": "2018-04-05 14:30:14.152945",
+ "local_mtime": "2018-04-05 14:30:14.154014",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "data_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xffffffff",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "union_shard_errors": [
+ "ec_hash_error"
+ ],
+ "errors": [],
+ "object": {
+ "version": 2,
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "EOBJ2"
+ }
+ },
+ {
+ "shards": [
+ {
+ "data_digest": "0x04cfa72f",
+ "omap_digest": "0xffffffff",
+ "size": 2048,
+ "errors": [],
+ "shard": 2,
+ "osd": 0,
+ "primary": false
+ },
+ {
+ "osd": 1,
+ "primary": true,
+ "shard": 0,
+ "errors": [
+ "missing"
+ ]
+ },
+ {
+ "data_digest": "0x04cfa72f",
+ "omap_digest": "0xffffffff",
+ "size": 2048,
+ "shard": 1,
+ "errors": [],
+ "osd": 2,
+ "primary": false
+ }
+ ],
+ "selected_object_info": {
+ "oid": {
+ "oid": "EOBJ3",
+ "key": "",
+ "snapid": -2,
+ "hash": 3125668237,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "39'3",
+ "prior_version": "0'0",
+ "last_reqid": "client.4258.0:1",
+ "user_version": 3,
+ "size": 7,
+ "mtime": "2018-04-05 14:30:18.875544",
+ "local_mtime": "2018-04-05 14:30:18.880153",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "data_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xffffffff",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "union_shard_errors": [
+ "missing"
+ ],
+ "errors": [],
+ "object": {
+ "version": 3,
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "EOBJ3"
+ }
+ },
+ {
+ "shards": [
+ {
+ "attrs": [
+ {
+ "Base64": false,
+ "value": "bad-val",
+ "name": "key1-EOBJ4"
+ },
+ {
+ "Base64": false,
+ "value": "val2-EOBJ4",
+ "name": "key2-EOBJ4"
+ }
+ ],
+ "data_digest": "0x04cfa72f",
+ "omap_digest": "0xffffffff",
+ "size": 2048,
+ "errors": [],
+ "shard": 2,
+ "osd": 0,
+ "primary": false
+ },
+ {
+ "osd": 1,
+ "primary": true,
+ "shard": 0,
+ "errors": [],
+ "size": 2048,
+ "omap_digest": "0xffffffff",
+ "data_digest": "0x04cfa72f",
+ "attrs": [
+ {
+ "Base64": false,
+ "value": "val1-EOBJ4",
+ "name": "key1-EOBJ4"
+ },
+ {
+ "Base64": false,
+ "value": "val2-EOBJ4",
+ "name": "key2-EOBJ4"
+ }
+ ]
+ },
+ {
+ "osd": 2,
+ "primary": false,
+ "shard": 1,
+ "errors": [],
+ "size": 2048,
+ "omap_digest": "0xffffffff",
+ "data_digest": "0x04cfa72f",
+ "attrs": [
+ {
+ "Base64": false,
+ "value": "val1-EOBJ4",
+ "name": "key1-EOBJ4"
+ },
+ {
+ "Base64": false,
+ "value": "val3-EOBJ4",
+ "name": "key3-EOBJ4"
+ }
+ ]
+ }
+ ],
+ "selected_object_info": {
+ "oid": {
+ "oid": "EOBJ4",
+ "key": "",
+ "snapid": -2,
+ "hash": 1618759290,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "45'6",
+ "prior_version": "45'5",
+ "last_reqid": "client.4296.0:1",
+ "user_version": 6,
+ "size": 7,
+ "mtime": "2018-04-05 14:30:22.271983",
+ "local_mtime": "2018-04-05 14:30:22.272840",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "data_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xffffffff",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "union_shard_errors": [],
+ "errors": [
+ "attr_value_mismatch",
+ "attr_name_mismatch"
+ ],
+ "object": {
+ "version": 6,
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "EOBJ4"
+ }
+ },
+ {
+ "shards": [
+ {
+ "data_digest": "0x04cfa72f",
+ "omap_digest": "0xffffffff",
+ "size": 2048,
+ "errors": [],
+ "shard": 2,
+ "osd": 0,
+ "primary": false
+ },
+ {
+ "object_info": {
+ "oid": {
+ "oid": "EOBJ5",
+ "key": "",
+ "snapid": -2,
+ "hash": 2918945441,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "59'7",
+ "prior_version": "0'0",
+ "last_reqid": "client.4384.0:1",
+ "user_version": 7,
+ "size": 7,
+ "mtime": "2018-04-05 14:30:35.162395",
+ "local_mtime": "2018-04-05 14:30:35.166390",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "data_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xffffffff",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "size": 4096,
+ "shard": 0,
+ "errors": [
+ "read_error",
+ "size_mismatch_info",
+ "obj_size_info_mismatch"
+ ],
+ "osd": 1,
+ "primary": true
+ },
+ {
+ "data_digest": "0x04cfa72f",
+ "omap_digest": "0xffffffff",
+ "size": 2048,
+ "shard": 1,
+ "errors": [],
+ "osd": 2,
+ "primary": false
+ }
+ ],
+ "selected_object_info": {
+ "oid": {
+ "oid": "EOBJ5",
+ "key": "",
+ "snapid": -2,
+ "hash": 2918945441,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "59'7",
+ "prior_version": "0'0",
+ "last_reqid": "client.4384.0:1",
+ "user_version": 7,
+ "size": 7,
+ "mtime": "2018-04-05 14:30:35.162395",
+ "local_mtime": "2018-04-05 14:30:35.166390",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "data_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xffffffff",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "union_shard_errors": [
+ "read_error",
+ "size_mismatch_info",
+ "obj_size_info_mismatch"
+ ],
+ "errors": [
+ "size_mismatch"
+ ],
+ "object": {
+ "version": 7,
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "EOBJ5"
+ }
+ },
+ {
+ "object": {
+ "name": "EOBJ6",
+ "nspace": "",
+ "locator": "",
+ "snap": "head",
+ "version": 8
+ },
+ "errors": [],
+ "union_shard_errors": [
+ "read_error",
+ "hinfo_missing",
+ "hinfo_corrupted"
+ ],
+ "selected_object_info": {
+ "oid": {
+ "oid": "EOBJ6",
+ "key": "",
+ "snapid": -2,
+ "hash": 3050890866,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "65'8",
+ "prior_version": "0'0",
+ "last_reqid": "client.4420.0:1",
+ "user_version": 8,
+ "size": 7,
+ "mtime": "2018-04-05 14:30:40.914673",
+ "local_mtime": "2018-04-05 14:30:40.917705",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "data_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xffffffff",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "shards": [
+ {
+ "osd": 0,
+ "primary": false,
+ "shard": 2,
+ "errors": [
+ "read_error",
+ "hinfo_missing"
+ ],
+ "size": 2048
+ },
+ {
+ "osd": 1,
+ "primary": true,
+ "shard": 0,
+ "errors": [
+ "read_error",
+ "hinfo_corrupted"
+ ],
+ "size": 2048,
+ "hashinfo": "bad-val"
+ },
+ {
+ "osd": 2,
+ "primary": false,
+ "shard": 1,
+ "errors": [],
+ "size": 2048,
+ "omap_digest": "0xffffffff",
+ "data_digest": "0x04cfa72f",
+ "hashinfo": {
+ "cumulative_shard_hashes": [
+ {
+ "hash": 80717615,
+ "shard": 0
+ },
+ {
+ "hash": 1534491824,
+ "shard": 1
+ },
+ {
+ "hash": 80717615,
+ "shard": 2
+ }
+ ],
+ "total_chunk_size": 2048
+ }
+ }
+ ]
+ },
+ {
+ "object": {
+ "name": "EOBJ7",
+ "nspace": "",
+ "locator": "",
+ "snap": "head",
+ "version": 10
+ },
+ "errors": [
+ "hinfo_inconsistency"
+ ],
+ "union_shard_errors": [
+ "ec_hash_error"
+ ],
+ "selected_object_info": {
+ "oid": {
+ "oid": "EOBJ7",
+ "key": "",
+ "snapid": -2,
+ "hash": 3258066308,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "75'10",
+ "prior_version": "75'9",
+ "last_reqid": "client.4486.0:1",
+ "user_version": 10,
+ "size": 34,
+ "mtime": "2018-04-05 14:30:50.995009",
+ "local_mtime": "2018-04-05 14:30:50.996112",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "data_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x136e4e27",
+ "omap_digest": "0xffffffff",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "shards": [
+ {
+ "osd": 0,
+ "primary": false,
+ "shard": 2,
+ "errors": [
+ "ec_hash_error"
+ ],
+ "size": 2048,
+ "hashinfo": {
+ "cumulative_shard_hashes": [
+ {
+ "hash": 80717615,
+ "shard": 0
+ },
+ {
+ "hash": 1534491824,
+ "shard": 1
+ },
+ {
+ "hash": 80717615,
+ "shard": 2
+ }
+ ],
+ "total_chunk_size": 2048
+ }
+ },
+ {
+ "osd": 1,
+ "primary": true,
+ "shard": 0,
+ "errors": [],
+ "size": 2048,
+ "omap_digest": "0xffffffff",
+ "data_digest": "0x5b7455a8",
+ "hashinfo": {
+ "cumulative_shard_hashes": [
+ {
+ "hash": 1534350760,
+ "shard": 0
+ },
+ {
+ "hash": 1534491824,
+ "shard": 1
+ },
+ {
+ "hash": 1534350760,
+ "shard": 2
+ }
+ ],
+ "total_chunk_size": 2048
+ }
+ },
+ {
+ "osd": 2,
+ "primary": false,
+ "shard": 1,
+ "errors": [],
+ "size": 2048,
+ "omap_digest": "0xffffffff",
+ "data_digest": "0x5b7455a8",
+ "hashinfo": {
+ "cumulative_shard_hashes": [
+ {
+ "hash": 1534350760,
+ "shard": 0
+ },
+ {
+ "hash": 1534491824,
+ "shard": 1
+ },
+ {
+ "hash": 1534350760,
+ "shard": 2
+ }
+ ],
+ "total_chunk_size": 2048
+ }
+ }
+ ]
+ }
+ ],
+ "epoch": 0
+}
+EOF
+
+ fi
+
+ jq "$jqfilter" $dir/json | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/csjson
+ multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1
+ if test $getjson = "yes"
+ then
+ if [ "$allow_overwrites" = "true" ]
+ then
+ num=4
+ else
+ num=5
+ fi
+ jq '.' $dir/json > save${num}.json
+ fi
+
+ if test "$LOCALRUN" = "yes" && which jsonschema > /dev/null;
+ then
+ jsonschema -i $dir/json $CEPH_ROOT/doc/rados/command/list-inconsistent-obj.json || return 1
+ fi
+
+ ceph osd pool rm $poolname $poolname --yes-i-really-really-mean-it
+}
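+
+# When $getjson is set to "yes" (a developer switch set elsewhere in this file,
+# outside this hunk), the tests above dump the live list-inconsistent-obj output
+# to save2.json .. save5.json instead of failing on a mismatch, which makes it
+# easier to refresh the hard-coded expected JSON in the here-docs.  The saved
+# output can then be inspected the same way the tests normalize it, for example
+# (illustrative only):
+#   jq '.inconsistents' save5.json | python3 -c "$sortkeys" | less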
+
+function TEST_corrupt_scrub_erasure_appends() {
+ corrupt_scrub_erasure $1 false
+}
+
+function TEST_corrupt_scrub_erasure_overwrites() {
+ if [ "$use_ec_overwrite" = "true" ]; then
+ corrupt_scrub_erasure $1 true
+ fi
+}
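+
+# The two wrappers above follow the naming convention the standalone harness in
+# ceph-helpers.sh (outside this hunk) relies on: every function whose name starts
+# with TEST_ is discovered and run with the test directory as its first argument.
+# A minimal sketch of that discovery idea, assuming only the naming convention
+# (this is not the harness's actual implementation):
+#   for func in $(declare -F | awk '{print $3}' | grep '^TEST_'); do
+#       $func "$dir" || return 1
+#   done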
+
+#
+# Test to make sure that a periodic scrub won't cause deep-scrub info to be lost
+#
+function TEST_periodic_scrub_replicated() {
+ local dir=$1
+ local poolname=psr_pool
+ local objname=POBJ
+
+ run_mon $dir a --osd_pool_default_size=2 || return 1
+ run_mgr $dir x || return 1
+ local ceph_osd_args="--osd-scrub-interval-randomize-ratio=0 --osd-deep-scrub-randomize-ratio=0 "
+ ceph_osd_args+="--osd_scrub_backoff_ratio=0"
+ run_osd $dir 0 $ceph_osd_args || return 1
+ run_osd $dir 1 $ceph_osd_args || return 1
+ create_rbd_pool || return 1
+ wait_for_clean || return 1
+
+ create_pool $poolname 1 1 || return 1
+ wait_for_clean || return 1
+
+ local osd=0
+ add_something $dir $poolname $objname scrub || return 1
+ local primary=$(get_primary $poolname $objname)
+ local pg=$(get_pg $poolname $objname)
+
+    # Add an error that only a deep scrub can detect
+ local payload=UVWXYZ
+ echo $payload > $dir/CORRUPT
+    # The OSD restart done by objectstore_tool reuses $ceph_osd_args
+ objectstore_tool $dir $osd $objname set-bytes $dir/CORRUPT || return 1
+
+ # No scrub information available, so expect failure
+ set -o pipefail
+ ! rados list-inconsistent-obj $pg | jq '.' || return 1
+ set +o pipefail
+
+ pg_deep_scrub $pg || return 1
+
+    # Make sure the bad object was found
+ rados list-inconsistent-obj $pg | jq '.' | grep -q $objname || return 1
+
+ flush_pg_stats
+ local last_scrub=$(get_last_scrub_stamp $pg)
+    # Fake a scheduled scrub
+    ceph tell $pg scrub || return 1
+    # Wait for the scheduled regular scrub
+    wait_for_scrub $pg "$last_scrub"
+
+    # The regular scrub should have been upgraded to a deep scrub
+ grep -q "Deep scrub errors, upgrading scrub to deep-scrub" $dir/osd.${primary}.log || return 1
+
+ # Bad object still known
+ rados list-inconsistent-obj $pg | jq '.' | grep -q $objname || return 1
+
+ # Can't upgrade with this set
+ ceph osd set nodeep-scrub
+ # Let map change propagate to OSDs
+ ceph tell osd.0 get_latest_osdmap
+ flush_pg_stats
+ sleep 5
+
+    # Fake a scheduled scrub
+    ceph tell $pg scrub || return 1
+    # Wait for the scheduled regular scrub
+    # to notice the deep-scrub errors and skip itself
+ local found=false
+ for i in $(seq 14 -1 0)
+ do
+ sleep 1
+ ! grep -q "Regular scrub skipped due to deep-scrub errors and nodeep-scrub set" $dir/osd.${primary}.log || { found=true ; break; }
+ echo Time left: $i seconds
+ done
+ test $found = "true" || return 1
+
+ # Bad object still known
+ rados list-inconsistent-obj $pg | jq '.' | grep -q $objname || return 1
+
+ flush_pg_stats
+ # Request a regular scrub and it will be done
+ pg_scrub $pg
+ grep -q "Regular scrub request, deep-scrub details will be lost" $dir/osd.${primary}.log || return 1
+
+    # The deep-scrub error is no longer present
+ rados list-inconsistent-obj $pg | jq '.' | grep -qv $objname || return 1
+}
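+
+# The countdown loop above (grep the primary's log once per second until the
+# expected message appears or the deadline expires) is a recurring pattern in
+# these tests.  Below is a minimal reusable sketch of the same idea; it is not
+# used by the tests and the name is illustrative only:
+function sketch_wait_for_log_line() {
+    local logfile=$1 pattern=$2 timeout=${3:-15}
+    local i
+    for ((i = timeout; i > 0; i--)); do
+        grep -q "$pattern" "$logfile" && return 0
+        sleep 1
+    done
+    return 1
+}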
+
+function TEST_scrub_warning() {
+ local dir=$1
+ local poolname=psr_pool
+ local objname=POBJ
+ local scrubs=5
+ local deep_scrubs=5
+ local i1_day=86400
+ local i7_days=$(calc $i1_day \* 7)
+ local i14_days=$(calc $i1_day \* 14)
+ local overdue=0.5
+ local conf_overdue_seconds=$(calc $i7_days + $i1_day + \( $i7_days \* $overdue \) )
+ local pool_overdue_seconds=$(calc $i14_days + $i1_day + \( $i14_days \* $overdue \) )
+
+ run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
+ run_mgr $dir x --mon_warn_pg_not_scrubbed_ratio=${overdue} --mon_warn_pg_not_deep_scrubbed_ratio=${overdue} || return 1
+ run_osd $dir 0 $ceph_osd_args --osd_scrub_backoff_ratio=0 || return 1
+
+ for i in $(seq 1 $(expr $scrubs + $deep_scrubs))
+ do
+ create_pool $poolname-$i 1 1 || return 1
+ wait_for_clean || return 1
+ if [ $i = "1" ];
+ then
+ ceph osd pool set $poolname-$i scrub_max_interval $i14_days
+ fi
+ if [ $i = $(expr $scrubs + 1) ];
+ then
+ ceph osd pool set $poolname-$i deep_scrub_interval $i14_days
+ fi
+ done
+
+ # Only 1 osd
+ local primary=0
+
+ ceph osd set noscrub || return 1
+ ceph osd set nodeep-scrub || return 1
+ ceph config set global osd_scrub_interval_randomize_ratio 0
+ ceph config set global osd_deep_scrub_randomize_ratio 0
+ ceph config set global osd_scrub_max_interval ${i7_days}
+ ceph config set global osd_deep_scrub_interval ${i7_days}
+
+    # Fake scheduled scrubs
+ for i in $(seq 1 $scrubs)
+ do
+ if [ $i = "1" ];
+ then
+ overdue_seconds=$pool_overdue_seconds
+ else
+ overdue_seconds=$conf_overdue_seconds
+ fi
+ ceph tell ${i}.0 scrub $(expr ${overdue_seconds} + ${i}00) || return 1
+ done
+    # Fake scheduled deep scrubs
+ for i in $(seq $(expr $scrubs + 1) $(expr $scrubs + $deep_scrubs))
+ do
+ if [ $i = "$(expr $scrubs + 1)" ];
+ then
+ overdue_seconds=$pool_overdue_seconds
+ else
+ overdue_seconds=$conf_overdue_seconds
+ fi
+ ceph tell ${i}.0 deep_scrub $(expr ${overdue_seconds} + ${i}00) || return 1
+ done
+ flush_pg_stats
+
+ ceph health
+ ceph health detail
+ ceph health | grep -q " pgs not deep-scrubbed in time" || return 1
+ ceph health | grep -q " pgs not scrubbed in time" || return 1
+
+    # Note that the 'ceph tell <pgid> deep_scrub' command now also sets the regular
+    # scrub time-stamp, i.e. every PG that is late for deep scrubbing is also late
+    # for regular scrubbing.  For now, accept either count.
+ COUNT=$(ceph health detail | grep "not scrubbed since" | wc -l)
+
+    if (( $COUNT != $scrubs && $COUNT != $(expr $scrubs + $deep_scrubs) )); then
+ ceph health detail | grep "not scrubbed since"
+ return 1
+ fi
+ COUNT=$(ceph health detail | grep "not deep-scrubbed since" | wc -l)
+ if [ "$COUNT" != $deep_scrubs ]; then
+ ceph health detail | grep "not deep-scrubbed since"
+ return 1
+ fi
+}
+
+#
+# Corrupt snapset in replicated pool
+#
+function TEST_corrupt_snapset_scrub_rep() {
+ local dir=$1
+ local poolname=csr_pool
+ local total_objs=2
+
+ run_mon $dir a --osd_pool_default_size=2 || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ create_rbd_pool || return 1
+ wait_for_clean || return 1
+
+ create_pool foo 1 || return 1
+ create_pool $poolname 1 1 || return 1
+ wait_for_clean || return 1
+
+ for i in $(seq 1 $total_objs) ; do
+ objname=ROBJ${i}
+ add_something $dir $poolname $objname || return 1
+
+ rados --pool $poolname setomapheader $objname hdr-$objname || return 1
+ rados --pool $poolname setomapval $objname key-$objname val-$objname || return 1
+ done
+
+ local pg=$(get_pg $poolname ROBJ0)
+ local primary=$(get_primary $poolname ROBJ0)
+
+ rados -p $poolname mksnap snap1
+ echo -n head_of_snapshot_data > $dir/change
+
+ for i in $(seq 1 $total_objs) ; do
+ objname=ROBJ${i}
+
+ # Alternate corruption between osd.0 and osd.1
+ local osd=$(expr $i % 2)
+
+ case $i in
+ 1)
+ rados --pool $poolname put $objname $dir/change
+ objectstore_tool $dir $osd --head $objname clear-snapset corrupt || return 1
+ ;;
+
+ 2)
+ rados --pool $poolname put $objname $dir/change
+ objectstore_tool $dir $osd --head $objname clear-snapset corrupt || return 1
+ ;;
+
+ esac
+ done
+ rm $dir/change
+
+ pg_scrub $pg
+
+ rados list-inconsistent-pg $poolname > $dir/json || return 1
+ # Check pg count
+ test $(jq '. | length' $dir/json) = "1" || return 1
+ # Check pgid
+ test $(jq -r '.[0]' $dir/json) = $pg || return 1
+
+ rados list-inconsistent-obj $pg > $dir/json || return 1
+
+ jq "$jqfilter" << EOF | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/checkcsjson
+{
+ "epoch": 34,
+ "inconsistents": [
+ {
+ "object": {
+ "name": "ROBJ1",
+ "nspace": "",
+ "locator": "",
+ "snap": "head",
+ "version": 8
+ },
+ "errors": [
+ "snapset_inconsistency"
+ ],
+ "union_shard_errors": [],
+ "selected_object_info": {
+ "oid": {
+ "oid": "ROBJ1",
+ "key": "",
+ "snapid": -2,
+ "hash": 1454963827,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "24'8",
+ "prior_version": "21'3",
+ "last_reqid": "client.4195.0:1",
+ "user_version": 8,
+ "size": 21,
+ "mtime": "2018-04-05 14:35:43.286117",
+ "local_mtime": "2018-04-05 14:35:43.288990",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x53acb008",
+ "omap_digest": "0xffffffff",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "shards": [
+ {
+ "osd": 0,
+ "primary": false,
+ "errors": [],
+ "size": 21,
+ "snapset": {
+ "clones": [
+ {
+ "overlap": "[]",
+ "size": 7,
+ "snap": 1,
+ "snaps": [
+ 1
+ ]
+ }
+ ],
+ "seq": 1
+ }
+ },
+ {
+ "osd": 1,
+ "primary": true,
+ "errors": [],
+ "size": 21,
+ "snapset": {
+ "clones": [],
+ "seq": 0
+ }
+ }
+ ]
+ },
+ {
+ "object": {
+ "name": "ROBJ2",
+ "nspace": "",
+ "locator": "",
+ "snap": "head",
+ "version": 10
+ },
+ "errors": [
+ "snapset_inconsistency"
+ ],
+ "union_shard_errors": [],
+ "selected_object_info": {
+ "oid": {
+ "oid": "ROBJ2",
+ "key": "",
+ "snapid": -2,
+ "hash": 2026323607,
+ "max": 0,
+ "pool": 3,
+ "namespace": ""
+ },
+ "version": "28'10",
+ "prior_version": "23'6",
+ "last_reqid": "client.4223.0:1",
+ "user_version": 10,
+ "size": 21,
+ "mtime": "2018-04-05 14:35:48.326856",
+ "local_mtime": "2018-04-05 14:35:48.328097",
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest"
+ ],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x53acb008",
+ "omap_digest": "0xffffffff",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "alloc_hint_flags": 0,
+ "manifest": {
+ "type": 0
+ },
+ "watchers": {}
+ },
+ "shards": [
+ {
+ "osd": 0,
+ "primary": false,
+ "errors": [],
+ "size": 21,
+ "snapset": {
+ "clones": [],
+ "seq": 0
+ }
+ },
+ {
+ "osd": 1,
+ "primary": true,
+ "errors": [],
+ "size": 21,
+ "snapset": {
+ "clones": [
+ {
+ "overlap": "[]",
+ "size": 7,
+ "snap": 1,
+ "snaps": [
+ 1
+ ]
+ }
+ ],
+ "seq": 1
+ }
+ }
+ ]
+ }
+ ]
+}
+EOF
+
+ jq "$jqfilter" $dir/json | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/csjson
+ multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1
+ if test $getjson = "yes"
+ then
+ jq '.' $dir/json > save6.json
+ fi
+
+ if test "$LOCALRUN" = "yes" && which jsonschema > /dev/null;
+ then
+ jsonschema -i $dir/json $CEPH_ROOT/doc/rados/command/list-inconsistent-obj.json || return 1
+ fi
+
+ ERRORS=0
+ declare -a err_strings
+ err_strings[0]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid [0-9]*:.*:::ROBJ1:head : snapset inconsistent"
+ err_strings[1]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid [0-9]*:.*:::ROBJ2:head : snapset inconsistent"
+ err_strings[2]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 [0-9]*:.*:::ROBJ1:1 : is an unexpected clone"
+ err_strings[3]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub : stat mismatch, got 3/4 objects, 1/2 clones, 3/4 dirty, 3/4 omap, 0/0 pinned, 0/0 hit_set_archive, 0/0 whiteouts, 49/56 bytes, 0/0 manifest objects, 0/0 hit_set_archive bytes."
+ err_strings[4]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 0 missing, 2 inconsistent objects"
+ err_strings[5]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 4 errors"
+
+ for err_string in "${err_strings[@]}"
+ do
+ if ! grep -q "$err_string" $dir/osd.${primary}.log
+ then
+ echo "Missing log message '$err_string'"
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+ done
+
+ if [ $ERRORS != "0" ];
+ then
+ echo "TEST FAILED WITH $ERRORS ERRORS"
+ return 1
+ fi
+
+ ceph osd pool rm $poolname $poolname --yes-i-really-really-mean-it
+}
+
+function TEST_request_scrub_priority() {
+ local dir=$1
+ local poolname=psr_pool
+ local objname=POBJ
+ local OBJECTS=64
+ local PGS=8
+
+ run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
+ run_mgr $dir x || return 1
+ local ceph_osd_args="--osd-scrub-interval-randomize-ratio=0 --osd-deep-scrub-randomize-ratio=0 "
+ ceph_osd_args+="--osd_scrub_backoff_ratio=0"
+ run_osd $dir 0 $ceph_osd_args || return 1
+
+ create_pool $poolname $PGS $PGS || return 1
+ wait_for_clean || return 1
+
+ local osd=0
+ add_something $dir $poolname $objname noscrub || return 1
+ local primary=$(get_primary $poolname $objname)
+ local pg=$(get_pg $poolname $objname)
+ poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }')
+
+ local otherpgs
+ for i in $(seq 0 $(expr $PGS - 1))
+ do
+ opg="${poolid}.${i}"
+ if [ "$opg" = "$pg" ]; then
+ continue
+ fi
+ otherpgs="${otherpgs}${opg} "
+        local other_last_scrub=$(get_last_scrub_stamp $opg)
+        # Fake a scheduled scrub
+ ceph tell $opg scrub $opg || return 1
+ done
+
+ sleep 15
+ flush_pg_stats
+
+ # Request a regular scrub and it will be done
+ local last_scrub=$(get_last_scrub_stamp $pg)
+ ceph pg scrub $pg
+
+ ceph osd unset noscrub || return 1
+ ceph osd unset nodeep-scrub || return 1
+
+ wait_for_scrub $pg "$last_scrub"
+
+ for opg in $otherpgs $pg
+ do
+ wait_for_scrub $opg "$other_last_scrub"
+ done
+
+ # Verify that the requested scrub ran first
+ grep "log_channel.*scrub ok" $dir/osd.${primary}.log | grep -v purged_snaps | head -1 | sed 's/.*[[]DBG[]]//' | grep -q $pg || return 1
+}
+
+
+main osd-scrub-repair "$@"
+
+# Local Variables:
+# compile-command: "cd build ; make -j4 && \
+# ../qa/run-standalone.sh osd-scrub-repair.sh"
+# End:
diff --git a/qa/standalone/scrub/osd-scrub-snaps.sh b/qa/standalone/scrub/osd-scrub-snaps.sh
new file mode 100755
index 000000000..c543b48a1
--- /dev/null
+++ b/qa/standalone/scrub/osd-scrub-snaps.sh
@@ -0,0 +1,1188 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2015 Red Hat <contact@redhat.com>
+#
+# Author: David Zafman <dzafman@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+# Test development and debugging:
+# set getjson to "yes" to ignore diff errors and save the results for updating the test
+getjson="no"
+
+jqfilter='.inconsistents'
+sortkeys='import json; import sys ; JSON=sys.stdin.read() ; ud = json.loads(JSON) ; print ( json.dumps(ud, sort_keys=True, indent=2) )'
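+# (The one-liner above simply re-serializes JSON with sorted keys and a fixed
+# indent, so the expected/actual comparisons below are insensitive to key order.
+# Illustrative example:  echo '{"b": 1, "a": 2}' | python3 -c "$sortkeys"
+# prints the same object with "a" listed before "b".)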
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7121" # git grep '\<7121\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+
+ export -n CEPH_CLI_TEST_DUP_COMMAND
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+function create_scenario() {
+ local dir=$1
+ local poolname=$2
+ local TESTDATA=$3
+ local osd=$4
+
+ SNAP=1
+ rados -p $poolname mksnap snap${SNAP}
+ dd if=/dev/urandom of=$TESTDATA bs=256 count=${SNAP}
+ rados -p $poolname put obj1 $TESTDATA
+ rados -p $poolname put obj5 $TESTDATA
+ rados -p $poolname put obj3 $TESTDATA
+ for i in `seq 6 14`
+ do rados -p $poolname put obj${i} $TESTDATA
+ done
+
+ SNAP=2
+ rados -p $poolname mksnap snap${SNAP}
+ dd if=/dev/urandom of=$TESTDATA bs=256 count=${SNAP}
+ rados -p $poolname put obj5 $TESTDATA
+
+ SNAP=3
+ rados -p $poolname mksnap snap${SNAP}
+ dd if=/dev/urandom of=$TESTDATA bs=256 count=${SNAP}
+ rados -p $poolname put obj3 $TESTDATA
+
+ SNAP=4
+ rados -p $poolname mksnap snap${SNAP}
+ dd if=/dev/urandom of=$TESTDATA bs=256 count=${SNAP}
+ rados -p $poolname put obj5 $TESTDATA
+ rados -p $poolname put obj2 $TESTDATA
+
+ SNAP=5
+ rados -p $poolname mksnap snap${SNAP}
+ SNAP=6
+ rados -p $poolname mksnap snap${SNAP}
+ dd if=/dev/urandom of=$TESTDATA bs=256 count=${SNAP}
+ rados -p $poolname put obj5 $TESTDATA
+
+ SNAP=7
+ rados -p $poolname mksnap snap${SNAP}
+
+ rados -p $poolname rm obj4
+ rados -p $poolname rm obj16
+ rados -p $poolname rm obj2
+
+ kill_daemons $dir TERM osd || return 1
+
+    # No need to use the ceph_objectstore_tool() helper because the OSD is stopped
+
+ JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj1)"
+ ceph-objectstore-tool --data-path $dir/${osd} "$JSON" --force remove || return 1
+
+ JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --op list obj5 | grep \"snapid\":2)"
+ ceph-objectstore-tool --data-path $dir/${osd} "$JSON" remove || return 1
+
+ JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --op list obj5 | grep \"snapid\":1)"
+ OBJ5SAVE="$JSON"
+ # Starts with a snapmap
+ ceph-kvstore-tool bluestore-kv $dir/${osd} list 2> /dev/null > $dir/drk.log
+ grep SNA_ $dir/drk.log
+ grep "^[pm].*SNA_.*[.]1[.]obj5[.][.]$" $dir/drk.log || return 1
+ ceph-objectstore-tool --data-path $dir/${osd} --rmtype nosnapmap "$JSON" remove || return 1
+    # Check that snapmap is still there
+ ceph-kvstore-tool bluestore-kv $dir/${osd} list 2> /dev/null > $dir/drk.log
+ grep SNA_ $dir/drk.log
+ grep "^[pm].*SNA_.*[.]1[.]obj5[.][.]$" $dir/drk.log || return 1
+ rm -f $dir/drk.log
+
+ JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --op list obj5 | grep \"snapid\":4)"
+ dd if=/dev/urandom of=$TESTDATA bs=256 count=18
+ ceph-objectstore-tool --data-path $dir/${osd} "$JSON" set-bytes $TESTDATA || return 1
+
+ JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj3)"
+ dd if=/dev/urandom of=$TESTDATA bs=256 count=15
+ ceph-objectstore-tool --data-path $dir/${osd} "$JSON" set-bytes $TESTDATA || return 1
+
+ JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --op list obj4 | grep \"snapid\":7)"
+ ceph-objectstore-tool --data-path $dir/${osd} "$JSON" remove || return 1
+
+ # Starts with a snapmap
+ ceph-kvstore-tool bluestore-kv $dir/${osd} list 2> /dev/null > $dir/drk.log
+ grep SNA_ $dir/drk.log
+ grep "^[pm].*SNA_.*[.]7[.]obj16[.][.]$" $dir/drk.log || return 1
+ JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --op list obj16 | grep \"snapid\":7)"
+ ceph-objectstore-tool --data-path $dir/${osd} --rmtype snapmap "$JSON" remove || return 1
+ # Check that snapmap is now removed
+ ceph-kvstore-tool bluestore-kv $dir/${osd} list 2> /dev/null > $dir/drk.log
+ grep SNA_ $dir/drk.log
+ ! grep "^[pm].*SNA_.*[.]7[.]obj16[.][.]$" $dir/drk.log || return 1
+ rm -f $dir/drk.log
+
+ JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj2)"
+ ceph-objectstore-tool --data-path $dir/${osd} "$JSON" rm-attr snapset || return 1
+
+ # Create a clone which isn't in snapset and doesn't have object info
+ JSON="$(echo "$OBJ5SAVE" | sed s/snapid\":1/snapid\":7/)"
+ dd if=/dev/urandom of=$TESTDATA bs=256 count=7
+ ceph-objectstore-tool --data-path $dir/${osd} "$JSON" set-bytes $TESTDATA || return 1
+
+ JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj6)"
+ ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset || return 1
+ JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj7)"
+ ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset corrupt || return 1
+ JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj8)"
+ ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset seq || return 1
+ JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj9)"
+ ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset clone_size || return 1
+ JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj10)"
+ ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset clone_overlap || return 1
+ JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj11)"
+ ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset clones || return 1
+ JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj12)"
+ ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset head || return 1
+ JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj13)"
+ ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset snaps || return 1
+ JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj14)"
+ ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset size || return 1
+
+ echo "garbage" > $dir/bad
+ JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj15)"
+ ceph-objectstore-tool --data-path $dir/${osd} "$JSON" set-attr snapset $dir/bad || return 1
+ rm -f $dir/bad
+ return 0
+}
+
+function TEST_scrub_snaps() {
+ local dir=$1
+ local poolname=test
+ local OBJS=16
+ local OSDS=1
+
+ TESTDATA="testdata.$$"
+
+ run_mon $dir a --osd_pool_default_size=$OSDS || return 1
+ run_mgr $dir x || return 1
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ run_osd $dir $osd || return 1
+ done
+
+ # All scrubs done manually. Don't want any unexpected scheduled scrubs.
+ ceph osd set noscrub || return 1
+ ceph osd set nodeep-scrub || return 1
+
+ # Create a pool with a single pg
+ create_pool $poolname 1 1
+ wait_for_clean || return 1
+ poolid=$(ceph osd dump | grep "^pool.*[']test[']" | awk '{ print $2 }')
+
+ dd if=/dev/urandom of=$TESTDATA bs=1032 count=1
+ for i in `seq 1 $OBJS`
+ do
+ rados -p $poolname put obj${i} $TESTDATA
+ done
+
+ local primary=$(get_primary $poolname obj1)
+
+ create_scenario $dir $poolname $TESTDATA $primary || return 1
+
+ rm -f $TESTDATA
+
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ activate_osd $dir $osd || return 1
+ done
+ ceph tell osd.* config set osd_shallow_scrub_chunk_max 25
+ ceph tell osd.* config set osd_shallow_scrub_chunk_min 5
+ ceph tell osd.* config set osd_pg_stat_report_interval_max 1
+
+
+ wait_for_clean || return 1
+
+ ceph tell osd.* config get osd_shallow_scrub_chunk_max
+ ceph tell osd.* config get osd_shallow_scrub_chunk_min
+ ceph tell osd.* config get osd_pg_stat_report_interval_max
+ ceph tell osd.* config get osd_scrub_chunk_max
+ ceph tell osd.* config get osd_scrub_chunk_min
+
+ local pgid="${poolid}.0"
+ if ! pg_scrub "$pgid" ; then
+ return 1
+ fi
+
+ test "$(grep "_scan_snaps start" $dir/osd.${primary}.log | wc -l)" = "2" || return 1
+
+ rados list-inconsistent-pg $poolname > $dir/json || return 1
+ # Check pg count
+ test $(jq '. | length' $dir/json) = "1" || return 1
+ # Check pgid
+ test $(jq -r '.[0]' $dir/json) = $pgid || return 1
+
+ rados list-inconsistent-obj $pgid > $dir/json || return 1
+
+    # With a single-copy pool the injected snapshot errors don't show up as
+    # object errors, because object errors are only detected by comparing
+    # copies.
+ jq "$jqfilter" << EOF | python3 -c "$sortkeys" > $dir/checkcsjson
+{
+ "epoch": 17,
+ "inconsistents": []
+}
+EOF
+
+ jq "$jqfilter" $dir/json | python3 -c "$sortkeys" > $dir/csjson
+ multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1
+
+ rados list-inconsistent-snapset $pgid > $dir/json || return 1
+
+ jq "$jqfilter" << EOF | python3 -c "$sortkeys" > $dir/checkcsjson
+{
+ "inconsistents": [
+ {
+ "errors": [
+ "headless"
+ ],
+ "snap": 1,
+ "locator": "",
+ "nspace": "",
+ "name": "obj1"
+ },
+ {
+ "errors": [
+ "size_mismatch"
+ ],
+ "snap": 1,
+ "locator": "",
+ "nspace": "",
+ "name": "obj10"
+ },
+ {
+ "errors": [
+ "headless"
+ ],
+ "snap": 1,
+ "locator": "",
+ "nspace": "",
+ "name": "obj11"
+ },
+ {
+ "errors": [
+ "size_mismatch"
+ ],
+ "snap": 1,
+ "locator": "",
+ "nspace": "",
+ "name": "obj14"
+ },
+ {
+ "errors": [
+ "headless"
+ ],
+ "snap": 1,
+ "locator": "",
+ "nspace": "",
+ "name": "obj6"
+ },
+ {
+ "errors": [
+ "headless"
+ ],
+ "snap": 1,
+ "locator": "",
+ "nspace": "",
+ "name": "obj7"
+ },
+ {
+ "errors": [
+ "size_mismatch"
+ ],
+ "snap": 1,
+ "locator": "",
+ "nspace": "",
+ "name": "obj9"
+ },
+ {
+ "errors": [
+ "headless"
+ ],
+ "snap": 4,
+ "locator": "",
+ "nspace": "",
+ "name": "obj2"
+ },
+ {
+ "errors": [
+ "size_mismatch"
+ ],
+ "snap": 4,
+ "locator": "",
+ "nspace": "",
+ "name": "obj5"
+ },
+ {
+ "errors": [
+ "headless"
+ ],
+ "snap": 7,
+ "locator": "",
+ "nspace": "",
+ "name": "obj2"
+ },
+ {
+ "errors": [
+ "info_missing",
+ "headless"
+ ],
+ "snap": 7,
+ "locator": "",
+ "nspace": "",
+ "name": "obj5"
+ },
+ {
+ "name": "obj10",
+ "nspace": "",
+ "locator": "",
+ "snap": "head",
+ "snapset": {
+ "seq": 1,
+ "clones": [
+ {
+ "snap": 1,
+ "size": 1032,
+ "overlap": "????",
+ "snaps": [
+ 1
+ ]
+ }
+ ]
+ },
+ "errors": []
+ },
+ {
+ "extra clones": [
+ 1
+ ],
+ "errors": [
+ "extra_clones"
+ ],
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "obj11",
+ "snapset": {
+ "seq": 1,
+ "clones": []
+ }
+ },
+ {
+ "name": "obj14",
+ "nspace": "",
+ "locator": "",
+ "snap": "head",
+ "snapset": {
+ "seq": 1,
+ "clones": [
+ {
+ "snap": 1,
+ "size": 1033,
+ "overlap": "[]",
+ "snaps": [
+ 1
+ ]
+ }
+ ]
+ },
+ "errors": []
+ },
+ {
+ "errors": [
+ "snapset_corrupted"
+ ],
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "obj15"
+ },
+ {
+ "extra clones": [
+ 7,
+ 4
+ ],
+ "errors": [
+ "snapset_missing",
+ "extra_clones"
+ ],
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "obj2"
+ },
+ {
+ "errors": [
+ "size_mismatch"
+ ],
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "obj3",
+ "snapset": {
+ "seq": 3,
+ "clones": [
+ {
+ "snap": 1,
+ "size": 1032,
+ "overlap": "[]",
+ "snaps": [
+ 1
+ ]
+ },
+ {
+ "snap": 3,
+ "size": 256,
+ "overlap": "[]",
+ "snaps": [
+ 3,
+ 2
+ ]
+ }
+ ]
+ }
+ },
+ {
+ "missing": [
+ 7
+ ],
+ "errors": [
+ "clone_missing"
+ ],
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "obj4",
+ "snapset": {
+ "seq": 7,
+ "clones": [
+ {
+ "snap": 7,
+ "size": 1032,
+ "overlap": "[]",
+ "snaps": [
+ 7,
+ 6,
+ 5,
+ 4,
+ 3,
+ 2,
+ 1
+ ]
+ }
+ ]
+ }
+ },
+ {
+ "missing": [
+ 2,
+ 1
+ ],
+ "extra clones": [
+ 7
+ ],
+ "errors": [
+ "extra_clones",
+ "clone_missing"
+ ],
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "obj5",
+ "snapset": {
+ "seq": 6,
+ "clones": [
+ {
+ "snap": 1,
+ "size": 1032,
+ "overlap": "[]",
+ "snaps": [
+ 1
+ ]
+ },
+ {
+ "snap": 2,
+ "size": 256,
+ "overlap": "[]",
+ "snaps": [
+ 2
+ ]
+ },
+ {
+ "snap": 4,
+ "size": 512,
+ "overlap": "[]",
+ "snaps": [
+ 4,
+ 3
+ ]
+ },
+ {
+ "snap": 6,
+ "size": 1024,
+ "overlap": "[]",
+ "snaps": [
+ 6,
+ 5
+ ]
+ }
+ ]
+ }
+ },
+ {
+ "extra clones": [
+ 1
+ ],
+ "errors": [
+ "extra_clones"
+ ],
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "obj6",
+ "snapset": {
+ "seq": 1,
+ "clones": []
+ }
+ },
+ {
+ "extra clones": [
+ 1
+ ],
+ "errors": [
+ "extra_clones"
+ ],
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "obj7",
+ "snapset": {
+ "seq": 0,
+ "clones": []
+ }
+ },
+ {
+ "errors": [
+ "snapset_error"
+ ],
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "obj8",
+ "snapset": {
+ "seq": 0,
+ "clones": [
+ {
+ "snap": 1,
+ "size": 1032,
+ "overlap": "[]",
+ "snaps": [
+ 1
+ ]
+ }
+ ]
+ }
+ },
+ {
+ "name": "obj9",
+ "nspace": "",
+ "locator": "",
+ "snap": "head",
+ "snapset": {
+ "seq": 1,
+ "clones": [
+ {
+ "snap": 1,
+ "size": "????",
+ "overlap": "[]",
+ "snaps": [
+ 1
+ ]
+ }
+ ]
+ },
+ "errors": []
+ }
+ ],
+ "epoch": 20
+}
+EOF
+
+ jq "$jqfilter" $dir/json | python3 -c "$sortkeys" > $dir/csjson
+ multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1
+ if test $getjson = "yes"
+ then
+ jq '.' $dir/json > save1.json
+ fi
+
+ if test "$LOCALRUN" = "yes" && which jsonschema > /dev/null;
+ then
+ jsonschema -i $dir/json $CEPH_ROOT/doc/rados/command/list-inconsistent-snap.json || return 1
+ fi
+
+ pidfiles=$(find $dir 2>/dev/null | grep 'osd[^/]*\.pid')
+ pids=""
+ for pidfile in ${pidfiles}
+ do
+ pids+="$(cat $pidfile) "
+ done
+
+ ERRORS=0
+
+ for i in `seq 1 7`
+ do
+ rados -p $poolname rmsnap snap$i
+ done
+ sleep 5
+ local -i loop=0
+ while ceph pg dump pgs | grep -q snaptrim;
+ do
+ if ceph pg dump pgs | grep -q snaptrim_error;
+ then
+ break
+ fi
+ sleep 2
+ loop+=1
+ if (( $loop >= 10 )) ; then
+ ERRORS=$(expr $ERRORS + 1)
+ break
+ fi
+ done
+ ceph pg dump pgs
+
+ for pid in $pids
+ do
+ if ! kill -0 $pid
+ then
+ echo "OSD Crash occurred"
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+ done
+
+ kill_daemons $dir || return 1
+
+ declare -a err_strings
+ err_strings[0]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*::obj10:.* : is missing in clone_overlap"
+ err_strings[1]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*::obj5:7 : no '_' attr"
+ err_strings[2]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*::obj5:7 : is an unexpected clone"
+ err_strings[3]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*::obj5:4 : on disk size [(]4608[)] does not match object info size [(]512[)] adjusted for ondisk to [(]512[)]"
+ err_strings[4]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj5:head : expected clone .*:::obj5:2"
+ err_strings[5]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj5:head : expected clone .*:::obj5:1"
+ err_strings[6]="log_channel[(]cluster[)] log [[]INF[]] : scrub [0-9]*[.]0 .*:::obj5:head : 2 missing clone[(]s[)]"
+ err_strings[7]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj8:head : snaps.seq not set"
+ err_strings[8]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj7:1 : is an unexpected clone"
+ err_strings[9]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj3:head : on disk size [(]3840[)] does not match object info size [(]768[)] adjusted for ondisk to [(]768[)]"
+ err_strings[10]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj6:1 : is an unexpected clone"
+ err_strings[11]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj2:head : no 'snapset' attr"
+ err_strings[12]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj2:7 : clone ignored due to missing snapset"
+ err_strings[13]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj2:4 : clone ignored due to missing snapset"
+ err_strings[14]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj4:head : expected clone .*:::obj4:7"
+ err_strings[15]="log_channel[(]cluster[)] log [[]INF[]] : scrub [0-9]*[.]0 .*:::obj4:head : 1 missing clone[(]s[)]"
+ err_strings[16]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj1:1 : is an unexpected clone"
+ err_strings[17]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj9:1 : is missing in clone_size"
+ err_strings[18]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj11:1 : is an unexpected clone"
+ err_strings[19]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj14:1 : size 1032 != clone_size 1033"
+ err_strings[20]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 20 errors"
+ err_strings[21]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj15:head : can't decode 'snapset' attr "
+ err_strings[22]="log_channel[(]cluster[)] log [[]ERR[]] : osd[.][0-9]* found snap mapper error on pg 1.0 oid 1:461f8b5e:::obj16:7 snaps missing in mapper, should be: {1, 2, 3, 4, 5, 6, 7} ...repaired"
+
+ for err_string in "${err_strings[@]}"
+ do
+ if ! grep "$err_string" $dir/osd.${primary}.log > /dev/null;
+ then
+ echo "Missing log message '$err_string'"
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+ done
+
+ if [ $ERRORS != "0" ];
+ then
+ echo "TEST FAILED WITH $ERRORS ERRORS"
+ return 1
+ fi
+
+ echo "TEST PASSED"
+ return 0
+}
+
+function _scrub_snaps_multi() {
+ local dir=$1
+ local poolname=test
+ local OBJS=16
+ local OSDS=2
+ local which=$2
+
+ TESTDATA="testdata.$$"
+
+ run_mon $dir a --osd_pool_default_size=$OSDS || return 1
+ run_mgr $dir x || return 1
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ run_osd $dir $osd || return 1
+ done
+
+ # All scrubs done manually. Don't want any unexpected scheduled scrubs.
+ ceph osd set noscrub || return 1
+ ceph osd set nodeep-scrub || return 1
+
+ # Create a pool with a single pg
+ create_pool $poolname 1 1
+ wait_for_clean || return 1
+ poolid=$(ceph osd dump | grep "^pool.*[']test[']" | awk '{ print $2 }')
+
+ dd if=/dev/urandom of=$TESTDATA bs=1032 count=1
+ for i in `seq 1 $OBJS`
+ do
+ rados -p $poolname put obj${i} $TESTDATA
+ done
+
+ local primary=$(get_primary $poolname obj1)
+ local replica=$(get_not_primary $poolname obj1)
+
+ eval create_scenario $dir $poolname $TESTDATA \$$which || return 1
+
+ rm -f $TESTDATA
+
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ activate_osd $dir $osd || return 1
+ done
+
+ ceph tell osd.* config set osd_shallow_scrub_chunk_max 3
+ ceph tell osd.* config set osd_shallow_scrub_chunk_min 3
+ ceph tell osd.* config set osd_scrub_chunk_min 3
+ ceph tell osd.* config set osd_pg_stat_report_interval_max 1
+ wait_for_clean || return 1
+
+ local pgid="${poolid}.0"
+ if ! pg_scrub "$pgid" ; then
+ return 1
+ fi
+
+ test "$(grep "_scan_snaps start" $dir/osd.${primary}.log | wc -l)" -gt "3" || return 1
+ test "$(grep "_scan_snaps start" $dir/osd.${replica}.log | wc -l)" -gt "3" || return 1
+
+ rados list-inconsistent-pg $poolname > $dir/json || return 1
+ # Check pg count
+ test $(jq '. | length' $dir/json) = "1" || return 1
+ # Check pgid
+ test $(jq -r '.[0]' $dir/json) = $pgid || return 1
+
+ rados list-inconsistent-obj $pgid --format=json-pretty
+
+ rados list-inconsistent-snapset $pgid > $dir/json || return 1
+
+    # Since all of the snapshots on the primary are consistent, there are no errors here
+ if [ $which = "replica" ];
+ then
+ scruberrors="20"
+ jq "$jqfilter" << EOF | python3 -c "$sortkeys" > $dir/checkcsjson
+{
+ "epoch": 23,
+ "inconsistents": []
+}
+EOF
+
+else
+ scruberrors="30"
+ jq "$jqfilter" << EOF | python3 -c "$sortkeys" > $dir/checkcsjson
+{
+ "epoch": 23,
+ "inconsistents": [
+ {
+ "name": "obj10",
+ "nspace": "",
+ "locator": "",
+ "snap": 1,
+ "errors": [
+ "size_mismatch"
+ ]
+ },
+ {
+ "name": "obj11",
+ "nspace": "",
+ "locator": "",
+ "snap": 1,
+ "errors": [
+ "headless"
+ ]
+ },
+ {
+ "name": "obj14",
+ "nspace": "",
+ "locator": "",
+ "snap": 1,
+ "errors": [
+ "size_mismatch"
+ ]
+ },
+ {
+ "name": "obj6",
+ "nspace": "",
+ "locator": "",
+ "snap": 1,
+ "errors": [
+ "headless"
+ ]
+ },
+ {
+ "name": "obj7",
+ "nspace": "",
+ "locator": "",
+ "snap": 1,
+ "errors": [
+ "headless"
+ ]
+ },
+ {
+ "name": "obj9",
+ "nspace": "",
+ "locator": "",
+ "snap": 1,
+ "errors": [
+ "size_mismatch"
+ ]
+ },
+ {
+ "name": "obj5",
+ "nspace": "",
+ "locator": "",
+ "snap": 7,
+ "errors": [
+ "info_missing",
+ "headless"
+ ]
+ },
+ {
+ "name": "obj10",
+ "nspace": "",
+ "locator": "",
+ "snap": "head",
+ "snapset": {
+ "seq": 1,
+ "clones": [
+ {
+ "snap": 1,
+ "size": 1032,
+ "overlap": "????",
+ "snaps": [
+ 1
+ ]
+ }
+ ]
+ },
+ "errors": []
+ },
+ {
+ "name": "obj11",
+ "nspace": "",
+ "locator": "",
+ "snap": "head",
+ "snapset": {
+ "seq": 1,
+ "clones": []
+ },
+ "errors": [
+ "extra_clones"
+ ],
+ "extra clones": [
+ 1
+ ]
+ },
+ {
+ "name": "obj14",
+ "nspace": "",
+ "locator": "",
+ "snap": "head",
+ "snapset": {
+ "seq": 1,
+ "clones": [
+ {
+ "snap": 1,
+ "size": 1033,
+ "overlap": "[]",
+ "snaps": [
+ 1
+ ]
+ }
+ ]
+ },
+ "errors": []
+ },
+ {
+ "name": "obj5",
+ "nspace": "",
+ "locator": "",
+ "snap": "head",
+ "snapset": {
+ "seq": 6,
+ "clones": [
+ {
+ "snap": 1,
+ "size": 1032,
+ "overlap": "[]",
+ "snaps": [
+ 1
+ ]
+ },
+ {
+ "snap": 2,
+ "size": 256,
+ "overlap": "[]",
+ "snaps": [
+ 2
+ ]
+ },
+ {
+ "snap": 4,
+ "size": 512,
+ "overlap": "[]",
+ "snaps": [
+ 4,
+ 3
+ ]
+ },
+ {
+ "snap": 6,
+ "size": 1024,
+ "overlap": "[]",
+ "snaps": [
+ 6,
+ 5
+ ]
+ }
+ ]
+ },
+ "errors": [
+ "extra_clones"
+ ],
+ "extra clones": [
+ 7
+ ]
+ },
+ {
+ "name": "obj6",
+ "nspace": "",
+ "locator": "",
+ "snap": "head",
+ "snapset": {
+ "seq": 1,
+ "clones": []
+ },
+ "errors": [
+ "extra_clones"
+ ],
+ "extra clones": [
+ 1
+ ]
+ },
+ {
+ "name": "obj7",
+ "nspace": "",
+ "locator": "",
+ "snap": "head",
+ "snapset": {
+ "seq": 0,
+ "clones": []
+ },
+ "errors": [
+ "extra_clones"
+ ],
+ "extra clones": [
+ 1
+ ]
+ },
+ {
+ "name": "obj8",
+ "nspace": "",
+ "locator": "",
+ "snap": "head",
+ "snapset": {
+ "seq": 0,
+ "clones": [
+ {
+ "snap": 1,
+ "size": 1032,
+ "overlap": "[]",
+ "snaps": [
+ 1
+ ]
+ }
+ ]
+ },
+ "errors": [
+ "snapset_error"
+ ]
+ },
+ {
+ "name": "obj9",
+ "nspace": "",
+ "locator": "",
+ "snap": "head",
+ "snapset": {
+ "seq": 1,
+ "clones": [
+ {
+ "snap": 1,
+ "size": "????",
+ "overlap": "[]",
+ "snaps": [
+ 1
+ ]
+ }
+ ]
+ },
+ "errors": []
+ }
+ ]
+}
+EOF
+fi
+
+ jq "$jqfilter" $dir/json | python3 -c "$sortkeys" > $dir/csjson
+ multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1
+ if test $getjson = "yes"
+ then
+ jq '.' $dir/json > save1.json
+ fi
+
+ if test "$LOCALRUN" = "yes" && which jsonschema > /dev/null;
+ then
+ jsonschema -i $dir/json $CEPH_ROOT/doc/rados/command/list-inconsistent-snap.json || return 1
+ fi
+
+ pidfiles=$(find $dir 2>/dev/null | grep 'osd[^/]*\.pid')
+ pids=""
+ for pidfile in ${pidfiles}
+ do
+ pids+="$(cat $pidfile) "
+ done
+
+ ERRORS=0
+
+    # When removing snapshots with a corrupt replica the OSD crashes.
+    # See http://tracker.ceph.com/issues/23875
+ if [ $which = "primary" ];
+ then
+ for i in `seq 1 7`
+ do
+ rados -p $poolname rmsnap snap$i
+ done
+ sleep 5
+ local -i loop=0
+ while ceph pg dump pgs | grep -q snaptrim;
+ do
+ if ceph pg dump pgs | grep -q snaptrim_error;
+ then
+ break
+ fi
+ sleep 2
+ loop+=1
+ if (( $loop >= 10 )) ; then
+ ERRORS=$(expr $ERRORS + 1)
+ break
+ fi
+ done
+ fi
+ ceph pg dump pgs
+
+ for pid in $pids
+ do
+ if ! kill -0 $pid
+ then
+ echo "OSD Crash occurred"
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+ done
+
+ kill_daemons $dir || return 1
+
+ declare -a err_strings
+ err_strings[0]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] .*:::obj4:7 : missing"
+ err_strings[1]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] soid .*:::obj3:head : size 3840 != size 768 from auth oi"
+ err_strings[2]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] .*:::obj5:1 : missing"
+ err_strings[3]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] .*:::obj5:2 : missing"
+ err_strings[4]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] soid .*:::obj5:4 : size 4608 != size 512 from auth oi"
+ err_strings[5]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid .*:::obj5:7 : failed to pick suitable object info"
+ err_strings[6]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] .*:::obj1:head : missing"
+ err_strings[7]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub ${scruberrors} errors"
+
+ for err_string in "${err_strings[@]}"
+ do
+ if ! grep "$err_string" $dir/osd.${primary}.log > /dev/null;
+ then
+ echo "Missing log message '$err_string'"
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+ done
+
+ # Check replica specific messages
+ declare -a rep_err_strings
+ osd=$(eval echo \$$which)
+ rep_err_strings[0]="log_channel[(]cluster[)] log [[]ERR[]] : osd[.][0-9]* found snap mapper error on pg 1.0 oid 1:461f8b5e:::obj16:7 snaps missing in mapper, should be: {1, 2, 3, 4, 5, 6, 7} ...repaired"
+ for err_string in "${rep_err_strings[@]}"
+ do
+ if ! grep "$err_string" $dir/osd.${osd}.log > /dev/null;
+ then
+ echo "Missing log message '$err_string'"
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+ done
+
+ if [ $ERRORS != "0" ];
+ then
+ echo "TEST FAILED WITH $ERRORS ERRORS"
+ return 1
+ fi
+
+ echo "TEST PASSED"
+ return 0
+}
+
+function TEST_scrub_snaps_replica() {
+ local dir=$1
+ ORIG_ARGS=$CEPH_ARGS
+ CEPH_ARGS+=" --osd_scrub_chunk_min=3 --osd_scrub_chunk_max=20 --osd_shallow_scrub_chunk_min=3 --osd_shallow_scrub_chunk_max=3 --osd_pg_stat_report_interval_max=1"
+ _scrub_snaps_multi $dir replica
+ err=$?
+ CEPH_ARGS=$ORIG_ARGS
+ return $err
+}
+
+function TEST_scrub_snaps_primary() {
+ local dir=$1
+ ORIG_ARGS=$CEPH_ARGS
+ CEPH_ARGS+=" --osd_scrub_chunk_min=3 --osd_scrub_chunk_max=20 --osd_shallow_scrub_chunk_min=3 --osd_shallow_scrub_chunk_max=3 --osd_pg_stat_report_interval_max=1"
+ _scrub_snaps_multi $dir primary
+ err=$?
+ CEPH_ARGS=$ORIG_ARGS
+ return $err
+}
+
+main osd-scrub-snaps "$@"
+
+# Local Variables:
+# compile-command: "cd build ; make -j4 && \
+# ../qa/run-standalone.sh osd-scrub-snaps.sh"
+# End:
diff --git a/qa/standalone/scrub/osd-scrub-test.sh b/qa/standalone/scrub/osd-scrub-test.sh
new file mode 100755
index 000000000..73f165380
--- /dev/null
+++ b/qa/standalone/scrub/osd-scrub-test.sh
@@ -0,0 +1,664 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2018 Red Hat <contact@redhat.com>
+#
+# Author: David Zafman <dzafman@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+source $CEPH_ROOT/qa/standalone/scrub/scrub-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7138" # git grep '\<7138\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+
+ export -n CEPH_CLI_TEST_DUP_COMMAND
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+function TEST_scrub_test() {
+ local dir=$1
+ local poolname=test
+ local OSDS=3
+ local objects=15
+
+ TESTDATA="testdata.$$"
+
+ run_mon $dir a --osd_pool_default_size=3 || return 1
+ run_mgr $dir x || return 1
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ run_osd $dir $osd || return 1
+ done
+
+ # Create a pool with a single pg
+ create_pool $poolname 1 1
+ wait_for_clean || return 1
+ poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }')
+
+ dd if=/dev/urandom of=$TESTDATA bs=1032 count=1
+ for i in `seq 1 $objects`
+ do
+ rados -p $poolname put obj${i} $TESTDATA
+ done
+ rm -f $TESTDATA
+
+ local primary=$(get_primary $poolname obj1)
+ local otherosd=$(get_not_primary $poolname obj1)
+ if [ "$otherosd" = "2" ];
+ then
+ local anotherosd="0"
+ else
+ local anotherosd="2"
+ fi
+
+ objectstore_tool $dir $anotherosd obj1 set-bytes /etc/fstab
+
+ local pgid="${poolid}.0"
+ pg_deep_scrub "$pgid" || return 1
+
+ ceph pg dump pgs | grep ^${pgid} | grep -q -- +inconsistent || return 1
+ test "$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_scrub_errors')" = "2" || return 1
+
+ ceph osd out $primary
+ wait_for_clean || return 1
+
+ pg_deep_scrub "$pgid" || return 1
+
+ test "$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_scrub_errors')" = "2" || return 1
+ test "$(ceph pg $pgid query | jq '.peer_info[0].stats.stat_sum.num_scrub_errors')" = "2" || return 1
+ ceph pg dump pgs | grep ^${pgid} | grep -q -- +inconsistent || return 1
+
+ ceph osd in $primary
+ wait_for_clean || return 1
+
+ repair "$pgid" || return 1
+ wait_for_clean || return 1
+
+    # This sets up the rest of the test: after the repair the previous primary still reports the old error count
+ test "$(ceph pg $pgid query | jq '.peer_info[0].stats.stat_sum.num_scrub_errors')" = "2" || return 1
+ ceph pg dump pgs | grep ^${pgid} | grep -vq -- +inconsistent || return 1
+
+ ceph osd out $primary
+ wait_for_clean || return 1
+
+ test "$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_scrub_errors')" = "0" || return 1
+ test "$(ceph pg $pgid query | jq '.peer_info[0].stats.stat_sum.num_scrub_errors')" = "0" || return 1
+ test "$(ceph pg $pgid query | jq '.peer_info[1].stats.stat_sum.num_scrub_errors')" = "0" || return 1
+ ceph pg dump pgs | grep ^${pgid} | grep -vq -- +inconsistent || return 1
+}
+
+# Grab year-month-day
+DATESED="s/\([0-9]*-[0-9]*-[0-9]*\).*/\1/"
+DATEFORMAT="%Y-%m-%d"
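+# e.g. a stamp such as "2021-10-12T20:32:43.645168+0000" is reduced by
+# `sed $DATESED` to "2021-10-12", the same form produced by `date +$DATEFORMAT`.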
+
+function check_dump_scrubs() {
+ local primary=$1
+ local sched_time_check="$2"
+ local deadline_check="$3"
+
+ DS="$(CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${primary}) dump_scrubs)"
+ # use eval to drop double-quotes
+ eval SCHED_TIME=$(echo $DS | jq '.[0].sched_time')
+ test $(echo $SCHED_TIME | sed $DATESED) = $(date +${DATEFORMAT} -d "now + $sched_time_check") || return 1
+ # use eval to drop double-quotes
+ eval DEADLINE=$(echo $DS | jq '.[0].deadline')
+ test $(echo $DEADLINE | sed $DATESED) = $(date +${DATEFORMAT} -d "now + $deadline_check") || return 1
+}
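+
+# Example use (a sketch mirroring the calls in TEST_interval_changes below):
+#   check_dump_scrubs $primary "1 day" "1 week" || return 1
+# i.e. the first dump_scrubs entry of osd.$primary should be scheduled one day
+# from now with a deadline one week from now.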
+
+function TEST_interval_changes() {
+    local dir=$1
+    local poolname=test
+ local OSDS=2
+ local objects=10
+ # Don't assume how internal defaults are set
+ local day="$(expr 24 \* 60 \* 60)"
+ local week="$(expr $day \* 7)"
+ local min_interval=$day
+ local max_interval=$week
+ local WAIT_FOR_UPDATE=15
+
+ TESTDATA="testdata.$$"
+
+ # This min scrub interval results in 30 seconds backoff time
+ run_mon $dir a --osd_pool_default_size=$OSDS || return 1
+ run_mgr $dir x || return 1
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ run_osd $dir $osd --osd_scrub_min_interval=$min_interval --osd_scrub_max_interval=$max_interval --osd_scrub_interval_randomize_ratio=0 || return 1
+ done
+
+ # Create a pool with a single pg
+ create_pool $poolname 1 1
+ wait_for_clean || return 1
+ local poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }')
+
+ dd if=/dev/urandom of=$TESTDATA bs=1032 count=1
+ for i in `seq 1 $objects`
+ do
+ rados -p $poolname put obj${i} $TESTDATA
+ done
+ rm -f $TESTDATA
+
+ local primary=$(get_primary $poolname obj1)
+
+    # Check initial settings from above (min 1 day, max 1 week)
+ check_dump_scrubs $primary "1 day" "1 week" || return 1
+
+ # Change global osd_scrub_min_interval to 2 days
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${primary}) config set osd_scrub_min_interval $(expr $day \* 2)
+ sleep $WAIT_FOR_UPDATE
+ check_dump_scrubs $primary "2 days" "1 week" || return 1
+
+ # Change global osd_scrub_max_interval to 2 weeks
+ CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${primary}) config set osd_scrub_max_interval $(expr $week \* 2)
+ sleep $WAIT_FOR_UPDATE
+ check_dump_scrubs $primary "2 days" "2 week" || return 1
+
+ # Change pool osd_scrub_min_interval to 3 days
+ ceph osd pool set $poolname scrub_min_interval $(expr $day \* 3)
+ sleep $WAIT_FOR_UPDATE
+ check_dump_scrubs $primary "3 days" "2 week" || return 1
+
+ # Change pool osd_scrub_max_interval to 3 weeks
+ ceph osd pool set $poolname scrub_max_interval $(expr $week \* 3)
+ sleep $WAIT_FOR_UPDATE
+ check_dump_scrubs $primary "3 days" "3 week" || return 1
+}
+
+function TEST_scrub_extended_sleep() {
+ local dir=$1
+ local poolname=test
+ local OSDS=3
+ local objects=15
+
+ TESTDATA="testdata.$$"
+
+ DAY=$(date +%w)
+ # Handle wrap
+ if [ "$DAY" -ge "4" ];
+ then
+ DAY="0"
+ fi
+ # Start after 2 days in case we are near midnight
+ DAY_START=$(expr $DAY + 2)
+ DAY_END=$(expr $DAY + 3)
+
+ run_mon $dir a --osd_pool_default_size=3 || return 1
+ run_mgr $dir x || return 1
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ run_osd $dir $osd --osd_scrub_sleep=0 \
+ --osd_scrub_extended_sleep=20 \
+ --bluestore_cache_autotune=false \
+ --osd_deep_scrub_randomize_ratio=0.0 \
+ --osd_scrub_interval_randomize_ratio=0 \
+ --osd_scrub_begin_week_day=$DAY_START \
+ --osd_scrub_end_week_day=$DAY_END \
+ || return 1
+ done
+
+ # Create a pool with a single pg
+ create_pool $poolname 1 1
+ wait_for_clean || return 1
+
+ # Trigger a scrub on a PG
+ local pgid=$(get_pg $poolname SOMETHING)
+ local primary=$(get_primary $poolname SOMETHING)
+ local last_scrub=$(get_last_scrub_stamp $pgid)
+ ceph tell $pgid scrub || return 1
+
+ # Allow scrub to start extended sleep
+ PASSED="false"
+ for ((i=0; i < 15; i++)); do
+ if grep -q "scrub state.*, sleeping" $dir/osd.${primary}.log
+ then
+ PASSED="true"
+ break
+ fi
+ sleep 1
+ done
+
+ # Check that extended sleep was triggered
+ if [ $PASSED = "false" ];
+ then
+ return 1
+ fi
+
+ # release scrub to run after extended sleep finishes
+ ceph tell osd.$primary config set osd_scrub_begin_week_day 0
+ ceph tell osd.$primary config set osd_scrub_end_week_day 0
+
+    # Due to the extended sleep the scrub should not complete within 20 seconds.
+    # Fail if it completes before 10 seconds, and require it to complete within 25 seconds.
+ count=0
+ PASSED="false"
+ for ((i=0; i < 25; i++)); do
+ count=$(expr $count + 1)
+ if test "$(get_last_scrub_stamp $pgid)" '>' "$last_scrub" ; then
+ # Did scrub run too soon?
+ if [ $count -lt "10" ];
+ then
+ return 1
+ fi
+ PASSED="true"
+ break
+ fi
+ sleep 1
+ done
+
+ # Make sure scrub eventually ran
+ if [ $PASSED = "false" ];
+ then
+ return 1
+ fi
+}
+
+function _scrub_abort() {
+ local dir=$1
+ local poolname=test
+ local OSDS=3
+ local objects=1000
+ local type=$2
+
+ TESTDATA="testdata.$$"
+ if test $type = "scrub";
+ then
+ stopscrub="noscrub"
+ check="noscrub"
+ else
+ stopscrub="nodeep-scrub"
+ check="nodeep_scrub"
+ fi
+
+ run_mon $dir a --osd_pool_default_size=3 || return 1
+ run_mgr $dir x || return 1
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ # Set scheduler to "wpq" until there's a reliable way to query scrub
+ # states with "--osd-scrub-sleep" set to 0. The "mclock_scheduler"
+ # overrides the scrub sleep to 0 and as a result the checks in the
+ # test fail.
+ run_osd $dir $osd --osd_pool_default_pg_autoscale_mode=off \
+ --osd_deep_scrub_randomize_ratio=0.0 \
+ --osd_scrub_sleep=5.0 \
+ --osd_scrub_interval_randomize_ratio=0 \
+ --osd_op_queue=wpq || return 1
+ done
+
+ # Create a pool with a single pg
+ create_pool $poolname 1 1
+ wait_for_clean || return 1
+ poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }')
+
+ dd if=/dev/urandom of=$TESTDATA bs=1032 count=1
+ for i in `seq 1 $objects`
+ do
+ rados -p $poolname put obj${i} $TESTDATA
+ done
+ rm -f $TESTDATA
+
+ local primary=$(get_primary $poolname obj1)
+ local pgid="${poolid}.0"
+
+ ceph tell $pgid $type || return 1
+    # the deep-scrub won't start unless a scrub is also requested
+ if [ "$type" = "deep_scrub" ];
+ then
+ ceph tell $pgid scrub || return 1
+ fi
+
+ # Wait for scrubbing to start
+ set -o pipefail
+ found="no"
+ for i in $(seq 0 200)
+ do
+ flush_pg_stats
+ if ceph pg dump pgs | grep ^$pgid| grep -q "scrubbing"
+ then
+ found="yes"
+ #ceph pg dump pgs
+ break
+ fi
+ done
+ set +o pipefail
+
+ if test $found = "no";
+ then
+ echo "Scrubbing never started"
+ return 1
+ fi
+
+ ceph osd set $stopscrub
+ if [ "$type" = "deep_scrub" ];
+ then
+ ceph osd set noscrub
+ fi
+
+ # Wait for scrubbing to end
+ set -o pipefail
+ for i in $(seq 0 200)
+ do
+ flush_pg_stats
+ if ceph pg dump pgs | grep ^$pgid | grep -q "scrubbing"
+ then
+ continue
+ fi
+ #ceph pg dump pgs
+ break
+ done
+ set +o pipefail
+
+ sleep 5
+
+ if ! grep "$check set, aborting" $dir/osd.${primary}.log
+ then
+ echo "Abort not seen in log"
+ return 1
+ fi
+
+ local last_scrub=$(get_last_scrub_stamp $pgid)
+ ceph config set osd "osd_scrub_sleep" "0.1"
+
+ ceph osd unset $stopscrub
+ if [ "$type" = "deep_scrub" ];
+ then
+ ceph osd unset noscrub
+ fi
+ TIMEOUT=$(($objects / 2))
+ wait_for_scrub $pgid "$last_scrub" || return 1
+}
+
+function TEST_scrub_abort() {
+ local dir=$1
+ _scrub_abort $dir scrub
+}
+
+function TEST_deep_scrub_abort() {
+ local dir=$1
+ _scrub_abort $dir deep_scrub
+}
+
+function TEST_scrub_permit_time() {
+ local dir=$1
+ local poolname=test
+ local OSDS=3
+ local objects=15
+
+ TESTDATA="testdata.$$"
+
+ run_mon $dir a --osd_pool_default_size=3 || return 1
+ run_mgr $dir x || return 1
+ local scrub_begin_hour=$(date -d '2 hour ago' +"%H" | sed 's/^0//')
+ local scrub_end_hour=$(date -d '1 hour ago' +"%H" | sed 's/^0//')
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ run_osd $dir $osd --bluestore_cache_autotune=false \
+ --osd_deep_scrub_randomize_ratio=0.0 \
+ --osd_scrub_interval_randomize_ratio=0 \
+ --osd_scrub_begin_hour=$scrub_begin_hour \
+ --osd_scrub_end_hour=$scrub_end_hour || return 1
+ done
+
+ # Create a pool with a single pg
+ create_pool $poolname 1 1
+ wait_for_clean || return 1
+
+ # Trigger a scrub on a PG
+ local pgid=$(get_pg $poolname SOMETHING)
+ local primary=$(get_primary $poolname SOMETHING)
+ local last_scrub=$(get_last_scrub_stamp $pgid)
+ # If we don't specify an amount of time to subtract from
+ # current time to set last_scrub_stamp, it sets the deadline
+ # back by osd_max_interval which would cause the time permit checking
+ # to be skipped. Set back 1 day, the default scrub_min_interval.
+ ceph tell $pgid scrub $(( 24 * 60 * 60 )) || return 1
+
+ # Scrub should not run
+ for ((i=0; i < 30; i++)); do
+ if test "$(get_last_scrub_stamp $pgid)" '>' "$last_scrub" ; then
+ return 1
+ fi
+ sleep 1
+ done
+}
+
+# a test to recreate the problem described in bug #52901 - setting 'noscrub'
+# without explicitly preventing deep scrubs made the PG 'unscrubable'.
+# Fixed by PR#43521
+function TEST_just_deep_scrubs() {
+ local dir=$1
+ local -A cluster_conf=(
+ ['osds_num']="3"
+ ['pgs_in_pool']="4"
+ ['pool_name']="test"
+ )
+
+ standard_scrub_cluster $dir cluster_conf
+ local poolid=${cluster_conf['pool_id']}
+ local poolname=${cluster_conf['pool_name']}
+ echo "Pool: $poolname : $poolid"
+
+ TESTDATA="testdata.$$"
+ local objects=15
+ dd if=/dev/urandom of=$TESTDATA bs=1032 count=1
+ for i in `seq 1 $objects`
+ do
+ rados -p $poolname put obj${i} $TESTDATA
+ done
+ rm -f $TESTDATA
+
+ # set both 'no scrub' & 'no deep-scrub', then request a deep-scrub.
+ # we do not expect to see the scrub scheduled.
+
+ ceph osd set noscrub || return 1
+ ceph osd set nodeep-scrub || return 1
+ sleep 6 # the 'noscrub' command takes a long time to reach the OSDs
+ local now_is=`date -I"ns"`
+ declare -A sched_data
+ local pgid="${poolid}.2"
+
+ # turn on the publishing of test data in the 'scrubber' section of 'pg query' output
+ set_query_debug $pgid
+
+ extract_published_sch $pgid $now_is $now_is sched_data
+ local saved_last_stamp=${sched_data['query_last_stamp']}
+ local dbg_counter_at_start=${sched_data['query_scrub_seq']}
+ echo "test counter @ start: $dbg_counter_at_start"
+
+ ceph pg $pgid deep_scrub
+
+ sleep 5 # 5s is the 'pg dump' interval
+ declare -A sc_data_2
+ extract_published_sch $pgid $now_is $now_is sc_data_2
+ echo "test counter @ should show no change: " ${sc_data_2['query_scrub_seq']}
+ (( ${sc_data_2['dmp_last_duration']} == 0)) || return 1
+ (( ${sc_data_2['query_scrub_seq']} == $dbg_counter_at_start)) || return 1
+
+ # unset the 'no deep-scrub'. Deep scrubbing should start now.
+ ceph osd unset nodeep-scrub || return 1
+ sleep 5
+ declare -A expct_qry_duration=( ['query_last_duration']="0" ['query_last_duration_neg']="not0" )
+ sc_data_2=()
+ echo "test counter @ should be higher than before the unset: " ${sc_data_2['query_scrub_seq']}
+ wait_any_cond $pgid 10 $saved_last_stamp expct_qry_duration "WaitingAfterScrub " sc_data_2 || return 1
+}
+
+function TEST_dump_scrub_schedule() {
+ local dir=$1
+ local poolname=test
+ local OSDS=3
+ local objects=15
+
+ TESTDATA="testdata.$$"
+
+ run_mon $dir a --osd_pool_default_size=$OSDS || return 1
+ run_mgr $dir x || return 1
+
+ # Set scheduler to "wpq" until there's a reliable way to query scrub states
+ # with "--osd-scrub-sleep" set to 0. The "mclock_scheduler" overrides the
+ # scrub sleep to 0 and as a result the checks in the test fail.
+ local ceph_osd_args="--osd_deep_scrub_randomize_ratio=0 \
+ --osd_scrub_interval_randomize_ratio=0 \
+ --osd_scrub_backoff_ratio=0.0 \
+ --osd_op_queue=wpq \
+ --osd_scrub_sleep=0.2"
+
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ run_osd $dir $osd $ceph_osd_args|| return 1
+ done
+
+ # Create a pool with a single pg
+ create_pool $poolname 1 1
+ wait_for_clean || return 1
+ poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }')
+
+ dd if=/dev/urandom of=$TESTDATA bs=1032 count=1
+ for i in `seq 1 $objects`
+ do
+ rados -p $poolname put obj${i} $TESTDATA
+ done
+ rm -f $TESTDATA
+
+ local pgid="${poolid}.0"
+ local now_is=`date -I"ns"`
+
+ # before the scrubbing starts
+
+ # last scrub duration should be 0. The scheduling data should show
+ # a time in the future:
+ # e.g. 'periodic scrub scheduled @ 2021-10-12T20:32:43.645168+0000'
+
+ declare -A expct_starting=( ['query_active']="false" ['query_is_future']="true" ['query_schedule']="scrub scheduled" )
+ declare -A sched_data
+ extract_published_sch $pgid $now_is "2019-10-12T20:32:43.645168+0000" sched_data
+ schedule_against_expected sched_data expct_starting "initial"
+ (( ${sched_data['dmp_last_duration']} == 0)) || return 1
+ echo "last-scrub --- " ${sched_data['query_last_scrub']}
+
+ #
+ # step 1: scrub once (mainly to ensure there is no urgency to scrub)
+ #
+
+ saved_last_stamp=${sched_data['query_last_stamp']}
+ ceph tell osd.* config set osd_scrub_sleep "0"
+ ceph pg deep-scrub $pgid
+ ceph pg scrub $pgid
+
+ # wait for the 'last duration' entries to change. Note that the 'dump' one will need
+ # up to 5 seconds to sync
+
+ sleep 5
+ sched_data=()
+ declare -A expct_qry_duration=( ['query_last_duration']="0" ['query_last_duration_neg']="not0" )
+ wait_any_cond $pgid 10 $saved_last_stamp expct_qry_duration "WaitingAfterScrub " sched_data || return 1
+ # verify that 'pg dump' also shows the change in last_scrub_duration
+ sched_data=()
+ declare -A expct_dmp_duration=( ['dmp_last_duration']="0" ['dmp_last_duration_neg']="not0" )
+ wait_any_cond $pgid 10 $saved_last_stamp expct_dmp_duration "WaitingAfterScrub_dmp " sched_data || return 1
+
+ sleep 2
+
+ #
+ # step 2: set noscrub and request a "periodic scrub". Watch for the change in the 'is the scrub
+ # scheduled for the future' value
+ #
+
+ ceph tell osd.* config set osd_scrub_chunk_max "3" || return 1
+ ceph tell osd.* config set osd_scrub_sleep "1.0" || return 1
+ ceph osd set noscrub || return 1
+ sleep 2
+ saved_last_stamp=${sched_data['query_last_stamp']}
+
+ ceph pg $pgid scrub
+ sleep 1
+ sched_data=()
+ declare -A expct_scrub_peri_sched=( ['query_is_future']="false" )
+ wait_any_cond $pgid 10 $saved_last_stamp expct_scrub_peri_sched "waitingBeingScheduled" sched_data || return 1
+
+    # note: the induced change in 'last_scrub_stamp' caused above is not, by itself, a publish-stats
+    # trigger. Thus the information in 'pg dump' might not get updated here. Do not expect
+    # 'dmp_is_future' to follow 'query_is_future' without a good reason
+ ## declare -A expct_scrub_peri_sched_dmp=( ['dmp_is_future']="false" )
+ ## wait_any_cond $pgid 15 $saved_last_stamp expct_scrub_peri_sched_dmp "waitingBeingScheduled" sched_data || echo "must be fixed"
+
+ #
+ # step 3: allow scrubs. Watch for the conditions during the scrubbing
+ #
+
+ saved_last_stamp=${sched_data['query_last_stamp']}
+ ceph osd unset noscrub
+
+ declare -A cond_active=( ['query_active']="true" )
+ sched_data=()
+ wait_any_cond $pgid 10 $saved_last_stamp cond_active "WaitingActive " sched_data || return 1
+
+    # check that 'pg dump' shows the PG as scrubbing. If 'query_active' has already been
+    # reset, we have simply missed the active window.
+ declare -A cond_active_dmp=( ['dmp_state_has_scrubbing']="true" ['query_active']="false" )
+ sched_data=()
+ wait_any_cond $pgid 10 $saved_last_stamp cond_active_dmp "WaitingActive " sched_data || return 1
+}
+
+function TEST_pg_dump_objects_scrubbed() {
+ local dir=$1
+ local poolname=test
+ local OSDS=3
+ local objects=15
+ local timeout=10
+
+ TESTDATA="testdata.$$"
+
+ setup $dir || return 1
+ run_mon $dir a --osd_pool_default_size=$OSDS || return 1
+ run_mgr $dir x || return 1
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ run_osd $dir $osd || return 1
+ done
+
+ # Create a pool with a single pg
+ create_pool $poolname 1 1
+ wait_for_clean || return 1
+ poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }')
+
+ dd if=/dev/urandom of=$TESTDATA bs=1032 count=1
+ for i in `seq 1 $objects`
+ do
+ rados -p $poolname put obj${i} $TESTDATA
+ done
+ rm -f $TESTDATA
+
+ local pgid="${poolid}.0"
+ #Trigger a scrub on a PG
+ pg_scrub $pgid || return 1
+ test "$(ceph pg $pgid query | jq '.info.stats.objects_scrubbed')" '=' $objects || return 1
+
+ teardown $dir || return 1
+}
+
+main osd-scrub-test "$@"
+
+# Local Variables:
+# compile-command: "cd build ; make -j4 && \
+# ../qa/run-standalone.sh osd-scrub-test.sh"
+# End:
diff --git a/qa/standalone/scrub/osd-unexpected-clone.sh b/qa/standalone/scrub/osd-unexpected-clone.sh
new file mode 100755
index 000000000..6895bfee6
--- /dev/null
+++ b/qa/standalone/scrub/osd-unexpected-clone.sh
@@ -0,0 +1,89 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2015 Intel <contact@intel.com>
+# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com>
+#
+# Author: Xiaoxi Chen <xiaoxi.chen@intel.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7144" # git grep '\<7144\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+
+ export -n CEPH_CLI_TEST_DUP_COMMAND
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+function TEST_recover_unexpected() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+
+ ceph osd pool create foo 1
+ rados -p foo put foo /etc/passwd
+ rados -p foo mksnap snap
+ rados -p foo put foo /etc/group
+
+ wait_for_clean || return 1
+
+ local osd=$(get_primary foo foo)
+
+ JSON=`objectstore_tool $dir $osd --op list foo | grep snapid.:1`
+ echo "JSON is $JSON"
+ rm -f $dir/_ $dir/data
+ objectstore_tool $dir $osd "$JSON" get-attr _ > $dir/_ || return 1
+ objectstore_tool $dir $osd "$JSON" get-bytes $dir/data || return 1
+
+ rados -p foo rmsnap snap
+
+ sleep 5
+
+ objectstore_tool $dir $osd "$JSON" set-bytes $dir/data || return 1
+ objectstore_tool $dir $osd "$JSON" set-attr _ $dir/_ || return 1
+
+ sleep 5
+
+ ceph pg repair 1.0 || return 1
+
+ sleep 10
+
+ ceph log last
+
+ # make sure osds are still up
+ timeout 60 ceph tell osd.0 version || return 1
+ timeout 60 ceph tell osd.1 version || return 1
+ timeout 60 ceph tell osd.2 version || return 1
+}
+
+
+main osd-unexpected-clone "$@"
+
+# Local Variables:
+# compile-command: "cd build ; make -j4 && ../qa/run-standalone.sh osd-unexpected-clone.sh"
+# End:
diff --git a/qa/standalone/scrub/scrub-helpers.sh b/qa/standalone/scrub/scrub-helpers.sh
new file mode 100644
index 000000000..6816d71de
--- /dev/null
+++ b/qa/standalone/scrub/scrub-helpers.sh
@@ -0,0 +1,302 @@
+#!/usr/bin/env bash
+# @file scrub-helpers.sh
+# @brief a collection of bash functions useful for scrub standalone tests
+#
+
+# extract_published_sch()
+#
+# Use the output from both 'ceph pg dump pgs' and 'ceph pg x.x query' commands to determine
+# the published scrub scheduling status of a given PG.
+#
+# $1: pg id
+# $2: 'current' time to compare to
+# $3: an additional time-point to compare to
+# $4: [out] dictionary
+#
+function extract_published_sch() {
+ local pgn="$1"
+ local -n dict=$4 # a ref to the in/out dictionary
+ local current_time=$2
+ local extra_time=$3
+ local extr_dbg=1 # note: 3 and above leave some temp files around
+
+    # turn off '-x' (but remember previous state)
+ local saved_echo_flag=${-//[^x]/}
+ set +x
+
+ (( extr_dbg >= 3 )) && ceph pg dump pgs -f json-pretty >> /tmp/a_dmp$$
+ (( extr_dbg >= 3 )) && ceph pg $1 query -f json-pretty >> /tmp/a_qry$$
+
+ from_dmp=`ceph pg dump pgs -f json-pretty | jq -r --arg pgn "$pgn" --arg extra_dt "$extra_time" --arg current_dt "$current_time" '[
+ [[.pg_stats[]] | group_by(.pg_stats)][0][0] |
+ [.[] |
+ select(has("pgid") and .pgid == $pgn) |
+
+ (.dmp_stat_part=(.scrub_schedule | if test(".*@.*") then (split(" @ ")|first) else . end)) |
+ (.dmp_when_part=(.scrub_schedule | if test(".*@.*") then (split(" @ ")|last) else "0" end)) |
+
+ [ {
+ dmp_pg_state: .state,
+ dmp_state_has_scrubbing: (.state | test(".*scrub.*";"i")),
+ dmp_last_duration:.last_scrub_duration,
+ dmp_schedule: .dmp_stat_part,
+ dmp_schedule_at: .dmp_when_part,
+ dmp_is_future: ( .dmp_when_part > $current_dt ),
+ dmp_vs_date: ( .dmp_when_part > $extra_dt ),
+ dmp_reported_epoch: .reported_epoch,
+ dmp_seq: .reported_seq
+ }] ]][][][]'`
+
+ (( extr_dbg >= 2 )) && echo "from pg dump pg: $from_dmp"
+ (( extr_dbg >= 2 )) && echo "query output:"
+ (( extr_dbg >= 2 )) && ceph pg $1 query -f json-pretty | awk -e '/scrubber/,/agent_state/ {print;}'
+
+ from_qry=`ceph pg $1 query -f json-pretty | jq -r --arg extra_dt "$extra_time" --arg current_dt "$current_time" --arg spt "'" '
+ . |
+ (.q_stat_part=((.scrubber.schedule// "-") | if test(".*@.*") then (split(" @ ")|first) else . end)) |
+ (.q_when_part=((.scrubber.schedule// "0") | if test(".*@.*") then (split(" @ ")|last) else "0" end)) |
+ (.q_when_is_future=(.q_when_part > $current_dt)) |
+ (.q_vs_date=(.q_when_part > $extra_dt)) |
+ {
+ query_epoch: .epoch,
+ query_seq: .info.stats.reported_seq,
+ query_active: (.scrubber | if has("active") then .active else "bug" end),
+ query_schedule: .q_stat_part,
+ query_schedule_at: .q_when_part,
+ query_last_duration: .info.stats.last_scrub_duration,
+ query_last_stamp: .info.history.last_scrub_stamp,
+ query_last_scrub: (.info.history.last_scrub| sub($spt;"x") ),
+ query_is_future: .q_when_is_future,
+ query_vs_date: .q_vs_date,
+ query_scrub_seq: .scrubber.test_sequence
+ }
+ '`
+ (( extr_dbg >= 1 )) && echo $from_qry " " $from_dmp | jq -s -r 'add | "(",(to_entries | .[] | "["+(.key)+"]="+(.value|@sh)),")"'
+
+    # Note: passing a reference to an associative array directly is tricky; instead, we copy it element by element:
+ local -A dict_src=`echo $from_qry " " $from_dmp | jq -s -r 'add | "(",(to_entries | .[] | "["+(.key)+"]="+(.value|@sh)),")"'`
+ dict=()
+ for k in "${!dict_src[@]}"; do dict[$k]=${dict_src[$k]}; done
+
+ if [[ -n "$saved_echo_flag" ]]; then set -x; fi
+}
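+
+# Illustrative usage sketch for extract_published_sch() (not invoked by any test; the
+# PG id and the reuse of 'now' for both time-point arguments are examples only):
+function example_extract_published_sch_usage() {
+    local -A sched_data=()
+    local now_is=$(date -I"ns")
+    extract_published_sch "1.0" "$now_is" "$now_is" sched_data
+    echo "last scrub stamp: ${sched_data['query_last_stamp']}"
+    echo "pg dump shows scrubbing: ${sched_data['dmp_state_has_scrubbing']}"
+}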
+
+# query the PG, until any of the conditions in the 'expected' array are met
+#
+# A condition may be negated by an additional entry in the 'expected' array. Its
+# form should be:
+# key: the original key, with a "_neg" suffix;
+# Value: not checked
+#
+# $1: pg id
+# $2: max retries
+# $3: a date to use in comparisons
+# $4: set of K/V conditions
+# $5: debug message
+# $6: [out] the results array
+function wait_any_cond() {
+ local pgid="$1"
+ local retries=$2
+ local cmp_date=$3
+ local -n ep=$4
+ local -n out_array=$6
+ local -A sc_data
+ local extr_dbg=2
+
+    # turn off '-x' (but remember previous state)
+ local saved_echo_flag=${-//[^x]/}
+ set +x
+
+ local now_is=`date -I"ns"`
+ (( extr_dbg >= 2 )) && echo "waiting for any condition ($5): pg:$pgid dt:$cmp_date ($retries retries)"
+
+ for i in $(seq 1 $retries)
+ do
+ sleep 0.5
+ extract_published_sch $pgid $now_is $cmp_date sc_data
+ (( extr_dbg >= 4 )) && echo "${sc_data['dmp_last_duration']}"
+ (( extr_dbg >= 4 )) && echo "----> loop: $i ~ ${sc_data['dmp_last_duration']} / " ${sc_data['query_vs_date']} " / ${sc_data['dmp_is_future']}"
+ (( extr_dbg >= 2 )) && echo "--> loop: $i ~ ${sc_data['query_active']} / ${sc_data['query_seq']} / ${sc_data['dmp_seq']} " \
+ "/ ${sc_data['query_is_future']} / ${sc_data['query_last_stamp']} / ${sc_data['query_schedule']} %%% ${!ep[@]}"
+
+ # perform schedule_against_expected(), but with slightly different out-messages behaviour
+ for k_ref in "${!ep[@]}"
+ do
+ (( extr_dbg >= 3 )) && echo "key is $k_ref"
+ # is this a real key, or just a negation flag for another key??
+ [[ $k_ref =~ "_neg" ]] && continue
+
+ local act_val=${sc_data[$k_ref]}
+ local exp_val=${ep[$k_ref]}
+
+ # possible negation? look for a matching key
+ local neg_key="${k_ref}_neg"
+ (( extr_dbg >= 3 )) && echo "neg-key is $neg_key"
+ if [ -v 'ep[$neg_key]' ]; then
+ is_neg=1
+ else
+ is_neg=0
+ fi
+
+ (( extr_dbg >= 1 )) && echo "key is $k_ref: negation:$is_neg # expected: $exp_val # in actual: $act_val"
+ is_eq=0
+ [[ $exp_val == $act_val ]] && is_eq=1
+ if (($is_eq ^ $is_neg))
+ then
+ echo "$5 - '$k_ref' actual value ($act_val) matches expected ($exp_val) (negation: $is_neg)"
+ for k in "${!sc_data[@]}"; do out_array[$k]=${sc_data[$k]}; done
+ if [[ -n "$saved_echo_flag" ]]; then set -x; fi
+ return 0
+ fi
+ done
+ done
+
+ echo "$5: wait_any_cond(): failure. Note: query-active=${sc_data['query_active']}"
+ if [[ -n "$saved_echo_flag" ]]; then set -x; fi
+ return 1
+}
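+
+# Illustrative usage sketch for wait_any_cond() (not invoked by any test; the PG id,
+# retry count, time-point and condition key are examples only):
+function example_wait_any_cond_usage() {
+    local pgid="1.0"
+    local prev_stamp="2023-01-01T00:00:00.000000+0000"  # an example 'last stamp' to compare against
+    # wait (up to 10 polls) for the scrubber to report itself as active
+    declare -A expected=( ['query_active']="true" )
+    # (adding a matching ['query_active_neg'] entry would invert the condition instead)
+    declare -A results=()
+    wait_any_cond $pgid 10 $prev_stamp expected "example-wait" results || return 1
+    echo "matched; current schedule: ${results['query_schedule']}"
+}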
+
+
+# schedule_against_expected()
+#
+# Compare the scrub scheduling state collected by extract_published_sch() to a set of expected values.
+# All values are expected to match.
+#
+# $1: the published scheduling state
+# $2: a set of conditions to verify
+# $3: text to be echoed for a failed match
+#
+function schedule_against_expected() {
+ local -n dict=$1 # a ref to the published state
+ local -n ep=$2 # the expected results
+ local extr_dbg=1
+
+ # turn off '-x' (but remember previous state)
+ local saved_echo_flag=${-//[^x]/}
+ set +x
+
+ (( extr_dbg >= 1 )) && echo "-- - comparing:"
+ for k_ref in "${!ep[@]}"
+ do
+ local act_val=${dict[$k_ref]}
+ local exp_val=${ep[$k_ref]}
+ (( extr_dbg >= 1 )) && echo "key is " $k_ref " expected: " $exp_val " in actual: " $act_val
+ if [[ $exp_val != $act_val ]]
+ then
+ echo "$3 - '$k_ref' actual value ($act_val) differs from expected ($exp_val)"
+ echo '####################################################^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^'
+
+ if [[ -n "$saved_echo_flag" ]]; then set -x; fi
+ return 1
+ fi
+ done
+
+ if [[ -n "$saved_echo_flag" ]]; then set -x; fi
+ return 0
+}
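+
+# Illustrative usage sketch for schedule_against_expected() (not invoked by any test;
+# the PG id and the expected key/value pairs are examples only):
+function example_schedule_against_expected_usage() {
+    local -A published=()
+    local now_is=$(date -I"ns")
+    extract_published_sch "1.0" "$now_is" "$now_is" published
+    declare -A expected=( ['query_active']="false" ['dmp_state_has_scrubbing']="false" )
+    schedule_against_expected published expected "example-compare" || return 1
+}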
+
+
+# Start the cluster "nodes" and create a pool for testing.
+#
+# The OSDs are started with a set of parameters aimed at creating a repeatable
+# and stable scrub sequence:
+# - no scrub randomizations/backoffs
+# - no autoscaler
+#
+# $1: the test directory
+# $2: [in/out] an array of configuration values
+#
+# The function adds/updates the configuration dictionary with the name of the
+# pool created, and its ID.
+#
+# Argument 2 might look like this:
+#
+# declare -A test_conf=(
+# ['osds_num']="3"
+# ['pgs_in_pool']="7"
+# ['extras']="--extra1 --extra2"
+# ['pool_name']="testpl"
+# )
+function standard_scrub_cluster() {
+ local dir=$1
+ local -n args=$2
+
+ local OSDS=${args['osds_num']:-"3"}
+ local pg_num=${args['pgs_in_pool']:-"8"}
+ local poolname="${args['pool_name']:-test}"
+ args['pool_name']=$poolname
+ local extra_pars=${args['extras']}
+ local debug_msg=${args['msg']:-"dbg"}
+
+ # turn off '-x' (but remember previous state)
+ local saved_echo_flag=${-//[^x]/}
+ set +x
+
+ run_mon $dir a --osd_pool_default_size=$OSDS || return 1
+ run_mgr $dir x || return 1
+
+ local ceph_osd_args="--osd_deep_scrub_randomize_ratio=0 \
+ --osd_scrub_interval_randomize_ratio=0 \
+ --osd_scrub_backoff_ratio=0.0 \
+ --osd_pool_default_pg_autoscale_mode=off \
+ --osd_pg_stat_report_interval_max=1 \
+ $extra_pars"
+
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ run_osd $dir $osd $(echo $ceph_osd_args) || return 1
+ done
+
+ create_pool $poolname $pg_num $pg_num
+ wait_for_clean || return 1
+
+ # update the in/out 'args' with the ID of the new pool
+ sleep 1
+ name_n_id=`ceph osd dump | awk '/^pool.*'$poolname'/ { gsub(/'"'"'/," ",$3); print $3," ", $2}'`
+ echo "standard_scrub_cluster: $debug_msg: test pool is $name_n_id"
+ args['pool_id']="${name_n_id##* }"
+ args['osd_args']=$ceph_osd_args
+ if [[ -n "$saved_echo_flag" ]]; then set -x; fi
+}
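+
+# Illustrative usage sketch for standard_scrub_cluster() (not invoked by any test; the
+# configuration values below are examples only):
+function example_standard_scrub_cluster_usage() {
+    local dir=$1
+    declare -A cluster_conf=(
+        ['osds_num']="3"
+        ['pgs_in_pool']="8"
+        ['pool_name']="test"
+    )
+    standard_scrub_cluster $dir cluster_conf || return 1
+    echo "created pool ${cluster_conf['pool_name']} with ID ${cluster_conf['pool_id']}"
+}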
+
+
+# Start the cluster "nodes" and create a pool for testing - wpq version.
+#
+# A variant of standard_scrub_cluster() that selects the wpq scheduler and sets a value to
+# osd_scrub_sleep. To be used when the test is attempting to "catch" the scrubber during an
+# ongoing scrub.
+#
+# See standard_scrub_cluster() for more details.
+#
+# $1: the test directory
+# $2: [in/out] an array of configuration values
+# $3: osd_scrub_sleep
+#
+# The function adds/updates the configuration dictionary with the name of the
+# pool created, and its ID.
+function standard_scrub_wpq_cluster() {
+ local dir=$1
+ local -n conf=$2
+ local osd_sleep=$3
+
+ conf['extras']=" --osd_op_queue=wpq --osd_scrub_sleep=$osd_sleep ${conf['extras']}"
+
+ standard_scrub_cluster $dir conf || return 1
+}
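+
+# Illustrative usage sketch for standard_scrub_wpq_cluster() (not invoked by any test;
+# the 2.5-second osd_scrub_sleep is an arbitrary example value):
+function example_standard_scrub_wpq_cluster_usage() {
+    local dir=$1
+    declare -A cluster_conf=( ['osds_num']="3" ['pgs_in_pool']="8" ['pool_name']="test" )
+    standard_scrub_wpq_cluster $dir cluster_conf 2.5 || return 1
+}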
+
+
+# A debug flag is set for the PG specified, causing the 'pg query' command to display
+# an additional 'scrub sessions counter' field.
+#
+# $1: PG id
+#
+function set_query_debug() {
+ local pgid=$1
+ local prim_osd=`ceph pg dump pgs_brief | \
+ awk -v pg="^$pgid" -n -e '$0 ~ pg { print(gensub(/[^0-9]*([0-9]+).*/,"\\\\1","g",$5)); }' `
+
+ echo "Setting scrub debug data. Primary for $pgid is $prim_osd"
+ CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.$prim_osd) \
+ scrubdebug $pgid set sessions
+}
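+
+# Illustrative usage sketch for set_query_debug() (not invoked by any test; the PG id
+# is an example only):
+function example_set_query_debug_usage() {
+    set_query_debug "1.0"
+    ceph pg 1.0 query | jq '.scrubber.test_sequence'
+}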
+
diff --git a/qa/standalone/special/ceph_objectstore_tool.py b/qa/standalone/special/ceph_objectstore_tool.py
new file mode 100755
index 000000000..98a2c8723
--- /dev/null
+++ b/qa/standalone/special/ceph_objectstore_tool.py
@@ -0,0 +1,2045 @@
+#!/usr/bin/python3
+
+from subprocess import call, check_output, DEVNULL
+
+import filecmp
+import os
+import subprocess
+import math
+import time
+import sys
+import re
+import logging
+import json
+import tempfile
+import platform
+
+logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.WARNING,
+ datefmt="%FT%T")
+
+
+def wait_for_health():
+ print("Wait for health_ok...", end="")
+ tries = 0
+ while call("{path}/ceph health 2> /dev/null | grep -v 'HEALTH_OK\|HEALTH_WARN' > /dev/null".format(path=CEPH_BIN), shell=True) == 0:
+ tries += 1
+ if tries == 150:
+ raise Exception("Time exceeded to go to health")
+ time.sleep(1)
+ print("DONE")
+
+
+def get_pool_id(name, nullfd):
+ cmd = "{path}/ceph osd pool stats {pool}".format(pool=name, path=CEPH_BIN).split()
+    # output is "pool {pool} id <id> ..."; grab the 4th field
+ return check_output(cmd, stderr=nullfd).decode().split()[3]
+
+
+# return a list of unique PGs given an osd subdirectory
+def get_osd_pgs(SUBDIR, ID):
+ PGS = []
+ if ID:
+ endhead = re.compile("{id}.*_head$".format(id=ID))
+ DIR = os.path.join(SUBDIR, "current")
+ PGS += [f for f in os.listdir(DIR) if os.path.isdir(os.path.join(DIR, f)) and (ID is None or endhead.match(f))]
+ PGS = [re.sub("_head", "", p) for p in PGS if "_head" in p]
+ return PGS
+
+
+# return a sorted list of unique PGs given a directory
+def get_pgs(DIR, ID):
+ OSDS = [f for f in os.listdir(DIR) if os.path.isdir(os.path.join(DIR, f)) and f.find("osd") == 0]
+ PGS = []
+ for d in OSDS:
+ SUBDIR = os.path.join(DIR, d)
+ PGS += get_osd_pgs(SUBDIR, ID)
+ return sorted(set(PGS))
+
+
+# return a sorted list of PGs (a subset of ALLPGS) that contain objects with the specified prefix
+def get_objs(ALLPGS, prefix, DIR, ID):
+ OSDS = [f for f in os.listdir(DIR) if os.path.isdir(os.path.join(DIR, f)) and f.find("osd") == 0]
+ PGS = []
+ for d in OSDS:
+ DIRL2 = os.path.join(DIR, d)
+ SUBDIR = os.path.join(DIRL2, "current")
+ for p in ALLPGS:
+ PGDIR = p + "_head"
+ if not os.path.isdir(os.path.join(SUBDIR, PGDIR)):
+ continue
+ FINALDIR = os.path.join(SUBDIR, PGDIR)
+ # See if there are any objects there
+ if any(f for f in [val for _, _, fl in os.walk(FINALDIR) for val in fl] if f.startswith(prefix)):
+ PGS += [p]
+ return sorted(set(PGS))
+
+
+# return a sorted list of OSDs that have data from a given PG
+def get_osds(PG, DIR):
+ ALLOSDS = [f for f in os.listdir(DIR) if os.path.isdir(os.path.join(DIR, f)) and f.find("osd") == 0]
+ OSDS = []
+ for d in ALLOSDS:
+ DIRL2 = os.path.join(DIR, d)
+ SUBDIR = os.path.join(DIRL2, "current")
+ PGDIR = PG + "_head"
+ if not os.path.isdir(os.path.join(SUBDIR, PGDIR)):
+ continue
+ OSDS += [d]
+ return sorted(OSDS)
+
+
+def get_lines(filename):
+ tmpfd = open(filename, "r")
+ line = True
+ lines = []
+ while line:
+ line = tmpfd.readline().rstrip('\n')
+ if line:
+ lines += [line]
+ tmpfd.close()
+ os.unlink(filename)
+ return lines
+
+
+def cat_file(level, filename):
+ if level < logging.getLogger().getEffectiveLevel():
+ return
+ print("File: " + filename)
+ with open(filename, "r") as f:
+ while True:
+ line = f.readline().rstrip('\n')
+ if not line:
+ break
+ print(line)
+ print("<EOF>")
+
+
+def vstart(new, opt="-o osd_pool_default_pg_autoscale_mode=off"):
+ print("vstarting....", end="")
+ NEW = new and "-n" or "-k"
+ call("MON=1 OSD=4 MDS=0 MGR=1 CEPH_PORT=7400 MGR_PYTHON_PATH={path}/src/pybind/mgr {path}/src/vstart.sh --filestore --short -l {new} -d {opt} > /dev/null 2>&1".format(new=NEW, opt=opt, path=CEPH_ROOT), shell=True)
+ print("DONE")
+
+
+def test_failure(cmd, errmsg, tty=False):
+ if tty:
+ try:
+ ttyfd = open("/dev/tty", "rwb")
+ except Exception as e:
+ logging.info(str(e))
+ logging.info("SKIP " + cmd)
+ return 0
+ TMPFILE = r"/tmp/tmp.{pid}".format(pid=os.getpid())
+ tmpfd = open(TMPFILE, "wb")
+
+ logging.debug(cmd)
+ if tty:
+ ret = call(cmd, shell=True, stdin=ttyfd, stdout=ttyfd, stderr=tmpfd)
+ ttyfd.close()
+ else:
+ ret = call(cmd, shell=True, stderr=tmpfd)
+ tmpfd.close()
+ if ret == 0:
+ logging.error(cmd)
+ logging.error("Should have failed, but got exit 0")
+ return 1
+ lines = get_lines(TMPFILE)
+ matched = [ l for l in lines if errmsg in l ]
+ if any(matched):
+ logging.info("Correctly failed with message \"" + matched[0] + "\"")
+ return 0
+ else:
+ logging.error("Command: " + cmd )
+ logging.error("Bad messages to stderr \"" + str(lines) + "\"")
+ logging.error("Expected \"" + errmsg + "\"")
+ return 1
+
+
+def get_nspace(num):
+ if num == 0:
+ return ""
+ return "ns{num}".format(num=num)
+
+
+def verify(DATADIR, POOL, NAME_PREFIX, db):
+ TMPFILE = r"/tmp/tmp.{pid}".format(pid=os.getpid())
+ ERRORS = 0
+ for rawnsfile in [f for f in os.listdir(DATADIR) if f.split('-')[1].find(NAME_PREFIX) == 0]:
+ nsfile = rawnsfile.split("__")[0]
+ clone = rawnsfile.split("__")[1]
+ nspace = nsfile.split("-")[0]
+ file = nsfile.split("-")[1]
+ # Skip clones
+ if clone != "head":
+ continue
+ path = os.path.join(DATADIR, rawnsfile)
+ try:
+ os.unlink(TMPFILE)
+ except:
+ pass
+ cmd = "{path}/rados -p {pool} -N '{nspace}' get {file} {out}".format(pool=POOL, file=file, out=TMPFILE, nspace=nspace, path=CEPH_BIN)
+ logging.debug(cmd)
+ call(cmd, shell=True, stdout=DEVNULL, stderr=DEVNULL)
+ cmd = "diff -q {src} {result}".format(src=path, result=TMPFILE)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True)
+ if ret != 0:
+ logging.error("{file} data not imported properly".format(file=file))
+ ERRORS += 1
+ try:
+ os.unlink(TMPFILE)
+ except:
+ pass
+ for key, val in db[nspace][file]["xattr"].items():
+ cmd = "{path}/rados -p {pool} -N '{nspace}' getxattr {name} {key}".format(pool=POOL, name=file, key=key, nspace=nspace, path=CEPH_BIN)
+ logging.debug(cmd)
+ getval = check_output(cmd, shell=True, stderr=DEVNULL).decode()
+ logging.debug("getxattr {key} {val}".format(key=key, val=getval))
+ if getval != val:
+ logging.error("getxattr of key {key} returned wrong val: {get} instead of {orig}".format(key=key, get=getval, orig=val))
+ ERRORS += 1
+ continue
+ hdr = db[nspace][file].get("omapheader", "")
+ cmd = "{path}/rados -p {pool} -N '{nspace}' getomapheader {name} {file}".format(pool=POOL, name=file, nspace=nspace, file=TMPFILE, path=CEPH_BIN)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stderr=DEVNULL)
+ if ret != 0:
+ logging.error("rados getomapheader returned {ret}".format(ret=ret))
+ ERRORS += 1
+ else:
+ getlines = get_lines(TMPFILE)
+ assert(len(getlines) == 0 or len(getlines) == 1)
+ if len(getlines) == 0:
+ gethdr = ""
+ else:
+ gethdr = getlines[0]
+ logging.debug("header: {hdr}".format(hdr=gethdr))
+ if gethdr != hdr:
+ logging.error("getomapheader returned wrong val: {get} instead of {orig}".format(get=gethdr, orig=hdr))
+ ERRORS += 1
+ for key, val in db[nspace][file]["omap"].items():
+ cmd = "{path}/rados -p {pool} -N '{nspace}' getomapval {name} {key} {file}".format(pool=POOL, name=file, key=key, nspace=nspace, file=TMPFILE, path=CEPH_BIN)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stderr=DEVNULL)
+ if ret != 0:
+ logging.error("getomapval returned {ret}".format(ret=ret))
+ ERRORS += 1
+ continue
+ getlines = get_lines(TMPFILE)
+ if len(getlines) != 1:
+ logging.error("Bad data from getomapval {lines}".format(lines=getlines))
+ ERRORS += 1
+ continue
+ getval = getlines[0]
+ logging.debug("getomapval {key} {val}".format(key=key, val=getval))
+ if getval != val:
+ logging.error("getomapval returned wrong val: {get} instead of {orig}".format(get=getval, orig=val))
+ ERRORS += 1
+ try:
+ os.unlink(TMPFILE)
+ except:
+ pass
+ return ERRORS
+
+
+def check_journal(jsondict):
+ errors = 0
+ if 'header' not in jsondict:
+ logging.error("Key 'header' not in dump-journal")
+ errors += 1
+ elif 'max_size' not in jsondict['header']:
+ logging.error("Key 'max_size' not in dump-journal header")
+ errors += 1
+ else:
+ print("\tJournal max_size = {size}".format(size=jsondict['header']['max_size']))
+ if 'entries' not in jsondict:
+ logging.error("Key 'entries' not in dump-journal output")
+ errors += 1
+ elif len(jsondict['entries']) == 0:
+ logging.info("No entries in journal found")
+ else:
+ errors += check_journal_entries(jsondict['entries'])
+ return errors
+
+
+def check_journal_entries(entries):
+ errors = 0
+ for enum in range(len(entries)):
+ if 'offset' not in entries[enum]:
+ logging.error("No 'offset' key in entry {e}".format(e=enum))
+ errors += 1
+ if 'seq' not in entries[enum]:
+ logging.error("No 'seq' key in entry {e}".format(e=enum))
+ errors += 1
+ if 'transactions' not in entries[enum]:
+ logging.error("No 'transactions' key in entry {e}".format(e=enum))
+ errors += 1
+ elif len(entries[enum]['transactions']) == 0:
+ logging.error("No transactions found in entry {e}".format(e=enum))
+ errors += 1
+ else:
+ errors += check_entry_transactions(entries[enum], enum)
+ return errors
+
+
+def check_entry_transactions(entry, enum):
+ errors = 0
+ for tnum in range(len(entry['transactions'])):
+ if 'trans_num' not in entry['transactions'][tnum]:
+ logging.error("Key 'trans_num' missing from entry {e} trans {t}".format(e=enum, t=tnum))
+ errors += 1
+ elif entry['transactions'][tnum]['trans_num'] != tnum:
+ ft = entry['transactions'][tnum]['trans_num']
+ logging.error("Bad trans_num ({ft}) entry {e} trans {t}".format(ft=ft, e=enum, t=tnum))
+ errors += 1
+ if 'ops' not in entry['transactions'][tnum]:
+ logging.error("Key 'ops' missing from entry {e} trans {t}".format(e=enum, t=tnum))
+ errors += 1
+ else:
+ errors += check_transaction_ops(entry['transactions'][tnum]['ops'], enum, tnum)
+ return errors
+
+
+def check_transaction_ops(ops, enum, tnum):
+ if len(ops) == 0:
+ logging.warning("No ops found in entry {e} trans {t}".format(e=enum, t=tnum))
+ errors = 0
+ for onum in range(len(ops)):
+ if 'op_num' not in ops[onum]:
+ logging.error("Key 'op_num' missing from entry {e} trans {t} op {o}".format(e=enum, t=tnum, o=onum))
+ errors += 1
+ elif ops[onum]['op_num'] != onum:
+ fo = ops[onum]['op_num']
+ logging.error("Bad op_num ({fo}) from entry {e} trans {t} op {o}".format(fo=fo, e=enum, t=tnum, o=onum))
+ errors += 1
+ if 'op_name' not in ops[onum]:
+ logging.error("Key 'op_name' missing from entry {e} trans {t} op {o}".format(e=enum, t=tnum, o=onum))
+ errors += 1
+ return errors
+
+
+def test_dump_journal(CFSD_PREFIX, osds):
+ ERRORS = 0
+ pid = os.getpid()
+ TMPFILE = r"/tmp/tmp.{pid}".format(pid=pid)
+
+ for osd in osds:
+ # Test --op dump-journal by loading json
+ cmd = (CFSD_PREFIX + "--op dump-journal --format json").format(osd=osd)
+ logging.debug(cmd)
+ tmpfd = open(TMPFILE, "wb")
+ ret = call(cmd, shell=True, stdout=tmpfd)
+ if ret != 0:
+ logging.error("Bad exit status {ret} from {cmd}".format(ret=ret, cmd=cmd))
+ ERRORS += 1
+ continue
+ tmpfd.close()
+ tmpfd = open(TMPFILE, "r")
+ jsondict = json.load(tmpfd)
+ tmpfd.close()
+ os.unlink(TMPFILE)
+
+ journal_errors = check_journal(jsondict)
+ if journal_errors != 0:
+ logging.error(jsondict)
+ ERRORS += journal_errors
+
+ return ERRORS
+
+CEPH_BUILD_DIR = os.environ.get('CEPH_BUILD_DIR')
+CEPH_BIN = os.environ.get('CEPH_BIN')
+CEPH_ROOT = os.environ.get('CEPH_ROOT')
+
+if not CEPH_BUILD_DIR:
+ CEPH_BUILD_DIR=os.getcwd()
+ os.putenv('CEPH_BUILD_DIR', CEPH_BUILD_DIR)
+ CEPH_BIN=os.path.join(CEPH_BUILD_DIR, 'bin')
+ os.putenv('CEPH_BIN', CEPH_BIN)
+ CEPH_ROOT=os.path.dirname(CEPH_BUILD_DIR)
+ os.putenv('CEPH_ROOT', CEPH_ROOT)
+ CEPH_LIB=os.path.join(CEPH_BUILD_DIR, 'lib')
+ os.putenv('CEPH_LIB', CEPH_LIB)
+
+try:
+ os.mkdir("td")
+except:
+ pass # ok if this is already there
+CEPH_DIR = os.path.join(CEPH_BUILD_DIR, os.path.join("td", "cot_dir"))
+CEPH_CONF = os.path.join(CEPH_DIR, 'ceph.conf')
+
+def kill_daemons():
+ call("{path}/init-ceph -c {conf} stop > /dev/null 2>&1".format(conf=CEPH_CONF, path=CEPH_BIN), shell=True)
+
+
+def check_data(DATADIR, TMPFILE, OSDDIR, SPLIT_NAME):
+ repcount = 0
+ ERRORS = 0
+ for rawnsfile in [f for f in os.listdir(DATADIR) if f.split('-')[1].find(SPLIT_NAME) == 0]:
+ nsfile = rawnsfile.split("__")[0]
+ clone = rawnsfile.split("__")[1]
+ nspace = nsfile.split("-")[0]
+ file = nsfile.split("-")[1] + "__" + clone
+ # Skip clones
+ if clone != "head":
+ continue
+ path = os.path.join(DATADIR, rawnsfile)
+ tmpfd = open(TMPFILE, "wb")
+ cmd = "find {dir} -name '{file}_*_{nspace}_*'".format(dir=OSDDIR, file=file, nspace=nspace)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=tmpfd)
+ if ret:
+ logging.critical("INTERNAL ERROR")
+ return 1
+ tmpfd.close()
+ obj_locs = get_lines(TMPFILE)
+ if len(obj_locs) == 0:
+ logging.error("Can't find imported object {name}".format(name=file))
+ ERRORS += 1
+ for obj_loc in obj_locs:
+ # For btrfs skip snap_* dirs
+ if re.search("/snap_[0-9]*/", obj_loc) is not None:
+ continue
+ repcount += 1
+ cmd = "diff -q {src} {obj_loc}".format(src=path, obj_loc=obj_loc)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True)
+ if ret != 0:
+ logging.error("{file} data not imported properly into {obj}".format(file=file, obj=obj_loc))
+ ERRORS += 1
+ return ERRORS, repcount
+
+
+def set_osd_weight(CFSD_PREFIX, osd_ids, osd_path, weight):
+    # change the weight of the given osds to 'weight' in the newest osdmap stored by the given osd
+ osdmap_file = tempfile.NamedTemporaryFile(delete=True)
+ cmd = (CFSD_PREFIX + "--op get-osdmap --file {osdmap_file}").format(osd=osd_path,
+ osdmap_file=osdmap_file.name)
+ output = check_output(cmd, shell=True).decode()
+    epoch = int(re.findall(r'#(\d+)', output)[0])
+
+ new_crush_file = tempfile.NamedTemporaryFile(delete=True)
+ old_crush_file = tempfile.NamedTemporaryFile(delete=True)
+ ret = call("{path}/osdmaptool --export-crush {crush_file} {osdmap_file}".format(osdmap_file=osdmap_file.name,
+ crush_file=old_crush_file.name, path=CEPH_BIN),
+ stdout=DEVNULL,
+ stderr=DEVNULL,
+ shell=True)
+ assert(ret == 0)
+
+ for osd_id in osd_ids:
+ cmd = "{path}/crushtool -i {crush_file} --reweight-item osd.{osd} {weight} -o {new_crush_file}".format(osd=osd_id,
+ crush_file=old_crush_file.name,
+ weight=weight,
+ new_crush_file=new_crush_file.name, path=CEPH_BIN)
+ ret = call(cmd, stdout=DEVNULL, shell=True)
+ assert(ret == 0)
+ old_crush_file, new_crush_file = new_crush_file, old_crush_file
+
+    # change them back, since we do not need to prepare for another round
+ old_crush_file, new_crush_file = new_crush_file, old_crush_file
+ old_crush_file.close()
+
+ ret = call("{path}/osdmaptool --import-crush {crush_file} {osdmap_file}".format(osdmap_file=osdmap_file.name,
+ crush_file=new_crush_file.name, path=CEPH_BIN),
+ stdout=DEVNULL,
+ stderr=DEVNULL,
+ shell=True)
+ assert(ret == 0)
+
+    # Minimal test of --dry-run: run it, but do not check anything
+ cmd = CFSD_PREFIX + "--op set-osdmap --file {osdmap_file} --epoch {epoch} --force --dry-run"
+ cmd = cmd.format(osd=osd_path, osdmap_file=osdmap_file.name, epoch=epoch)
+ ret = call(cmd, stdout=DEVNULL, shell=True)
+ assert(ret == 0)
+
+    # osdmaptool increases the epoch of the changed osdmap, so we need to force the tool
+    # to use a different epoch than the one in the osdmap
+ cmd = CFSD_PREFIX + "--op set-osdmap --file {osdmap_file} --epoch {epoch} --force"
+ cmd = cmd.format(osd=osd_path, osdmap_file=osdmap_file.name, epoch=epoch)
+ ret = call(cmd, stdout=DEVNULL, shell=True)
+
+ return ret == 0
+
+def get_osd_weights(CFSD_PREFIX, osd_ids, osd_path):
+ osdmap_file = tempfile.NamedTemporaryFile(delete=True)
+ cmd = (CFSD_PREFIX + "--op get-osdmap --file {osdmap_file}").format(osd=osd_path,
+ osdmap_file=osdmap_file.name)
+ ret = call(cmd, stdout=DEVNULL, shell=True)
+ if ret != 0:
+ return None
+    # We read the weights from the crush map. The weights could also be queried with
+    # osdmaptool, but keep in mind that the two are different:
+    # item weights in the crush map versus the weight associated with each osd in the osdmap
+ crush_file = tempfile.NamedTemporaryFile(delete=True)
+ ret = call("{path}/osdmaptool --export-crush {crush_file} {osdmap_file}".format(osdmap_file=osdmap_file.name,
+ crush_file=crush_file.name, path=CEPH_BIN),
+ stdout=DEVNULL,
+ shell=True)
+ assert(ret == 0)
+ output = check_output("{path}/crushtool --tree -i {crush_file} | tail -n {num_osd}".format(crush_file=crush_file.name,
+ num_osd=len(osd_ids), path=CEPH_BIN),
+ stderr=DEVNULL,
+ shell=True).decode()
+ weights = []
+ for line in output.strip().split('\n'):
+ print(line)
+        linev = re.split(r'\s+', line)
+ if linev[0] == '':
+ linev.pop(0)
+ print('linev %s' % linev)
+ weights.append(float(linev[2]))
+
+ return weights
+
+
+def test_get_set_osdmap(CFSD_PREFIX, osd_ids, osd_paths):
+ print("Testing get-osdmap and set-osdmap")
+ errors = 0
+ kill_daemons()
+ weight = 1 / math.e # just some magic number in [0, 1]
+ changed = []
+ for osd_path in osd_paths:
+ if set_osd_weight(CFSD_PREFIX, osd_ids, osd_path, weight):
+ changed.append(osd_path)
+ else:
+ logging.warning("Failed to change the weights: {0}".format(osd_path))
+    # it is an error if none of the stores got changed
+ if not changed:
+ errors += 1
+
+ for osd_path in changed:
+ weights = get_osd_weights(CFSD_PREFIX, osd_ids, osd_path)
+ if not weights:
+ errors += 1
+ continue
+ if any(abs(w - weight) > 1e-5 for w in weights):
+ logging.warning("Weight is not changed: {0} != {1}".format(weights, weight))
+ errors += 1
+ return errors
+
+def test_get_set_inc_osdmap(CFSD_PREFIX, osd_path):
+ # incrementals are not used unless we need to build an MOSDMap to update
+ # OSD's peers, so an obvious way to test it is simply overwrite an epoch
+ # with a different copy, and read it back to see if it matches.
+ kill_daemons()
+ file_e2 = tempfile.NamedTemporaryFile(delete=True)
+ cmd = (CFSD_PREFIX + "--op get-inc-osdmap --file {file}").format(osd=osd_path,
+ file=file_e2.name)
+ output = check_output(cmd, shell=True).decode()
+    epoch = int(re.findall(r'#(\d+)', output)[0])
+ # backup e1 incremental before overwriting it
+ epoch -= 1
+ file_e1_backup = tempfile.NamedTemporaryFile(delete=True)
+ cmd = CFSD_PREFIX + "--op get-inc-osdmap --epoch {epoch} --file {file}"
+ ret = call(cmd.format(osd=osd_path, epoch=epoch, file=file_e1_backup.name), shell=True)
+ if ret: return 1
+ # overwrite e1 with e2
+ cmd = CFSD_PREFIX + "--op set-inc-osdmap --force --epoch {epoch} --file {file}"
+ ret = call(cmd.format(osd=osd_path, epoch=epoch, file=file_e2.name), shell=True)
+ if ret: return 1
+    # Use --dry-run to set back to e1; the change should not actually be applied
+ cmd = CFSD_PREFIX + "--op set-inc-osdmap --dry-run --epoch {epoch} --file {file}"
+ ret = call(cmd.format(osd=osd_path, epoch=epoch, file=file_e1_backup.name), shell=True)
+ if ret: return 1
+ # read from e1
+ file_e1_read = tempfile.NamedTemporaryFile(delete=True)
+ cmd = CFSD_PREFIX + "--op get-inc-osdmap --epoch {epoch} --file {file}"
+ ret = call(cmd.format(osd=osd_path, epoch=epoch, file=file_e1_read.name), shell=True)
+ if ret: return 1
+ errors = 0
+ try:
+ if not filecmp.cmp(file_e2.name, file_e1_read.name, shallow=False):
+ logging.error("{{get,set}}-inc-osdmap mismatch {0} != {1}".format(file_e2.name, file_e1_read.name))
+ errors += 1
+ finally:
+ # revert the change with file_e1_backup
+ cmd = CFSD_PREFIX + "--op set-inc-osdmap --epoch {epoch} --file {file}"
+ ret = call(cmd.format(osd=osd_path, epoch=epoch, file=file_e1_backup.name), shell=True)
+ if ret:
+ logging.error("Failed to revert the changed inc-osdmap")
+ errors += 1
+
+ return errors
+
+
+def test_removeall(CFSD_PREFIX, db, OBJREPPGS, REP_POOL, CEPH_BIN, OSDDIR, REP_NAME, NUM_CLONED_REP_OBJECTS):
+ # Test removeall
+ TMPFILE = r"/tmp/tmp.{pid}".format(pid=os.getpid())
+ nullfd = open(os.devnull, "w")
+ errors=0
+ print("Test removeall")
+ kill_daemons()
+ test_force_remove = 0
+ for nspace in db.keys():
+ for basename in db[nspace].keys():
+ JSON = db[nspace][basename]['json']
+ for pg in OBJREPPGS:
+ OSDS = get_osds(pg, OSDDIR)
+ for osd in OSDS:
+ DIR = os.path.join(OSDDIR, os.path.join(osd, os.path.join("current", "{pg}_head".format(pg=pg))))
+ fnames = [f for f in os.listdir(DIR) if os.path.isfile(os.path.join(DIR, f))
+ and f.split("_")[0] == basename and f.split("_")[4] == nspace]
+ if not fnames:
+ continue
+
+ if int(basename.split(REP_NAME)[1]) <= int(NUM_CLONED_REP_OBJECTS):
+ cmd = (CFSD_PREFIX + "'{json}' remove").format(osd=osd, json=JSON)
+ errors += test_failure(cmd, "Clones are present, use removeall to delete everything")
+ if not test_force_remove:
+
+ cmd = (CFSD_PREFIX + " '{json}' set-attr snapset /dev/null").format(osd=osd, json=JSON)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd)
+ if ret != 0:
+ logging.error("Test set-up to corrupt snapset failed for {json}".format(json=JSON))
+ errors += 1
+ # Do the removeall since this test failed to set-up
+ else:
+ test_force_remove = 1
+
+ cmd = (CFSD_PREFIX + " '{json}' --force remove").format(osd=osd, json=JSON)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd)
+ if ret != 0:
+ logging.error("forced remove with corrupt snapset failed for {json}".format(json=JSON))
+ errors += 1
+ continue
+
+ cmd = (CFSD_PREFIX + " --force --dry-run '{json}' remove").format(osd=osd, json=JSON)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd)
+ if ret != 0:
+ logging.error("remove with --force failed for {json}".format(json=JSON))
+ errors += 1
+
+ cmd = (CFSD_PREFIX + " --dry-run '{json}' removeall").format(osd=osd, json=JSON)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd)
+ if ret != 0:
+ logging.error("removeall failed for {json}".format(json=JSON))
+ errors += 1
+
+ cmd = (CFSD_PREFIX + " '{json}' removeall").format(osd=osd, json=JSON)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd)
+ if ret != 0:
+ logging.error("removeall failed for {json}".format(json=JSON))
+ errors += 1
+
+ tmpfd = open(TMPFILE, "w")
+ cmd = (CFSD_PREFIX + "--op list --pgid {pg} --namespace {ns} {name}").format(osd=osd, pg=pg, ns=nspace, name=basename)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=tmpfd)
+ if ret != 0:
+ logging.error("Bad exit status {ret} from {cmd}".format(ret=ret, cmd=cmd))
+ errors += 1
+ tmpfd.close()
+ lines = get_lines(TMPFILE)
+ if len(lines) != 0:
+ logging.error("Removeall didn't remove all objects {ns}/{name} : {lines}".format(ns=nspace, name=basename, lines=lines))
+ errors += 1
+ vstart(new=False)
+ wait_for_health()
+ cmd = "{path}/rados -p {pool} rmsnap snap1".format(pool=REP_POOL, path=CEPH_BIN)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd)
+ if ret != 0:
+ logging.error("rados rmsnap failed")
+ errors += 1
+ time.sleep(2)
+ wait_for_health()
+ return errors
+
+
+def main(argv):
+ stdout = sys.stdout.buffer
+ if len(argv) > 1 and argv[1] == "debug":
+ nullfd = stdout
+ else:
+ nullfd = DEVNULL
+
+ call("rm -fr {dir}; mkdir -p {dir}".format(dir=CEPH_DIR), shell=True)
+ os.chdir(CEPH_DIR)
+ os.environ["CEPH_DIR"] = CEPH_DIR
+ OSDDIR = "dev"
+ REP_POOL = "rep_pool"
+ REP_NAME = "REPobject"
+ EC_POOL = "ec_pool"
+ EC_NAME = "ECobject"
+ if len(argv) > 0 and argv[0] == 'large':
+ PG_COUNT = 12
+ NUM_REP_OBJECTS = 200
+ NUM_CLONED_REP_OBJECTS = 50
+ NUM_EC_OBJECTS = 12
+ NUM_NSPACES = 4
+ # Larger data sets for first object per namespace
+ DATALINECOUNT = 50000
+ # Number of objects to do xattr/omap testing on
+ ATTR_OBJS = 10
+ else:
+ PG_COUNT = 4
+ NUM_REP_OBJECTS = 2
+ NUM_CLONED_REP_OBJECTS = 2
+ NUM_EC_OBJECTS = 2
+ NUM_NSPACES = 2
+ # Larger data sets for first object per namespace
+ DATALINECOUNT = 10
+ # Number of objects to do xattr/omap testing on
+ ATTR_OBJS = 2
+ ERRORS = 0
+ pid = os.getpid()
+ TESTDIR = "/tmp/test.{pid}".format(pid=pid)
+ DATADIR = "/tmp/data.{pid}".format(pid=pid)
+ CFSD_PREFIX = CEPH_BIN + "/ceph-objectstore-tool --no-mon-config --data-path " + OSDDIR + "/{osd} "
+ PROFNAME = "testecprofile"
+
+ os.environ['CEPH_CONF'] = CEPH_CONF
+ vstart(new=True)
+ wait_for_health()
+
+ cmd = "{path}/ceph osd pool create {pool} {pg} {pg} replicated".format(pool=REP_POOL, pg=PG_COUNT, path=CEPH_BIN)
+ logging.debug(cmd)
+ call(cmd, shell=True, stdout=nullfd, stderr=nullfd)
+ time.sleep(2)
+ REPID = get_pool_id(REP_POOL, nullfd)
+
+ print("Created Replicated pool #{repid}".format(repid=REPID))
+
+ cmd = "{path}/ceph osd erasure-code-profile set {prof} crush-failure-domain=osd".format(prof=PROFNAME, path=CEPH_BIN)
+ logging.debug(cmd)
+ call(cmd, shell=True, stdout=nullfd, stderr=nullfd)
+ cmd = "{path}/ceph osd erasure-code-profile get {prof}".format(prof=PROFNAME, path=CEPH_BIN)
+ logging.debug(cmd)
+ call(cmd, shell=True, stdout=nullfd, stderr=nullfd)
+ cmd = "{path}/ceph osd pool create {pool} {pg} {pg} erasure {prof}".format(pool=EC_POOL, prof=PROFNAME, pg=PG_COUNT, path=CEPH_BIN)
+ logging.debug(cmd)
+ call(cmd, shell=True, stdout=nullfd, stderr=nullfd)
+ ECID = get_pool_id(EC_POOL, nullfd)
+
+ print("Created Erasure coded pool #{ecid}".format(ecid=ECID))
+
+ print("Creating {objs} objects in replicated pool".format(objs=(NUM_REP_OBJECTS*NUM_NSPACES)))
+ cmd = "mkdir -p {datadir}".format(datadir=DATADIR)
+ logging.debug(cmd)
+ call(cmd, shell=True)
+
+ db = {}
+
+ objects = range(1, NUM_REP_OBJECTS + 1)
+ nspaces = range(NUM_NSPACES)
+ for n in nspaces:
+ nspace = get_nspace(n)
+
+ db[nspace] = {}
+
+ for i in objects:
+ NAME = REP_NAME + "{num}".format(num=i)
+ LNAME = nspace + "-" + NAME
+ DDNAME = os.path.join(DATADIR, LNAME)
+ DDNAME += "__head"
+
+ cmd = "rm -f " + DDNAME
+ logging.debug(cmd)
+ call(cmd, shell=True)
+
+ if i == 1:
+ dataline = range(DATALINECOUNT)
+ else:
+ dataline = range(1)
+ fd = open(DDNAME, "w")
+ data = "This is the replicated data for " + LNAME + "\n"
+ for _ in dataline:
+ fd.write(data)
+ fd.close()
+
+ cmd = "{path}/rados -p {pool} -N '{nspace}' put {name} {ddname}".format(pool=REP_POOL, name=NAME, ddname=DDNAME, nspace=nspace, path=CEPH_BIN)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stderr=nullfd)
+ if ret != 0:
+ logging.critical("Rados put command failed with {ret}".format(ret=ret))
+ return 1
+
+ db[nspace][NAME] = {}
+
+ if i < ATTR_OBJS + 1:
+ keys = range(i)
+ else:
+ keys = range(0)
+ db[nspace][NAME]["xattr"] = {}
+ for k in keys:
+ if k == 0:
+ continue
+ mykey = "key{i}-{k}".format(i=i, k=k)
+ myval = "val{i}-{k}".format(i=i, k=k)
+ cmd = "{path}/rados -p {pool} -N '{nspace}' setxattr {name} {key} {val}".format(pool=REP_POOL, name=NAME, key=mykey, val=myval, nspace=nspace, path=CEPH_BIN)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True)
+ if ret != 0:
+ logging.error("setxattr failed with {ret}".format(ret=ret))
+ ERRORS += 1
+ db[nspace][NAME]["xattr"][mykey] = myval
+
+ # Create omap header in all objects but REPobject1
+ if i < ATTR_OBJS + 1 and i != 1:
+ myhdr = "hdr{i}".format(i=i)
+ cmd = "{path}/rados -p {pool} -N '{nspace}' setomapheader {name} {hdr}".format(pool=REP_POOL, name=NAME, hdr=myhdr, nspace=nspace, path=CEPH_BIN)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True)
+ if ret != 0:
+ logging.critical("setomapheader failed with {ret}".format(ret=ret))
+ ERRORS += 1
+ db[nspace][NAME]["omapheader"] = myhdr
+
+ db[nspace][NAME]["omap"] = {}
+ for k in keys:
+ if k == 0:
+ continue
+ mykey = "okey{i}-{k}".format(i=i, k=k)
+ myval = "oval{i}-{k}".format(i=i, k=k)
+ cmd = "{path}/rados -p {pool} -N '{nspace}' setomapval {name} {key} {val}".format(pool=REP_POOL, name=NAME, key=mykey, val=myval, nspace=nspace, path=CEPH_BIN)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True)
+ if ret != 0:
+ logging.critical("setomapval failed with {ret}".format(ret=ret))
+ db[nspace][NAME]["omap"][mykey] = myval
+
+ # Create some clones
+ cmd = "{path}/rados -p {pool} mksnap snap1".format(pool=REP_POOL, path=CEPH_BIN)
+ logging.debug(cmd)
+ call(cmd, shell=True)
+
+ objects = range(1, NUM_CLONED_REP_OBJECTS + 1)
+ nspaces = range(NUM_NSPACES)
+ for n in nspaces:
+ nspace = get_nspace(n)
+
+ for i in objects:
+ NAME = REP_NAME + "{num}".format(num=i)
+ LNAME = nspace + "-" + NAME
+ DDNAME = os.path.join(DATADIR, LNAME)
+ # First clone
+ CLONENAME = DDNAME + "__1"
+ DDNAME += "__head"
+
+ cmd = "mv -f " + DDNAME + " " + CLONENAME
+ logging.debug(cmd)
+ call(cmd, shell=True)
+
+ if i == 1:
+ dataline = range(DATALINECOUNT)
+ else:
+ dataline = range(1)
+ fd = open(DDNAME, "w")
+ data = "This is the replicated data after a snapshot for " + LNAME + "\n"
+ for _ in dataline:
+ fd.write(data)
+ fd.close()
+
+ cmd = "{path}/rados -p {pool} -N '{nspace}' put {name} {ddname}".format(pool=REP_POOL, name=NAME, ddname=DDNAME, nspace=nspace, path=CEPH_BIN)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stderr=nullfd)
+ if ret != 0:
+ logging.critical("Rados put command failed with {ret}".format(ret=ret))
+ return 1
+
+ print("Creating {objs} objects in erasure coded pool".format(objs=(NUM_EC_OBJECTS*NUM_NSPACES)))
+
+ objects = range(1, NUM_EC_OBJECTS + 1)
+ nspaces = range(NUM_NSPACES)
+ for n in nspaces:
+ nspace = get_nspace(n)
+
+ for i in objects:
+ NAME = EC_NAME + "{num}".format(num=i)
+ LNAME = nspace + "-" + NAME
+ DDNAME = os.path.join(DATADIR, LNAME)
+ DDNAME += "__head"
+
+ cmd = "rm -f " + DDNAME
+ logging.debug(cmd)
+ call(cmd, shell=True)
+
+ if i == 1:
+ dataline = range(DATALINECOUNT)
+ else:
+ dataline = range(1)
+ fd = open(DDNAME, "w")
+ data = "This is the erasure coded data for " + LNAME + "\n"
+ for j in dataline:
+ fd.write(data)
+ fd.close()
+
+ cmd = "{path}/rados -p {pool} -N '{nspace}' put {name} {ddname}".format(pool=EC_POOL, name=NAME, ddname=DDNAME, nspace=nspace, path=CEPH_BIN)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stderr=nullfd)
+ if ret != 0:
+ logging.critical("Erasure coded pool creation failed with {ret}".format(ret=ret))
+ return 1
+
+ db[nspace][NAME] = {}
+
+ db[nspace][NAME]["xattr"] = {}
+ if i < ATTR_OBJS + 1:
+ keys = range(i)
+ else:
+ keys = range(0)
+ for k in keys:
+ if k == 0:
+ continue
+ mykey = "key{i}-{k}".format(i=i, k=k)
+ myval = "val{i}-{k}".format(i=i, k=k)
+ cmd = "{path}/rados -p {pool} -N '{nspace}' setxattr {name} {key} {val}".format(pool=EC_POOL, name=NAME, key=mykey, val=myval, nspace=nspace, path=CEPH_BIN)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True)
+ if ret != 0:
+ logging.error("setxattr failed with {ret}".format(ret=ret))
+ ERRORS += 1
+ db[nspace][NAME]["xattr"][mykey] = myval
+
+ # Omap isn't supported in EC pools
+ db[nspace][NAME]["omap"] = {}
+
+ logging.debug(db)
+
+ kill_daemons()
+
+ if ERRORS:
+ logging.critical("Unable to set up test")
+ return 1
+
+ ALLREPPGS = get_pgs(OSDDIR, REPID)
+ logging.debug(ALLREPPGS)
+ ALLECPGS = get_pgs(OSDDIR, ECID)
+ logging.debug(ALLECPGS)
+
+ OBJREPPGS = get_objs(ALLREPPGS, REP_NAME, OSDDIR, REPID)
+ logging.debug(OBJREPPGS)
+ OBJECPGS = get_objs(ALLECPGS, EC_NAME, OSDDIR, ECID)
+ logging.debug(OBJECPGS)
+
+ ONEPG = ALLREPPGS[0]
+ logging.debug(ONEPG)
+ osds = get_osds(ONEPG, OSDDIR)
+ ONEOSD = osds[0]
+ logging.debug(ONEOSD)
+
+ print("Test invalid parameters")
+ # On export can't use stdout to a terminal
+ cmd = (CFSD_PREFIX + "--op export --pgid {pg}").format(osd=ONEOSD, pg=ONEPG)
+ ERRORS += test_failure(cmd, "stdout is a tty and no --file filename specified", tty=True)
+
+ # On export can't use stdout to a terminal
+ cmd = (CFSD_PREFIX + "--op export --pgid {pg} --file -").format(osd=ONEOSD, pg=ONEPG)
+ ERRORS += test_failure(cmd, "stdout is a tty and no --file filename specified", tty=True)
+
+ # Prep a valid ec export file for import failure tests
+ ONEECPG = ALLECPGS[0]
+ osds = get_osds(ONEECPG, OSDDIR)
+ ONEECOSD = osds[0]
+ OTHERFILE = "/tmp/foo.{pid}".format(pid=pid)
+ cmd = (CFSD_PREFIX + "--op export --pgid {pg} --file {file}").format(osd=ONEECOSD, pg=ONEECPG, file=OTHERFILE)
+ logging.debug(cmd)
+ call(cmd, shell=True, stdout=nullfd, stderr=nullfd)
+
+ os.unlink(OTHERFILE)
+
+ # Prep a valid export file for import failure tests
+ OTHERFILE = "/tmp/foo.{pid}".format(pid=pid)
+ cmd = (CFSD_PREFIX + "--op export --pgid {pg} --file {file}").format(osd=ONEOSD, pg=ONEPG, file=OTHERFILE)
+ logging.debug(cmd)
+ call(cmd, shell=True, stdout=nullfd, stderr=nullfd)
+
+ # On import can't specify a different pgid than the file
+ TMPPG="{pool}.80".format(pool=REPID)
+ cmd = (CFSD_PREFIX + "--op import --pgid 12.dd --file {file}").format(osd=ONEOSD, pg=TMPPG, file=OTHERFILE)
+ ERRORS += test_failure(cmd, "specified pgid 12.dd does not match actual pgid")
+
+ os.unlink(OTHERFILE)
+ cmd = (CFSD_PREFIX + "--op import --file {FOO}").format(osd=ONEOSD, FOO=OTHERFILE)
+ ERRORS += test_failure(cmd, "file: {FOO}: No such file or directory".format(FOO=OTHERFILE))
+
+ cmd = "{path}/ceph-objectstore-tool --no-mon-config --data-path BAD_DATA_PATH --op list".format(path=CEPH_BIN)
+ ERRORS += test_failure(cmd, "data-path: BAD_DATA_PATH: No such file or directory")
+
+ cmd = (CFSD_PREFIX + "--journal-path BAD_JOURNAL_PATH --op list").format(osd=ONEOSD)
+ ERRORS += test_failure(cmd, "journal-path: BAD_JOURNAL_PATH: No such file or directory")
+
+ cmd = (CFSD_PREFIX + "--journal-path /bin --op list").format(osd=ONEOSD)
+ ERRORS += test_failure(cmd, "journal-path: /bin: (21) Is a directory")
+
+ # On import can't use stdin from a terminal
+ cmd = (CFSD_PREFIX + "--op import --pgid {pg}").format(osd=ONEOSD, pg=ONEPG)
+ ERRORS += test_failure(cmd, "stdin is a tty and no --file filename specified", tty=True)
+
+ # On import can't use stdin from a terminal
+ cmd = (CFSD_PREFIX + "--op import --pgid {pg} --file -").format(osd=ONEOSD, pg=ONEPG)
+ ERRORS += test_failure(cmd, "stdin is a tty and no --file filename specified", tty=True)
+
+ # Specify a bad --type
+ os.mkdir(OSDDIR + "/fakeosd")
+ cmd = ("{path}/ceph-objectstore-tool --no-mon-config --data-path " + OSDDIR + "/{osd} --type foobar --op list --pgid {pg}").format(osd="fakeosd", pg=ONEPG, path=CEPH_BIN)
+ ERRORS += test_failure(cmd, "Unable to create store of type foobar")
+
+ # Don't specify a data-path
+ cmd = "{path}/ceph-objectstore-tool --no-mon-config --type memstore --op list --pgid {pg}".format(pg=ONEPG, path=CEPH_BIN)
+ ERRORS += test_failure(cmd, "Must provide --data-path")
+
+ cmd = (CFSD_PREFIX + "--op remove --pgid 2.0").format(osd=ONEOSD)
+ ERRORS += test_failure(cmd, "Please use export-remove or you must use --force option")
+
+ cmd = (CFSD_PREFIX + "--force --op remove").format(osd=ONEOSD)
+ ERRORS += test_failure(cmd, "Must provide pgid")
+
+    # Don't specify either an --op or an object command
+ cmd = CFSD_PREFIX.format(osd=ONEOSD)
+ ERRORS += test_failure(cmd, "Must provide --op or object command...")
+
+ # Specify a bad --op command
+ cmd = (CFSD_PREFIX + "--op oops").format(osd=ONEOSD)
+ ERRORS += test_failure(cmd, "Must provide --op (info, log, remove, mkfs, fsck, repair, export, export-remove, import, list, fix-lost, list-pgs, dump-journal, dump-super, meta-list, get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, reset-last-complete, dump-export, trim-pg-log, statfs)")
+
+ # Provide just the object param not a command
+ cmd = (CFSD_PREFIX + "object").format(osd=ONEOSD)
+ ERRORS += test_failure(cmd, "Invalid syntax, missing command")
+
+ # Provide an object name that doesn't exist
+ cmd = (CFSD_PREFIX + "NON_OBJECT get-bytes").format(osd=ONEOSD)
+ ERRORS += test_failure(cmd, "No object id 'NON_OBJECT' found")
+
+ # Provide an invalid object command
+ cmd = (CFSD_PREFIX + "--pgid {pg} '' notacommand").format(osd=ONEOSD, pg=ONEPG)
+ ERRORS += test_failure(cmd, "Unknown object command 'notacommand'")
+
+ cmd = (CFSD_PREFIX + "foo list-omap").format(osd=ONEOSD, pg=ONEPG)
+ ERRORS += test_failure(cmd, "No object id 'foo' found or invalid JSON specified")
+
+ cmd = (CFSD_PREFIX + "'{{\"oid\":\"obj4\",\"key\":\"\",\"snapid\":-1,\"hash\":2826278768,\"max\":0,\"pool\":1,\"namespace\":\"\"}}' list-omap").format(osd=ONEOSD, pg=ONEPG)
+ ERRORS += test_failure(cmd, "Without --pgid the object '{\"oid\":\"obj4\",\"key\":\"\",\"snapid\":-1,\"hash\":2826278768,\"max\":0,\"pool\":1,\"namespace\":\"\"}' must be a JSON array")
+
+ cmd = (CFSD_PREFIX + "'[]' list-omap").format(osd=ONEOSD, pg=ONEPG)
+ ERRORS += test_failure(cmd, "Object '[]' must be a JSON array with 2 elements")
+
+ cmd = (CFSD_PREFIX + "'[\"1.0\"]' list-omap").format(osd=ONEOSD, pg=ONEPG)
+ ERRORS += test_failure(cmd, "Object '[\"1.0\"]' must be a JSON array with 2 elements")
+
+ cmd = (CFSD_PREFIX + "'[\"1.0\", 5, 8, 9]' list-omap").format(osd=ONEOSD, pg=ONEPG)
+ ERRORS += test_failure(cmd, "Object '[\"1.0\", 5, 8, 9]' must be a JSON array with 2 elements")
+
+ cmd = (CFSD_PREFIX + "'[1, 2]' list-omap").format(osd=ONEOSD, pg=ONEPG)
+ ERRORS += test_failure(cmd, "Object '[1, 2]' must be a JSON array with the first element a string")
+
+ cmd = (CFSD_PREFIX + "'[\"1.3\",{{\"snapid\":\"not an int\"}}]' list-omap").format(osd=ONEOSD, pg=ONEPG)
+ ERRORS += test_failure(cmd, "Decode object JSON error: value type is 2 not 4")
+
+ TMPFILE = r"/tmp/tmp.{pid}".format(pid=pid)
+ ALLPGS = OBJREPPGS + OBJECPGS
+ OSDS = get_osds(ALLPGS[0], OSDDIR)
+ osd = OSDS[0]
+
+ print("Test all --op dump-journal")
+ ALLOSDS = [f for f in os.listdir(OSDDIR) if os.path.isdir(os.path.join(OSDDIR, f)) and f.find("osd") == 0]
+ ERRORS += test_dump_journal(CFSD_PREFIX, ALLOSDS)
+
+ # Test --op list and generate json for all objects
+ print("Test --op list variants")
+
+ # retrieve all objects from all PGs
+ tmpfd = open(TMPFILE, "wb")
+ cmd = (CFSD_PREFIX + "--op list --format json").format(osd=osd)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=tmpfd)
+ if ret != 0:
+ logging.error("Bad exit status {ret} from {cmd}".format(ret=ret, cmd=cmd))
+ ERRORS += 1
+ tmpfd.close()
+ lines = get_lines(TMPFILE)
+ JSONOBJ = sorted(set(lines))
+ (pgid, coll, jsondict) = json.loads(JSONOBJ[0])[0]
+
+ # retrieve all objects in a given PG
+ tmpfd = open(OTHERFILE, "ab")
+ cmd = (CFSD_PREFIX + "--op list --pgid {pg} --format json").format(osd=osd, pg=pgid)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=tmpfd)
+ if ret != 0:
+ logging.error("Bad exit status {ret} from {cmd}".format(ret=ret, cmd=cmd))
+ ERRORS += 1
+ tmpfd.close()
+ lines = get_lines(OTHERFILE)
+ JSONOBJ = sorted(set(lines))
+ (other_pgid, other_coll, other_jsondict) = json.loads(JSONOBJ[0])[0]
+
+ if pgid != other_pgid or jsondict != other_jsondict or coll != other_coll:
+ logging.error("the first line of --op list is different "
+ "from the first line of --op list --pgid {pg}".format(pg=pgid))
+ ERRORS += 1
+
+ # retrieve all objects with a given name in a given PG
+ tmpfd = open(OTHERFILE, "wb")
+ cmd = (CFSD_PREFIX + "--op list --pgid {pg} {object} --format json").format(osd=osd, pg=pgid, object=jsondict['oid'])
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=tmpfd)
+ if ret != 0:
+ logging.error("Bad exit status {ret} from {cmd}".format(ret=ret, cmd=cmd))
+ ERRORS += 1
+ tmpfd.close()
+ lines = get_lines(OTHERFILE)
+ JSONOBJ = sorted(set(lines))
+    (other_pgid, other_coll, other_jsondict) = json.loads(JSONOBJ[0])[0]
+
+ if pgid != other_pgid or jsondict != other_jsondict or coll != other_coll:
+ logging.error("the first line of --op list is different "
+ "from the first line of --op list --pgid {pg} {object}".format(pg=pgid, object=jsondict['oid']))
+ ERRORS += 1
+
+ print("Test --op list by generating json for all objects using default format")
+ for pg in ALLPGS:
+ OSDS = get_osds(pg, OSDDIR)
+ for osd in OSDS:
+ tmpfd = open(TMPFILE, "ab")
+ cmd = (CFSD_PREFIX + "--op list --pgid {pg}").format(osd=osd, pg=pg)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=tmpfd)
+ if ret != 0:
+ logging.error("Bad exit status {ret} from --op list request".format(ret=ret))
+ ERRORS += 1
+
+ tmpfd.close()
+ lines = get_lines(TMPFILE)
+ JSONOBJ = sorted(set(lines))
+ for JSON in JSONOBJ:
+ (pgid, jsondict) = json.loads(JSON)
+ # Skip clones for now
+ if jsondict['snapid'] != -2:
+ continue
+ db[jsondict['namespace']][jsondict['oid']]['json'] = json.dumps((pgid, jsondict))
+ # print db[jsondict['namespace']][jsondict['oid']]['json']
+ if jsondict['oid'].find(EC_NAME) == 0 and 'shard_id' not in jsondict:
+ logging.error("Malformed JSON {json}".format(json=JSON))
+ ERRORS += 1
+
+ # Test get-bytes
+ print("Test get-bytes and set-bytes")
+ for nspace in db.keys():
+ for basename in db[nspace].keys():
+ file = os.path.join(DATADIR, nspace + "-" + basename + "__head")
+ JSON = db[nspace][basename]['json']
+ GETNAME = "/tmp/getbytes.{pid}".format(pid=pid)
+ TESTNAME = "/tmp/testbytes.{pid}".format(pid=pid)
+ SETNAME = "/tmp/setbytes.{pid}".format(pid=pid)
+ BADNAME = "/tmp/badbytes.{pid}".format(pid=pid)
+ for pg in OBJREPPGS:
+ OSDS = get_osds(pg, OSDDIR)
+ for osd in OSDS:
+ DIR = os.path.join(OSDDIR, os.path.join(osd, os.path.join("current", "{pg}_head".format(pg=pg))))
+ fnames = [f for f in os.listdir(DIR) if os.path.isfile(os.path.join(DIR, f))
+ and f.split("_")[0] == basename and f.split("_")[4] == nspace]
+ if not fnames:
+ continue
+ try:
+ os.unlink(GETNAME)
+ except:
+ pass
+ cmd = (CFSD_PREFIX + " --pgid {pg} '{json}' get-bytes {fname}").format(osd=osd, pg=pg, json=JSON, fname=GETNAME)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True)
+ if ret != 0:
+ logging.error("Bad exit status {ret}".format(ret=ret))
+ ERRORS += 1
+ continue
+ cmd = "diff -q {file} {getfile}".format(file=file, getfile=GETNAME)
+ ret = call(cmd, shell=True)
+ if ret != 0:
+ logging.error("Data from get-bytes differ")
+ logging.debug("Got:")
+ cat_file(logging.DEBUG, GETNAME)
+ logging.debug("Expected:")
+ cat_file(logging.DEBUG, file)
+ ERRORS += 1
+ fd = open(SETNAME, "w")
+ data = "put-bytes going into {file}\n".format(file=file)
+ fd.write(data)
+ fd.close()
+ cmd = (CFSD_PREFIX + "--pgid {pg} '{json}' set-bytes {sname}").format(osd=osd, pg=pg, json=JSON, sname=SETNAME)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True)
+ if ret != 0:
+ logging.error("Bad exit status {ret} from set-bytes".format(ret=ret))
+ ERRORS += 1
+ fd = open(TESTNAME, "wb")
+ cmd = (CFSD_PREFIX + "--pgid {pg} '{json}' get-bytes -").format(osd=osd, pg=pg, json=JSON)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=fd)
+ fd.close()
+ if ret != 0:
+ logging.error("Bad exit status {ret} from get-bytes".format(ret=ret))
+ ERRORS += 1
+ cmd = "diff -q {setfile} {testfile}".format(setfile=SETNAME, testfile=TESTNAME)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True)
+ if ret != 0:
+ logging.error("Data after set-bytes differ")
+ logging.debug("Got:")
+ cat_file(logging.DEBUG, TESTNAME)
+ logging.debug("Expected:")
+ cat_file(logging.DEBUG, SETNAME)
+ ERRORS += 1
+
+ # Use set-bytes with --dry-run and make sure contents haven't changed
+ fd = open(BADNAME, "w")
+ data = "Bad data for --dry-run in {file}\n".format(file=file)
+ fd.write(data)
+ fd.close()
+ cmd = (CFSD_PREFIX + "--dry-run --pgid {pg} '{json}' set-bytes {sname}").format(osd=osd, pg=pg, json=JSON, sname=BADNAME)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd)
+ if ret != 0:
+ logging.error("Bad exit status {ret} from set-bytes --dry-run".format(ret=ret))
+ ERRORS += 1
+ fd = open(TESTNAME, "wb")
+ cmd = (CFSD_PREFIX + "--pgid {pg} '{json}' get-bytes -").format(osd=osd, pg=pg, json=JSON)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=fd)
+ fd.close()
+ if ret != 0:
+ logging.error("Bad exit status {ret} from get-bytes".format(ret=ret))
+ ERRORS += 1
+ cmd = "diff -q {setfile} {testfile}".format(setfile=SETNAME, testfile=TESTNAME)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True)
+ if ret != 0:
+ logging.error("Data after set-bytes --dry-run changed!")
+ logging.debug("Got:")
+ cat_file(logging.DEBUG, TESTNAME)
+ logging.debug("Expected:")
+ cat_file(logging.DEBUG, SETNAME)
+ ERRORS += 1
+
+ fd = open(file, "rb")
+ cmd = (CFSD_PREFIX + "--pgid {pg} '{json}' set-bytes").format(osd=osd, pg=pg, json=JSON)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdin=fd)
+ if ret != 0:
+ logging.error("Bad exit status {ret} from set-bytes to restore object".format(ret=ret))
+ ERRORS += 1
+ fd.close()
+
+ try:
+ os.unlink(GETNAME)
+ except:
+ pass
+ try:
+ os.unlink(TESTNAME)
+ except:
+ pass
+ try:
+ os.unlink(SETNAME)
+ except:
+ pass
+ try:
+ os.unlink(BADNAME)
+ except:
+ pass
+
+ # Test get-attr, set-attr, rm-attr, get-omaphdr, set-omaphdr, get-omap, set-omap, rm-omap
+ print("Test get-attr, set-attr, rm-attr, get-omaphdr, set-omaphdr, get-omap, set-omap, rm-omap")
+ for nspace in db.keys():
+ for basename in db[nspace].keys():
+ file = os.path.join(DATADIR, nspace + "-" + basename + "__head")
+ JSON = db[nspace][basename]['json']
+ for pg in OBJREPPGS:
+ OSDS = get_osds(pg, OSDDIR)
+ for osd in OSDS:
+ DIR = os.path.join(OSDDIR, os.path.join(osd, os.path.join("current", "{pg}_head".format(pg=pg))))
+ fnames = [f for f in os.listdir(DIR) if os.path.isfile(os.path.join(DIR, f))
+ and f.split("_")[0] == basename and f.split("_")[4] == nspace]
+ if not fnames:
+ continue
+ for key, val in db[nspace][basename]["xattr"].items():
+ attrkey = "_" + key
+ cmd = (CFSD_PREFIX + " '{json}' get-attr {key}").format(osd=osd, json=JSON, key=attrkey)
+ logging.debug(cmd)
+ getval = check_output(cmd, shell=True).decode()
+ if getval != val:
+ logging.error("get-attr of key {key} returned wrong val: {get} instead of {orig}".format(key=attrkey, get=getval, orig=val))
+ ERRORS += 1
+ continue
+ # set-attr to bogus value "foobar"
+ cmd = ("echo -n foobar | " + CFSD_PREFIX + " --pgid {pg} '{json}' set-attr {key}").format(osd=osd, pg=pg, json=JSON, key=attrkey)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True)
+ if ret != 0:
+ logging.error("Bad exit status {ret} from set-attr".format(ret=ret))
+ ERRORS += 1
+ continue
+ # Test set-attr with dry-run
+ cmd = ("echo -n dryrunbroken | " + CFSD_PREFIX + "--dry-run '{json}' set-attr {key}").format(osd=osd, pg=pg, json=JSON, key=attrkey)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=nullfd)
+ if ret != 0:
+ logging.error("Bad exit status {ret} from set-attr".format(ret=ret))
+ ERRORS += 1
+ continue
+ # Check the set-attr
+ cmd = (CFSD_PREFIX + " --pgid {pg} '{json}' get-attr {key}").format(osd=osd, pg=pg, json=JSON, key=attrkey)
+ logging.debug(cmd)
+ # check_output raises CalledProcessError on a non-zero exit, so catch that
+ # instead of re-testing the stale ret from the previous call
+ try:
+ getval = check_output(cmd, shell=True).decode()
+ except subprocess.CalledProcessError as e:
+ logging.error("Bad exit status {ret} from get-attr".format(ret=e.returncode))
+ ERRORS += 1
+ continue
+ if getval != "foobar":
+ logging.error("Check of set-attr failed because we got {val}".format(val=getval))
+ ERRORS += 1
+ continue
+ # Test rm-attr
+ cmd = (CFSD_PREFIX + "'{json}' rm-attr {key}").format(osd=osd, pg=pg, json=JSON, key=attrkey)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True)
+ if ret != 0:
+ logging.error("Bad exit status {ret} from rm-attr".format(ret=ret))
+ ERRORS += 1
+ continue
+ # Check rm-attr with dry-run
+ cmd = (CFSD_PREFIX + "--dry-run '{json}' rm-attr {key}").format(osd=osd, pg=pg, json=JSON, key=attrkey)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=nullfd)
+ if ret != 0:
+ logging.error("Bad exit status {ret} from rm-attr".format(ret=ret))
+ ERRORS += 1
+ continue
+ cmd = (CFSD_PREFIX + "'{json}' get-attr {key}").format(osd=osd, pg=pg, json=JSON, key=attrkey)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stderr=nullfd, stdout=nullfd)
+ if ret == 0:
+ logging.error("For rm-attr expect get-attr to fail, but it succeeded")
+ ERRORS += 1
+ # Put back value
+ cmd = ("echo -n {val} | " + CFSD_PREFIX + " --pgid {pg} '{json}' set-attr {key}").format(osd=osd, pg=pg, json=JSON, key=attrkey, val=val)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True)
+ if ret != 0:
+ logging.error("Bad exit status {ret} from set-attr".format(ret=ret))
+ ERRORS += 1
+ continue
+
+ hdr = db[nspace][basename].get("omapheader", "")
+ cmd = (CFSD_PREFIX + "'{json}' get-omaphdr").format(osd=osd, json=JSON)
+ logging.debug(cmd)
+ gethdr = check_output(cmd, shell=True).decode()
+ if gethdr != hdr:
+ logging.error("get-omaphdr was wrong: {get} instead of {orig}".format(get=gethdr, orig=hdr))
+ ERRORS += 1
+ continue
+ # set-omaphdr to bogus value "foobar"
+ cmd = ("echo -n foobar | " + CFSD_PREFIX + "'{json}' set-omaphdr").format(osd=osd, pg=pg, json=JSON)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True)
+ if ret != 0:
+ logging.error("Bad exit status {ret} from set-omaphdr".format(ret=ret))
+ ERRORS += 1
+ continue
+ # Check the set-omaphdr
+ cmd = (CFSD_PREFIX + "'{json}' get-omaphdr").format(osd=osd, pg=pg, json=JSON)
+ logging.debug(cmd)
+ try:
+ gethdr = check_output(cmd, shell=True).decode()
+ except subprocess.CalledProcessError as e:
+ logging.error("Bad exit status {ret} from get-omaphdr".format(ret=e.returncode))
+ ERRORS += 1
+ continue
+ if gethdr != "foobar":
+ logging.error("Check of set-omaphdr failed because we got {val}".format(val=gethdr))
+ ERRORS += 1
+ continue
+ # Test dry-run with set-omaphdr
+ cmd = ("echo -n dryrunbroken | " + CFSD_PREFIX + "--dry-run '{json}' set-omaphdr").format(osd=osd, pg=pg, json=JSON)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=nullfd)
+ if ret != 0:
+ logging.error("Bad exit status {ret} from set-omaphdr".format(ret=ret))
+ ERRORS += 1
+ continue
+ # Put back value
+ cmd = ("echo -n {val} | " + CFSD_PREFIX + "'{json}' set-omaphdr").format(osd=osd, pg=pg, json=JSON, val=hdr)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True)
+ if ret != 0:
+ logging.error("Bad exit status {ret} from set-omaphdr".format(ret=ret))
+ ERRORS += 1
+ continue
+
+ for omapkey, val in db[nspace][basename]["omap"].items():
+ cmd = (CFSD_PREFIX + " '{json}' get-omap {key}").format(osd=osd, json=JSON, key=omapkey)
+ logging.debug(cmd)
+ getval = check_output(cmd, shell=True).decode()
+ if getval != val:
+ logging.error("get-omap of key {key} returned wrong val: {get} instead of {orig}".format(key=omapkey, get=getval, orig=val))
+ ERRORS += 1
+ continue
+ # set-omap to bogus value "foobar"
+ cmd = ("echo -n foobar | " + CFSD_PREFIX + " --pgid {pg} '{json}' set-omap {key}").format(osd=osd, pg=pg, json=JSON, key=omapkey)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True)
+ if ret != 0:
+ logging.error("Bad exit status {ret} from set-omap".format(ret=ret))
+ ERRORS += 1
+ continue
+ # Check set-omap with dry-run
+ cmd = ("echo -n dryrunbroken | " + CFSD_PREFIX + "--dry-run --pgid {pg} '{json}' set-omap {key}").format(osd=osd, pg=pg, json=JSON, key=omapkey)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=nullfd)
+ if ret != 0:
+ logging.error("Bad exit status {ret} from set-omap".format(ret=ret))
+ ERRORS += 1
+ continue
+ # Check the set-omap
+ cmd = (CFSD_PREFIX + " --pgid {pg} '{json}' get-omap {key}").format(osd=osd, pg=pg, json=JSON, key=omapkey)
+ logging.debug(cmd)
+ try:
+ getval = check_output(cmd, shell=True).decode()
+ except subprocess.CalledProcessError as e:
+ logging.error("Bad exit status {ret} from get-omap".format(ret=e.returncode))
+ ERRORS += 1
+ continue
+ if getval != "foobar":
+ logging.error("Check of set-omap failed because we got {val}".format(val=getval))
+ ERRORS += 1
+ continue
+ # Test rm-omap
+ cmd = (CFSD_PREFIX + "'{json}' rm-omap {key}").format(osd=osd, pg=pg, json=JSON, key=omapkey)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True)
+ if ret != 0:
+ logging.error("Bad exit status {ret} from rm-omap".format(ret=ret))
+ ERRORS += 1
+ # Check rm-omap with dry-run
+ cmd = (CFSD_PREFIX + "--dry-run '{json}' rm-omap {key}").format(osd=osd, pg=pg, json=JSON, key=omapkey)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=nullfd)
+ if ret != 0:
+ logging.error("Bad exit status {ret} from rm-omap".format(ret=ret))
+ ERRORS += 1
+ cmd = (CFSD_PREFIX + "'{json}' get-omap {key}").format(osd=osd, pg=pg, json=JSON, key=omapkey)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stderr=nullfd, stdout=nullfd)
+ if ret == 0:
+ logging.error("For rm-omap expect get-omap to fail, but it succeeded")
+ ERRORS += 1
+ # Put back value
+ cmd = ("echo -n {val} | " + CFSD_PREFIX + " --pgid {pg} '{json}' set-omap {key}").format(osd=osd, pg=pg, json=JSON, key=omapkey, val=val)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True)
+ if ret != 0:
+ logging.error("Bad exit status {ret} from set-omap".format(ret=ret))
+ ERRORS += 1
+ continue
+
+ # Test dump
+ print("Test dump")
+ for nspace in db.keys():
+ for basename in db[nspace].keys():
+ file = os.path.join(DATADIR, nspace + "-" + basename + "__head")
+ JSON = db[nspace][basename]['json']
+ jsondict = json.loads(JSON)
+ for pg in OBJREPPGS:
+ OSDS = get_osds(pg, OSDDIR)
+ for osd in OSDS:
+ DIR = os.path.join(OSDDIR, os.path.join(osd, os.path.join("current", "{pg}_head".format(pg=pg))))
+ fnames = [f for f in os.listdir(DIR) if os.path.isfile(os.path.join(DIR, f))
+ and f.split("_")[0] == basename and f.split("_")[4] == nspace]
+ if not fnames:
+ continue
+ if int(basename.split(REP_NAME)[1]) > int(NUM_CLONED_REP_OBJECTS):
+ continue
+ logging.debug("REPobject " + JSON)
+ cmd = (CFSD_PREFIX + " '{json}' dump | grep '\"snap\": 1,' > /dev/null").format(osd=osd, json=JSON)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True)
+ if ret != 0:
+ logging.error("Invalid dump for {json}".format(json=JSON))
+ ERRORS += 1
+ if 'shard_id' in jsondict[1]:
+ logging.debug("ECobject " + JSON)
+ for pg in OBJECPGS:
+ OSDS = get_osds(pg, OSDDIR)
+ jsondict = json.loads(JSON)
+ for osd in OSDS:
+ DIR = os.path.join(OSDDIR, os.path.join(osd, os.path.join("current", "{pg}_head".format(pg=pg))))
+ fnames = [f for f in os.listdir(DIR) if os.path.isfile(os.path.join(DIR, f))
+ and f.split("_")[0] == basename and f.split("_")[4] == nspace]
+ if not fnames:
+ continue
+ if int(basename.split(EC_NAME)[1]) > int(NUM_EC_OBJECTS):
+ continue
+ # Fix shard_id since we only have one json instance for each object
+ jsondict[1]['shard_id'] = int(pg.split('s')[1])
+ cmd = (CFSD_PREFIX + " '{json}' dump | grep '\"hinfo\": [{{]' > /dev/null").format(osd=osd, json=json.dumps((pg, jsondict[1])))
+ logging.debug(cmd)
+ ret = call(cmd, shell=True)
+ if ret != 0:
+ logging.error("Invalid dump for {json}".format(json=JSON))
+
+ print("Test list-attrs get-attr")
+ ATTRFILE = r"/tmp/attrs.{pid}".format(pid=pid)
+ VALFILE = r"/tmp/val.{pid}".format(pid=pid)
+ for nspace in db.keys():
+ for basename in db[nspace].keys():
+ file = os.path.join(DATADIR, nspace + "-" + basename)
+ JSON = db[nspace][basename]['json']
+ jsondict = json.loads(JSON)
+
+ if 'shard_id' in jsondict[1]:
+ logging.debug("ECobject " + JSON)
+ found = 0
+ for pg in OBJECPGS:
+ OSDS = get_osds(pg, OSDDIR)
+ # Fix shard_id since we only have one json instance for each object
+ jsondict[1]['shard_id'] = int(pg.split('s')[1])
+ JSON = json.dumps((pg, jsondict[1]))
+ for osd in OSDS:
+ cmd = (CFSD_PREFIX + " --tty '{json}' get-attr hinfo_key").format(osd=osd, json=JSON)
+ logging.debug("TRY: " + cmd)
+ try:
+ out = check_output(cmd, shell=True, stderr=subprocess.STDOUT).decode()
+ logging.debug("FOUND: {json} in {osd} has value '{val}'".format(osd=osd, json=JSON, val=out))
+ found += 1
+ except subprocess.CalledProcessError as e:
+ logging.debug("Error message: {output}".format(output=e.output))
+ if "No such file or directory" not in str(e.output) and \
+ "No data available" not in str(e.output) and \
+ "not contained by pg" not in str(e.output):
+ raise
+ # Assuming k=2 m=1 for the default ec pool
+ if found != 3:
+ logging.error("{json} hinfo_key found {found} times instead of 3".format(json=JSON, found=found))
+ ERRORS += 1
+
+ for pg in ALLPGS:
+ # Make sure rep obj with rep pg or ec obj with ec pg
+ if ('shard_id' in jsondict[1]) != (pg.find('s') > 0):
+ continue
+ if 'shard_id' in jsondict[1]:
+ # Fix shard_id since we only have one json instance for each object
+ jsondict[1]['shard_id'] = int(pg.split('s')[1])
+ JSON = json.dumps((pg, jsondict[1]))
+ OSDS = get_osds(pg, OSDDIR)
+ for osd in OSDS:
+ DIR = os.path.join(OSDDIR, os.path.join(osd, os.path.join("current", "{pg}_head".format(pg=pg))))
+ fnames = [f for f in os.listdir(DIR) if os.path.isfile(os.path.join(DIR, f))
+ and f.split("_")[0] == basename and f.split("_")[4] == nspace]
+ if not fnames:
+ continue
+ afd = open(ATTRFILE, "wb")
+ cmd = (CFSD_PREFIX + " '{json}' list-attrs").format(osd=osd, json=JSON)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=afd)
+ afd.close()
+ if ret != 0:
+ logging.error("list-attrs failed with {ret}".format(ret=ret))
+ ERRORS += 1
+ continue
+ keys = get_lines(ATTRFILE)
+ values = dict(db[nspace][basename]["xattr"])
+ for key in keys:
+ if key == "_" or key == "snapset" or key == "hinfo_key":
+ continue
+ key = key.strip("_")
+ if key not in values:
+ logging.error("Unexpected key {key} present".format(key=key))
+ ERRORS += 1
+ continue
+ exp = values.pop(key)
+ vfd = open(VALFILE, "wb")
+ cmd = (CFSD_PREFIX + " '{json}' get-attr {key}").format(osd=osd, json=JSON, key="_" + key)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=vfd)
+ vfd.close()
+ if ret != 0:
+ logging.error("get-attr failed with {ret}".format(ret=ret))
+ ERRORS += 1
+ continue
+ lines = get_lines(VALFILE)
+ val = lines[0]
+ if exp != val:
+ logging.error("For key {key} got value {got} instead of {expected}".format(key=key, got=val, expected=exp))
+ ERRORS += 1
+ if len(values) != 0:
+ logging.error("Not all keys found, remaining keys:")
+ print(values)
+
+ print("Test --op meta-list")
+ tmpfd = open(TMPFILE, "wb")
+ cmd = (CFSD_PREFIX + "--op meta-list").format(osd=ONEOSD)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=tmpfd)
+ if ret != 0:
+ logging.error("Bad exit status {ret} from --op meta-list request".format(ret=ret))
+ ERRORS += 1
+
+ print("Test get-bytes on meta")
+ tmpfd.close()
+ lines = get_lines(TMPFILE)
+ JSONOBJ = sorted(set(lines))
+ for JSON in JSONOBJ:
+ (pgid, jsondict) = json.loads(JSON)
+ if pgid != "meta":
+ logging.error("pgid incorrect for --op meta-list {pgid}".format(pgid=pgid))
+ ERRORS += 1
+ if jsondict['namespace'] != "":
+ logging.error("namespace non null --op meta-list {ns}".format(ns=jsondict['namespace']))
+ ERRORS += 1
+ logging.info(JSON)
+ try:
+ os.unlink(GETNAME)
+ except:
+ pass
+ cmd = (CFSD_PREFIX + "'{json}' get-bytes {fname}").format(osd=ONEOSD, json=JSON, fname=GETNAME)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True)
+ if ret != 0:
+ logging.error("Bad exit status {ret}".format(ret=ret))
+ ERRORS += 1
+
+ try:
+ os.unlink(GETNAME)
+ except:
+ pass
+ try:
+ os.unlink(TESTNAME)
+ except:
+ pass
+
+ print("Test pg info")
+ for pg in ALLREPPGS + ALLECPGS:
+ for osd in get_osds(pg, OSDDIR):
+ cmd = (CFSD_PREFIX + "--op info --pgid {pg} | grep '\"pgid\": \"{pg}\"'").format(osd=osd, pg=pg)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=nullfd)
+ if ret != 0:
+ logging.error("Getting info failed for pg {pg} from {osd} with {ret}".format(pg=pg, osd=osd, ret=ret))
+ ERRORS += 1
+
+ print("Test pg logging")
+ if len(ALLREPPGS + ALLECPGS) == len(OBJREPPGS + OBJECPGS):
+ logging.warning("All PGs have objects, so no log without modify entries")
+ for pg in ALLREPPGS + ALLECPGS:
+ for osd in get_osds(pg, OSDDIR):
+ tmpfd = open(TMPFILE, "wb")
+ cmd = (CFSD_PREFIX + "--op log --pgid {pg}").format(osd=osd, pg=pg)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=tmpfd)
+ if ret != 0:
+ logging.error("Getting log failed for pg {pg} from {osd} with {ret}".format(pg=pg, osd=osd, ret=ret))
+ ERRORS += 1
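+ # A pg that holds objects should have at least one "modify" entry in its log; an empty pg should not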
+ HASOBJ = pg in OBJREPPGS + OBJECPGS
+ MODOBJ = False
+ for line in get_lines(TMPFILE):
+ if line.find("modify") != -1:
+ MODOBJ = True
+ break
+ if HASOBJ != MODOBJ:
+ logging.error("Bad log for pg {pg} from {osd}".format(pg=pg, osd=osd))
+ MSG = (HASOBJ and [""] or ["NOT "])[0]
+ print("Log should {msg}have a modify entry".format(msg=MSG))
+ ERRORS += 1
+
+ try:
+ os.unlink(TMPFILE)
+ except:
+ pass
+
+ print("Test list-pgs")
+ for osd in [f for f in os.listdir(OSDDIR) if os.path.isdir(os.path.join(OSDDIR, f)) and f.find("osd") == 0]:
+
+ CHECK_PGS = get_osd_pgs(os.path.join(OSDDIR, osd), None)
+ CHECK_PGS = sorted(CHECK_PGS)
+
+ cmd = (CFSD_PREFIX + "--op list-pgs").format(osd=osd)
+ logging.debug(cmd)
+ TEST_PGS = check_output(cmd, shell=True).decode().split("\n")
+ TEST_PGS = sorted(TEST_PGS)[1:] # Skip extra blank line
+
+ if TEST_PGS != CHECK_PGS:
+ logging.error("list-pgs got wrong result for osd.{osd}".format(osd=osd))
+ logging.error("Expected {pgs}".format(pgs=CHECK_PGS))
+ logging.error("Got {pgs}".format(pgs=TEST_PGS))
+ ERRORS += 1
+
+ EXP_ERRORS = 0
+ print("Test pg export --dry-run")
+ pg = ALLREPPGS[0]
+ osd = get_osds(pg, OSDDIR)[0]
+ fname = "/tmp/fname.{pid}".format(pid=pid)
+ cmd = (CFSD_PREFIX + "--dry-run --op export --pgid {pg} --file {file}").format(osd=osd, pg=pg, file=fname)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd)
+ if ret != 0:
+ logging.error("Exporting --dry-run failed for pg {pg} on {osd} with {ret}".format(pg=pg, osd=osd, ret=ret))
+ EXP_ERRORS += 1
+ elif os.path.exists(fname):
+ logging.error("Exporting --dry-run created file")
+ EXP_ERRORS += 1
+
+ cmd = (CFSD_PREFIX + "--dry-run --op export --pgid {pg} > {file}").format(osd=osd, pg=pg, file=fname)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd)
+ if ret != 0:
+ logging.error("Exporting --dry-run failed for pg {pg} on {osd} with {ret}".format(pg=pg, osd=osd, ret=ret))
+ EXP_ERRORS += 1
+ else:
+ outdata = get_lines(fname)
+ if len(outdata) > 0:
+ logging.error("Exporting --dry-run to stdout not empty")
+ logging.error("Data: " + outdata)
+ EXP_ERRORS += 1
+
+ os.mkdir(TESTDIR)
+ for osd in [f for f in os.listdir(OSDDIR) if os.path.isdir(os.path.join(OSDDIR, f)) and f.find("osd") == 0]:
+ os.mkdir(os.path.join(TESTDIR, osd))
+ print("Test pg export")
+ for pg in ALLREPPGS + ALLECPGS:
+ for osd in get_osds(pg, OSDDIR):
+ mydir = os.path.join(TESTDIR, osd)
+ fname = os.path.join(mydir, pg)
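+ # Exercise all three ways of writing the export: stdout redirection,
+ # '--file -' with redirection, and an explicit --file path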
+ if pg == ALLREPPGS[0]:
+ cmd = (CFSD_PREFIX + "--op export --pgid {pg} > {file}").format(osd=osd, pg=pg, file=fname)
+ elif pg == ALLREPPGS[1]:
+ cmd = (CFSD_PREFIX + "--op export --pgid {pg} --file - > {file}").format(osd=osd, pg=pg, file=fname)
+ else:
+ cmd = (CFSD_PREFIX + "--op export --pgid {pg} --file {file}").format(osd=osd, pg=pg, file=fname)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd)
+ if ret != 0:
+ logging.error("Exporting failed for pg {pg} on {osd} with {ret}".format(pg=pg, osd=osd, ret=ret))
+ EXP_ERRORS += 1
+
+ ERRORS += EXP_ERRORS
+
+ print("Test clear-data-digest")
+ for nspace in db.keys():
+ for basename in db[nspace].keys():
+ JSON = db[nspace][basename]['json']
+ cmd = (CFSD_PREFIX + "'{json}' clear-data-digest").format(osd='osd0', json=JSON)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd)
+ if ret != 0:
+ logging.error("Clearing data digest failed for {json}".format(json=JSON))
+ ERRORS += 1
+ break
+ cmd = (CFSD_PREFIX + "'{json}' dump | grep '\"data_digest\": \"0xff'").format(osd='osd0', json=JSON)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd)
+ if ret != 0:
+ logging.error("Data digest not cleared for {json}".format(json=JSON))
+ ERRORS += 1
+ break
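+ # The breaks below limit the clear-data-digest check to the first object only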
+ break
+ break
+
+ print("Test pg removal")
+ RM_ERRORS = 0
+ for pg in ALLREPPGS + ALLECPGS:
+ for osd in get_osds(pg, OSDDIR):
+ # This should do nothing
+ cmd = (CFSD_PREFIX + "--op remove --pgid {pg} --dry-run").format(pg=pg, osd=osd)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=nullfd)
+ if ret != 0:
+ logging.error("Removing --dry-run failed for pg {pg} on {osd} with {ret}".format(pg=pg, osd=osd, ret=ret))
+ RM_ERRORS += 1
+ cmd = (CFSD_PREFIX + "--force --op remove --pgid {pg}").format(pg=pg, osd=osd)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=nullfd)
+ if ret != 0:
+ logging.error("Removing failed for pg {pg} on {osd} with {ret}".format(pg=pg, osd=osd, ret=ret))
+ RM_ERRORS += 1
+
+ ERRORS += RM_ERRORS
+
+ IMP_ERRORS = 0
+ if EXP_ERRORS == 0 and RM_ERRORS == 0:
+ print("Test pg import")
+ for osd in [f for f in os.listdir(OSDDIR) if os.path.isdir(os.path.join(OSDDIR, f)) and f.find("osd") == 0]:
+ dir = os.path.join(TESTDIR, osd)
+ PGS = [f for f in os.listdir(dir) if os.path.isfile(os.path.join(dir, f))]
+ for pg in PGS:
+ file = os.path.join(dir, pg)
+ # Make sure this doesn't crash
+ cmd = (CFSD_PREFIX + "--op dump-export --file {file}").format(osd=osd, file=file)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=nullfd)
+ if ret != 0:
+ logging.error("Dump-export failed from {file} with {ret}".format(file=file, ret=ret))
+ IMP_ERRORS += 1
+ # This should do nothing
+ cmd = (CFSD_PREFIX + "--op import --file {file} --dry-run").format(osd=osd, file=file)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=nullfd)
+ if ret != 0:
+ logging.error("Import failed from {file} with {ret}".format(file=file, ret=ret))
+ IMP_ERRORS += 1
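+ # Exercise all three ways of feeding the import: a stdin pipe,
+ # '--file -' with input redirection, and an explicit --file path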
+ if pg == PGS[0]:
+ cmd = ("cat {file} |".format(file=file) + CFSD_PREFIX + "--op import").format(osd=osd)
+ elif pg == PGS[1]:
+ cmd = (CFSD_PREFIX + "--op import --file - --pgid {pg} < {file}").format(osd=osd, file=file, pg=pg)
+ else:
+ cmd = (CFSD_PREFIX + "--op import --file {file}").format(osd=osd, file=file)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=nullfd)
+ if ret != 0:
+ logging.error("Import failed from {file} with {ret}".format(file=file, ret=ret))
+ IMP_ERRORS += 1
+ else:
+ logging.warning("SKIPPING IMPORT TESTS DUE TO PREVIOUS FAILURES")
+
+ ERRORS += IMP_ERRORS
+ logging.debug(cmd)
+
+ if EXP_ERRORS == 0 and RM_ERRORS == 0 and IMP_ERRORS == 0:
+ print("Verify replicated import data")
+ data_errors, _ = check_data(DATADIR, TMPFILE, OSDDIR, REP_NAME)
+ ERRORS += data_errors
+ else:
+ logging.warning("SKIPPING CHECKING IMPORT DATA DUE TO PREVIOUS FAILURES")
+
+ print("Test all --op dump-journal again")
+ ALLOSDS = [f for f in os.listdir(OSDDIR) if os.path.isdir(os.path.join(OSDDIR, f)) and f.find("osd") == 0]
+ ERRORS += test_dump_journal(CFSD_PREFIX, ALLOSDS)
+
+ vstart(new=False)
+ wait_for_health()
+
+ if EXP_ERRORS == 0 and RM_ERRORS == 0 and IMP_ERRORS == 0:
+ print("Verify erasure coded import data")
+ ERRORS += verify(DATADIR, EC_POOL, EC_NAME, db)
+ # Check replicated data/xattr/omap using rados
+ print("Verify replicated import data using rados")
+ ERRORS += verify(DATADIR, REP_POOL, REP_NAME, db)
+
+ if EXP_ERRORS == 0:
+ NEWPOOL = "rados-import-pool"
+ cmd = "{path}/ceph osd pool create {pool} 8".format(pool=NEWPOOL, path=CEPH_BIN)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd)
+
+ print("Test rados import")
+ first = True
+ for osd in [f for f in os.listdir(OSDDIR) if os.path.isdir(os.path.join(OSDDIR, f)) and f.find("osd") == 0]:
+ dir = os.path.join(TESTDIR, osd)
+ for pg in [f for f in os.listdir(dir) if os.path.isfile(os.path.join(dir, f))]:
+ if pg.find("{id}.".format(id=REPID)) != 0:
+ continue
+ file = os.path.join(dir, pg)
+ if first:
+ first = False
+ # This should do nothing
+ cmd = "{path}/rados import -p {pool} --dry-run {file}".format(pool=NEWPOOL, file=file, path=CEPH_BIN)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=nullfd)
+ if ret != 0:
+ logging.error("Rados import --dry-run failed from {file} with {ret}".format(file=file, ret=ret))
+ ERRORS += 1
+ cmd = "{path}/rados -p {pool} ls".format(pool=NEWPOOL, path=CEPH_BIN)
+ logging.debug(cmd)
+ data = check_output(cmd, shell=True).decode()
+ if data:
+ logging.error("'{data}'".format(data=data))
+ logging.error("Found objects after dry-run")
+ ERRORS += 1
+ cmd = "{path}/rados import -p {pool} {file}".format(pool=NEWPOOL, file=file, path=CEPH_BIN)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=nullfd)
+ if ret != 0:
+ logging.error("Rados import failed from {file} with {ret}".format(file=file, ret=ret))
+ ERRORS += 1
+ cmd = "{path}/rados import -p {pool} --no-overwrite {file}".format(pool=NEWPOOL, file=file, path=CEPH_BIN)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=nullfd)
+ if ret != 0:
+ logging.error("Rados import --no-overwrite failed from {file} with {ret}".format(file=file, ret=ret))
+ ERRORS += 1
+
+ ERRORS += verify(DATADIR, NEWPOOL, REP_NAME, db)
+ else:
+ logging.warning("SKIPPING IMPORT-RADOS TESTS DUE TO PREVIOUS FAILURES")
+
+ # Clear directories of previous portion
+ call("/bin/rm -rf {dir}".format(dir=TESTDIR), shell=True)
+ call("/bin/rm -rf {dir}".format(dir=DATADIR), shell=True)
+ os.mkdir(TESTDIR)
+ os.mkdir(DATADIR)
+
+ # Cause SPLIT_POOL to split and test import with object/log filtering
+ print("Testing import all objects after a split")
+ SPLIT_POOL = "split_pool"
+ PG_COUNT = 1
+ SPLIT_OBJ_COUNT = 5
+ SPLIT_NSPACE_COUNT = 2
+ SPLIT_NAME = "split"
+ cmd = "{path}/ceph osd pool create {pool} {pg} {pg} replicated".format(pool=SPLIT_POOL, pg=PG_COUNT, path=CEPH_BIN)
+ logging.debug(cmd)
+ call(cmd, shell=True, stdout=nullfd, stderr=nullfd)
+ SPLITID = get_pool_id(SPLIT_POOL, nullfd)
+ pool_size = int(check_output("{path}/ceph osd pool get {pool} size".format(pool=SPLIT_POOL, path=CEPH_BIN), shell=True, stderr=nullfd).decode().split(" ")[1])
+ EXP_ERRORS = 0
+ RM_ERRORS = 0
+ IMP_ERRORS = 0
+
+ objects = range(1, SPLIT_OBJ_COUNT + 1)
+ nspaces = range(SPLIT_NSPACE_COUNT)
+ for n in nspaces:
+ nspace = get_nspace(n)
+
+ for i in objects:
+ NAME = SPLIT_NAME + "{num}".format(num=i)
+ LNAME = nspace + "-" + NAME
+ DDNAME = os.path.join(DATADIR, LNAME)
+ DDNAME += "__head"
+
+ cmd = "rm -f " + DDNAME
+ logging.debug(cmd)
+ call(cmd, shell=True)
+
+ if i == 1:
+ dataline = range(DATALINECOUNT)
+ else:
+ dataline = range(1)
+ fd = open(DDNAME, "w")
+ data = "This is the split data for " + LNAME + "\n"
+ for _ in dataline:
+ fd.write(data)
+ fd.close()
+
+ cmd = "{path}/rados -p {pool} -N '{nspace}' put {name} {ddname}".format(pool=SPLIT_POOL, name=NAME, ddname=DDNAME, nspace=nspace, path=CEPH_BIN)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stderr=nullfd)
+ if ret != 0:
+ logging.critical("Rados put command failed with {ret}".format(ret=ret))
+ return 1
+
+ wait_for_health()
+ kill_daemons()
+
+ for osd in [f for f in os.listdir(OSDDIR) if os.path.isdir(os.path.join(OSDDIR, f)) and f.find("osd") == 0]:
+ os.mkdir(os.path.join(TESTDIR, osd))
+
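+ # With pg_num=1 the split pool has a single pg, poolid.0; export it before splitting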
+ pg = "{pool}.0".format(pool=SPLITID)
+ EXPORT_PG = pg
+
+ export_osds = get_osds(pg, OSDDIR)
+ for osd in export_osds:
+ mydir = os.path.join(TESTDIR, osd)
+ fname = os.path.join(mydir, pg)
+ cmd = (CFSD_PREFIX + "--op export --pgid {pg} --file {file}").format(osd=osd, pg=pg, file=fname)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd)
+ if ret != 0:
+ logging.error("Exporting failed for pg {pg} on {osd} with {ret}".format(pg=pg, osd=osd, ret=ret))
+ EXP_ERRORS += 1
+
+ ERRORS += EXP_ERRORS
+
+ if EXP_ERRORS == 0:
+ vstart(new=False)
+ wait_for_health()
+
+ cmd = "{path}/ceph osd pool set {pool} pg_num 2".format(pool=SPLIT_POOL, path=CEPH_BIN)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd)
+ time.sleep(5)
+ wait_for_health()
+
+ kill_daemons()
+
+ # Now 2 PGs, poolid.0 and poolid.1
+ # Note which OSDs held poolid.0 before we remove the PGs
+ osds = get_osds("{pool}.0".format(pool=SPLITID), OSDDIR)
+ for seed in range(2):
+ pg = "{pool}.{seed}".format(pool=SPLITID, seed=seed)
+
+ for osd in osds:
+ cmd = (CFSD_PREFIX + "--force --op remove --pgid {pg}").format(pg=pg, osd=osd)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=nullfd)
+
+ which = 0
+ for osd in osds:
+ # The export files exist only for EXPORT_PG, taken from the OSDs that
+ # held that pg before the split. Use 'which' to cycle through every
+ # export copy during import.
+ mydir = os.path.join(TESTDIR, export_osds[which])
+ fname = os.path.join(mydir, EXPORT_PG)
+ which += 1
+ cmd = (CFSD_PREFIX + "--op import --pgid {pg} --file {file}").format(osd=osd, pg=EXPORT_PG, file=fname)
+ logging.debug(cmd)
+ ret = call(cmd, shell=True, stdout=nullfd)
+ if ret != 0:
+ logging.error("Import failed from {file} with {ret}".format(file=file, ret=ret))
+ IMP_ERRORS += 1
+
+ ERRORS += IMP_ERRORS
+
+ # Start up again to make sure imports didn't corrupt anything
+ if IMP_ERRORS == 0:
+ print("Verify split import data")
+ data_errors, count = check_data(DATADIR, TMPFILE, OSDDIR, SPLIT_NAME)
+ ERRORS += data_errors
+ if count != (SPLIT_OBJ_COUNT * SPLIT_NSPACE_COUNT * pool_size):
+ logging.error("Incorrect number of replicas seen {count}".format(count=count))
+ ERRORS += 1
+ vstart(new=False)
+ wait_for_health()
+
+ call("/bin/rm -rf {dir}".format(dir=TESTDIR), shell=True)
+ call("/bin/rm -rf {dir}".format(dir=DATADIR), shell=True)
+
+ ERRORS += test_removeall(CFSD_PREFIX, db, OBJREPPGS, REP_POOL, CEPH_BIN, OSDDIR, REP_NAME, NUM_CLONED_REP_OBJECTS)
+
+ # vstart() starts 4 OSDs
+ ERRORS += test_get_set_osdmap(CFSD_PREFIX, list(range(4)), ALLOSDS)
+ ERRORS += test_get_set_inc_osdmap(CFSD_PREFIX, ALLOSDS[0])
+
+ kill_daemons()
+ CORES = [f for f in os.listdir(CEPH_DIR) if f.startswith("core.")]
+ if CORES:
+ CORE_DIR = os.path.join("/tmp", "cores.{pid}".format(pid=os.getpid()))
+ os.mkdir(CORE_DIR)
+ call("/bin/mv {ceph_dir}/core.* {core_dir}".format(ceph_dir=CEPH_DIR, core_dir=CORE_DIR), shell=True)
+ logging.error("Failure due to cores found")
+ logging.error("See {core_dir} for cores".format(core_dir=CORE_DIR))
+ ERRORS += len(CORES)
+
+ if ERRORS == 0:
+ print("TEST PASSED")
+ return 0
+ else:
+ print("TEST FAILED WITH {errcount} ERRORS".format(errcount=ERRORS))
+ return 1
+
+
+def remove_btrfs_subvolumes(path):
+ if platform.system() == "FreeBSD":
+ return
+ result = subprocess.Popen("stat -f -c '%%T' %s" % path, shell=True, stdout=subprocess.PIPE)
+ for line in result.stdout:
+ filesystem = line.decode('utf-8').rstrip('\n')
+ if filesystem == "btrfs":
+ result = subprocess.Popen("sudo btrfs subvolume list %s" % path, shell=True, stdout=subprocess.PIPE)
+ for line in result.stdout:
+ subvolume = line.decode('utf-8').split()[8]
+ # extracting the relative volume name
+ m = re.search(".*(%s.*)" % path, subvolume)
+ if m:
+ found = m.group(1)
+ call("sudo btrfs subvolume delete %s" % found, shell=True)
+
+
+if __name__ == "__main__":
+ status = 1
+ try:
+ status = main(sys.argv[1:])
+ finally:
+ kill_daemons()
+ os.chdir(CEPH_BUILD_DIR)
+ remove_btrfs_subvolumes(CEPH_DIR)
+ call("/bin/rm -fr {dir}".format(dir=CEPH_DIR), shell=True)
+ sys.exit(status)
diff --git a/qa/standalone/special/test-failure.sh b/qa/standalone/special/test-failure.sh
new file mode 100755
index 000000000..cede887d2
--- /dev/null
+++ b/qa/standalone/special/test-failure.sh
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+set -ex
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7202" # git grep '\<7202\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+function TEST_failure_log() {
+ local dir=$1
+
+ cat > $dir/test_failure.log << EOF
+This is a fake log file
+*
+*
+*
+*
+*
+This ends the fake log file
+EOF
+
+ # Test fails
+ return 1
+}
+
+function TEST_failure_core_only() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+ kill_daemons $dir SEGV mon 5
+ return 0
+}
+
+main test_failure "$@"