diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /qa/standalone | |
parent | Initial commit. (diff) | |
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip |
Adding upstream version 18.2.2. (ref: upstream/18.2.2)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'qa/standalone')
69 files changed, 26818 insertions, 0 deletions
diff --git a/qa/standalone/README b/qa/standalone/README new file mode 100644 index 000000000..3082442cb --- /dev/null +++ b/qa/standalone/README @@ -0,0 +1,23 @@ +qa/standalone +============= + +These scripts run standalone clusters, but not in a normal way. They make +use of functions ceph-helpers.sh to quickly start/stop daemons against +toy clusters in a single directory. + +They are normally run via teuthology based on qa/suites/rados/standalone/*.yaml. + +You can run them in a git checkout + build directory as well: + + * The qa/run-standalone.sh will run all of them in sequence. This is slow + since there is no parallelism. + + * You can run individual script(s) by specifying the basename or path below + qa/standalone as arguments to qa/run-standalone.sh. + +../qa/run-standalone.sh misc.sh osd/osd-dup.sh + + * Add support for specifying arguments to selected tests by simply adding + list of tests to each argument. + +../qa/run-standalone.sh "test-ceph-helpers.sh test_get_last_scrub_stamp" diff --git a/qa/standalone/c2c/c2c.sh b/qa/standalone/c2c/c2c.sh new file mode 100755 index 000000000..a6969d555 --- /dev/null +++ b/qa/standalone/c2c/c2c.sh @@ -0,0 +1,84 @@ +#!/usr/bin/env bash + +set -ex + +function run_perf_c2c() { + # First get some background system info + uname -a > uname.out + lscpu > lscpu.out + cat /proc/cmdline > cmdline.out + timeout -s INT 10 vmstat -w 1 > vmstat.out || true + sudo dmesg >& dmesg.out + cat /proc/cpuinfo > cpuinfo.out + ps axo psr,time,stat,ppid,pid,pcpu,comm > ps.1.out + ps -eafT > ps.2.out + sudo sysctl -a > sysctl.out + + nodecnt=`lscpu|grep "NUMA node(" |awk '{print $3}'` + for ((i=0; i<$nodecnt; i++)) + do + sudo cat /sys/devices/system/node/node${i}/meminfo > meminfo.$i.out + done + sudo more `sudo find /proc -name status` > proc_parent_child_status.out + sudo more /proc/*/numa_maps > numa_maps.out + + # + # Get separate kernel and user perf-c2c stats + # + sudo perf c2c record -a --ldlat=70 --all-user -o 
perf_c2c_a_all_user.data sleep 5 + sudo perf c2c report --stdio -i perf_c2c_a_all_user.data > perf_c2c_a_all_user.out 2>&1 + sudo perf c2c report --full-symbols --stdio -i perf_c2c_a_all_user.data > perf_c2c_full-sym_a_all_user.out 2>&1 + + sudo perf c2c record --call-graph dwarf -a --ldlat=70 --all-user -o perf_c2c_g_a_all_user.data sleep 5 + sudo perf c2c report -g --stdio -i perf_c2c_g_a_all_user.data > perf_c2c_g_a_all_user.out 2>&1 + + sudo perf c2c record -a --ldlat=70 --all-kernel -o perf_c2c_a_all_kernel.data sleep 4 + sudo perf c2c report --stdio -i perf_c2c_a_all_kernel.data > perf_c2c_a_all_kernel.out 2>&1 + + sudo perf c2c record --call-graph dwarf --ldlat=70 -a --all-kernel -o perf_c2c_g_a_all_kernel.data sleep 4 + + sudo perf c2c report -g --stdio -i perf_c2c_g_a_all_kernel.data > perf_c2c_g_a_all_kernel.out 2>&1 + + # + # Get combined kernel and user perf-c2c stats + # + sudo perf c2c record -a --ldlat=70 -o perf_c2c_a_both.data sleep 4 + sudo perf c2c report --stdio -i perf_c2c_a_both.data > perf_c2c_a_both.out 2>&1 + + sudo perf c2c record --call-graph dwarf --ldlat=70 -a --all-kernel -o perf_c2c_g_a_both.data sleep 4 + sudo perf c2c report -g --stdio -i perf_c2c_g_a_both.data > perf_c2c_g_a_both.out 2>&1 + + # + # Get all-user physical addr stats, in case multiple threads or processes are + # accessing shared memory with different vaddrs. + # + sudo perf c2c record --phys-data -a --ldlat=70 --all-user -o perf_c2c_a_all_user_phys_data.data sleep 5 + sudo perf c2c report --stdio -i perf_c2c_a_all_user_phys_data.data > perf_c2c_a_all_user_phys_data.out 2>&1 +} + +function run() { + local dir=$1 + shift + ( + rm -fr $dir + mkdir $dir + cd $dir + ceph_test_c2c --threads $(($(nproc) * 2)) "$@" & + sleep 30 # let it warm up + run_perf_c2c + kill $! 
|| { echo "ceph_test_c2c WAS NOT RUNNING" ; exit 1 ; } + ) || exit 1 +} + +function bench() { + optimized=$(timeout 30 ceph_test_c2c --threads $(($(nproc) * 2)) --sharding 2> /dev/null || true) + not_optimized=$(timeout 30 ceph_test_c2c --threads $(($(nproc) * 2)) 2> /dev/null || true) + if ! (( $optimized > ( $not_optimized * 2 ) )) ; then + echo "the optimization is expected to be at least x2 faster" + exit 1 + fi +} + +run with-sharding --sharding +run without-sharding +bench diff --git a/qa/standalone/ceph-helpers.sh b/qa/standalone/ceph-helpers.sh new file mode 100755 index 000000000..bf2c91bc0 --- /dev/null +++ b/qa/standalone/ceph-helpers.sh @@ -0,0 +1,2409 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2013,2014 Cloudwatt <libre.licensing@cloudwatt.com> +# Copyright (C) 2014,2015 Red Hat <contact@redhat.com> +# Copyright (C) 2014 Federico Gimenez <fgimenez@coit.es> +# +# Author: Loic Dachary <loic@dachary.org> +# Author: Federico Gimenez <fgimenez@coit.es> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# +TIMEOUT=300 +WAIT_FOR_CLEAN_TIMEOUT=90 +MAX_TIMEOUT=15 +PG_NUM=4 +TMPDIR=${TMPDIR:-/tmp} +CEPH_BUILD_VIRTUALENV=${TMPDIR} +TESTDIR=${TESTDIR:-${TMPDIR}} + +if type xmlstarlet > /dev/null 2>&1; then + XMLSTARLET=xmlstarlet +elif type xml > /dev/null 2>&1; then + XMLSTARLET=xml +else + echo "Missing xmlstarlet binary!" 
+ exit 1 +fi + +if [ `uname` = FreeBSD ]; then + SED=gsed + AWK=gawk + DIFFCOLOPTS="" + KERNCORE="kern.corefile" +else + SED=sed + AWK=awk + termwidth=$(stty -a | head -1 | sed -e 's/.*columns \([0-9]*\).*/\1/') + if [ -n "$termwidth" -a "$termwidth" != "0" ]; then + termwidth="-W ${termwidth}" + fi + DIFFCOLOPTS="-y $termwidth" + KERNCORE="kernel.core_pattern" +fi + +EXTRA_OPTS="" + +#! @file ceph-helpers.sh +# @brief Toolbox to manage Ceph cluster dedicated to testing +# +# Example use case: +# +# ~~~~~~~~~~~~~~~~{.sh} +# source ceph-helpers.sh +# +# function mytest() { +# # cleanup leftovers and reset mydir +# setup mydir +# # create a cluster with one monitor and three osds +# run_mon mydir a +# run_osd mydir 0 +# run_osd mydir 2 +# run_osd mydir 3 +# # put and get an object +# rados --pool rbd put GROUP /etc/group +# rados --pool rbd get GROUP /tmp/GROUP +# # stop the cluster and cleanup the directory +# teardown mydir +# } +# ~~~~~~~~~~~~~~~~ +# +# The focus is on simplicity and efficiency, in the context of +# functional tests. The output is intentionally very verbose +# and functions return as soon as an error is found. The caller +# is also expected to abort on the first error so that debugging +# can be done by looking at the end of the output. +# +# Each function is documented, implemented and tested independently. +# When modifying a helper, the test and the documentation are +# expected to be updated and it is easier of they are collocated. A +# test for a given function can be run with +# +# ~~~~~~~~~~~~~~~~{.sh} +# ceph-helpers.sh TESTS test_get_osds +# ~~~~~~~~~~~~~~~~ +# +# and all the tests (i.e. all functions matching test_*) are run +# with: +# +# ~~~~~~~~~~~~~~~~{.sh} +# ceph-helpers.sh TESTS +# ~~~~~~~~~~~~~~~~ +# +# A test function takes a single argument : the directory dedicated +# to the tests. It is expected to not create any file outside of this +# directory and remove it entirely when it completes successfully. 
+# + + +function get_asok_dir() { + if [ -n "$CEPH_ASOK_DIR" ]; then + echo "$CEPH_ASOK_DIR" + else + echo ${TMPDIR:-/tmp}/ceph-asok.$$ + fi +} + +function get_asok_path() { + local name=$1 + if [ -n "$name" ]; then + echo $(get_asok_dir)/ceph-$name.asok + else + echo $(get_asok_dir)/\$cluster-\$name.asok + fi +} +## +# Cleanup any leftovers found in **dir** via **teardown** +# and reset **dir** as an empty environment. +# +# @param dir path name of the environment +# @return 0 on success, 1 on error +# +function setup() { + local dir=$1 + teardown $dir || return 1 + mkdir -p $dir + mkdir -p $(get_asok_dir) + if [ $(ulimit -n) -le 1024 ]; then + ulimit -n 4096 || return 1 + fi + if [ -z "$LOCALRUN" ]; then + trap "teardown $dir 1" TERM HUP INT + fi +} + +function test_setup() { + local dir=$dir + setup $dir || return 1 + test -d $dir || return 1 + setup $dir || return 1 + test -d $dir || return 1 + teardown $dir +} + +####################################################################### + +## +# Kill all daemons for which a .pid file exists in **dir** and remove +# **dir**. If the file system in which **dir** is btrfs, delete all +# subvolumes that relate to it. +# +# @param dir path name of the environment +# @param dumplogs pass "1" to dump logs otherwise it will only if cores found +# @return 0 on success, 1 on error +# +function teardown() { + local dir=$1 + local dumplogs=$2 + kill_daemons $dir KILL + if [ `uname` != FreeBSD ] \ + && [ $(stat -f -c '%T' .) == "btrfs" ]; then + __teardown_btrfs $dir + fi + local cores="no" + local pattern="$(sysctl -n $KERNCORE)" + # See if we have apport core handling + if [ "${pattern:0:1}" = "|" ]; then + # TODO: Where can we get the dumps? 
+ # Not sure where the dumps really are so this will look in the CWD + pattern="" + fi + # Local we start with core and teuthology ends with core + if ls $(dirname "$pattern") | grep -q '^core\|core$' ; then + cores="yes" + if [ -n "$LOCALRUN" ]; then + mkdir /tmp/cores.$$ 2> /dev/null || true + for i in $(ls $(dirname $(sysctl -n $KERNCORE)) | grep '^core\|core$'); do + mv $i /tmp/cores.$$ + done + fi + fi + if [ "$cores" = "yes" -o "$dumplogs" = "1" ]; then + if [ -n "$LOCALRUN" ]; then + display_logs $dir + else + # Move logs to where Teuthology will archive it + mkdir -p $TESTDIR/archive/log + mv $dir/*.log $TESTDIR/archive/log + fi + fi + rm -fr $dir + rm -rf $(get_asok_dir) + if [ "$cores" = "yes" ]; then + echo "ERROR: Failure due to cores found" + if [ -n "$LOCALRUN" ]; then + echo "Find saved core files in /tmp/cores.$$" + fi + return 1 + fi + return 0 +} + +function __teardown_btrfs() { + local btrfs_base_dir=$1 + local btrfs_root=$(df -P . | tail -1 | $AWK '{print $NF}') + local btrfs_dirs=$(cd $btrfs_base_dir; sudo btrfs subvolume list -t . | $AWK '/^[0-9]/ {print $4}' | grep "$btrfs_base_dir/$btrfs_dir") + for subvolume in $btrfs_dirs; do + sudo btrfs subvolume delete $btrfs_root/$subvolume + done +} + +function test_teardown() { + local dir=$dir + setup $dir || return 1 + teardown $dir || return 1 + ! test -d $dir || return 1 +} + +####################################################################### + +## +# Sends a signal to a single daemon. +# This is a helper function for kill_daemons +# +# After the daemon is sent **signal**, its actual termination +# will be verified by sending it signal 0. If the daemon is +# still alive, kill_daemon will pause for a few seconds and +# try again. This will repeat for a fixed number of times +# before kill_daemon returns on failure. 
The list of +# sleep intervals can be specified as **delays** and defaults +# to: +# +# 0.1 0.2 1 1 1 2 3 5 5 5 10 10 20 60 60 60 120 +# +# This sequence is designed to run first a very short sleep time (0.1) +# if the machine is fast enough and the daemon terminates in a fraction of a +# second. The increasing sleep numbers should give plenty of time for +# the daemon to die even on the slowest running machine. If a daemon +# takes more than a few minutes to stop (the sum of all sleep times), +# there probably is no point in waiting more and a number of things +# are likely to go wrong anyway: better give up and return on error. +# +# @param pid the process id to send a signal +# @param send_signal the signal to send +# @param delays sequence of sleep times before failure +# +function kill_daemon() { + local pid=$(cat $1) + local send_signal=$2 + local delays=${3:-0.1 0.2 1 1 1 2 3 5 5 5 10 10 20 60 60 60 120} + local exit_code=1 + # In order to try after the last large sleep add 0 at the end so we check + # one last time before dropping out of the loop + for try in $delays 0 ; do + if kill -$send_signal $pid 2> /dev/null ; then + exit_code=1 + else + exit_code=0 + break + fi + send_signal=0 + sleep $try + done; + return $exit_code +} + +function test_kill_daemon() { + local dir=$1 + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + + name_prefix=osd + for pidfile in $(find $dir 2>/dev/null | grep $name_prefix'[^/]*\.pid') ; do + # + # sending signal 0 won't kill the daemon + # waiting just for one second instead of the default schedule + # allows us to quickly verify what happens when kill fails + # to stop the daemon (i.e. it must return false) + # + ! 
kill_daemon $pidfile 0 1 || return 1 + # + # killing just the osd and verify the mon still is responsive + # + kill_daemon $pidfile TERM || return 1 + done + + name_prefix=mgr + for pidfile in $(find $dir 2>/dev/null | grep $name_prefix'[^/]*\.pid') ; do + # + # kill the mgr + # + kill_daemon $pidfile TERM || return 1 + done + + name_prefix=mon + for pidfile in $(find $dir 2>/dev/null | grep $name_prefix'[^/]*\.pid') ; do + # + # kill the mon and verify it cannot be reached + # + kill_daemon $pidfile TERM || return 1 + ! timeout 5 ceph status || return 1 + done + + teardown $dir || return 1 +} + +## +# Kill all daemons for which a .pid file exists in **dir**. Each +# daemon is sent a **signal** and kill_daemons waits for it to exit +# during a few minutes. By default all daemons are killed. If a +# **name_prefix** is provided, only the daemons for which a pid +# file is found matching the prefix are killed. See run_osd and +# run_mon for more information about the name conventions for +# the pid files. +# +# Send TERM to all daemons : kill_daemons $dir +# Send KILL to all daemons : kill_daemons $dir KILL +# Send KILL to all osds : kill_daemons $dir KILL osd +# Send KILL to osd 1 : kill_daemons $dir KILL osd.1 +# +# If a daemon is sent the TERM signal and does not terminate +# within a few minutes, it will still be running even after +# kill_daemons returns. +# +# If all daemons are kill successfully the function returns 0 +# if at least one daemon remains, this is treated as an +# error and the function return 1. 
+# +# @param dir path name of the environment +# @param signal name of the first signal (defaults to TERM) +# @param name_prefix only kill match daemons (defaults to all) +# @param delays sequence of sleep times before failure +# @return 0 on success, 1 on error +# +function kill_daemons() { + local trace=$(shopt -q -o xtrace && echo true || echo false) + $trace && shopt -u -o xtrace + local dir=$1 + local signal=${2:-TERM} + local name_prefix=$3 # optional, osd, mon, osd.1 + local delays=$4 #optional timing + local status=0 + local pids="" + + for pidfile in $(find $dir 2>/dev/null | grep $name_prefix'[^/]*\.pid') ; do + run_in_background pids kill_daemon $pidfile $signal $delays + done + + wait_background pids + status=$? + + $trace && shopt -s -o xtrace + return $status +} + +function test_kill_daemons() { + local dir=$1 + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + # + # sending signal 0 won't kill the daemon + # waiting just for one second instead of the default schedule + # allows us to quickly verify what happens when kill fails + # to stop the daemon (i.e. it must return false) + # + ! kill_daemons $dir 0 osd 1 || return 1 + # + # killing just the osd and verify the mon still is responsive + # + kill_daemons $dir TERM osd || return 1 + # + # kill the mgr + # + kill_daemons $dir TERM mgr || return 1 + # + # kill the mon and verify it cannot be reached + # + kill_daemons $dir TERM || return 1 + ! timeout 5 ceph status || return 1 + teardown $dir || return 1 +} + +# +# return a random TCP port which is not used yet +# +# please note, there could be racing if we use this function for +# a free port, and then try to bind on this port. 
+# +function get_unused_port() { + local ip=127.0.0.1 + python3 -c "import socket; s=socket.socket(); s.bind(('$ip', 0)); print(s.getsockname()[1]); s.close()" +} + +####################################################################### + +## +# Run a monitor by the name mon.**id** with data in **dir**/**id**. +# The logs can be found in **dir**/mon.**id**.log and the pid file +# is **dir**/mon.**id**.pid and the admin socket is +# **dir**/**id**/ceph-mon.**id**.asok. +# +# The remaining arguments are passed verbatim to ceph-mon --mkfs +# and the ceph-mon daemon. +# +# Two mandatory arguments must be provided: --fsid and --mon-host +# Instead of adding them to every call to run_mon, they can be +# set in the CEPH_ARGS environment variable to be read implicitly +# by every ceph command. +# +# The CEPH_CONF variable is expected to be set to /dev/null to +# only rely on arguments for configuration. +# +# Examples: +# +# CEPH_ARGS="--fsid=$(uuidgen) " +# CEPH_ARGS+="--mon-host=127.0.0.1:7018 " +# run_mon $dir a # spawn a mon and bind port 7018 +# run_mon $dir a --debug-filestore=20 # spawn with filestore debugging +# +# If mon_initial_members is not set, the default rbd pool is deleted +# and replaced with a replicated pool with less placement groups to +# speed up initialization. If mon_initial_members is set, no attempt +# is made to recreate the rbd pool because it would hang forever, +# waiting for other mons to join. +# +# A **dir**/ceph.conf file is created but not meant to be used by any +# function. It is convenient for debugging a failure with: +# +# ceph --conf **dir**/ceph.conf -s +# +# @param dir path name of the environment +# @param id mon identifier +# @param ... 
can be any option valid for ceph-mon +# @return 0 on success, 1 on error +# +function run_mon() { + local dir=$1 + shift + local id=$1 + shift + local data=$dir/$id + + ceph-mon \ + --id $id \ + --mkfs \ + --mon-data=$data \ + --run-dir=$dir \ + "$@" || return 1 + + ceph-mon \ + --id $id \ + --osd-failsafe-full-ratio=.99 \ + --mon-osd-full-ratio=.99 \ + --mon-data-avail-crit=1 \ + --mon-data-avail-warn=5 \ + --paxos-propose-interval=0.1 \ + --osd-crush-chooseleaf-type=0 \ + $EXTRA_OPTS \ + --debug-mon 20 \ + --debug-ms 20 \ + --debug-paxos 20 \ + --chdir= \ + --mon-data=$data \ + --log-file=$dir/\$name.log \ + --admin-socket=$(get_asok_path) \ + --mon-cluster-log-file=$dir/log \ + --run-dir=$dir \ + --pid-file=$dir/\$name.pid \ + --mon-allow-pool-delete \ + --mon-allow-pool-size-one \ + --osd-pool-default-pg-autoscale-mode off \ + --mon-osd-backfillfull-ratio .99 \ + --mon-warn-on-insecure-global-id-reclaim-allowed=false \ + "$@" || return 1 + + cat > $dir/ceph.conf <<EOF +[global] +fsid = $(get_config mon $id fsid) +mon host = $(get_config mon $id mon_host) +EOF +} + +function test_run_mon() { + local dir=$1 + + setup $dir || return 1 + + run_mon $dir a || return 1 + ceph mon dump | grep "mon.a" || return 1 + kill_daemons $dir || return 1 + + run_mon $dir a --osd_pool_default_size=3 || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + create_rbd_pool || return 1 + ceph osd dump | grep "pool 1 'rbd'" || return 1 + local size=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path mon.a) \ + config get osd_pool_default_size) + test "$size" = '{"osd_pool_default_size":"3"}' || return 1 + + ! 
CEPH_ARGS='' ceph status || return 1 + CEPH_ARGS='' ceph --conf $dir/ceph.conf status || return 1 + + kill_daemons $dir || return 1 + + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + local size=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path mon.a) \ + config get osd_pool_default_size) + test "$size" = '{"osd_pool_default_size":"1"}' || return 1 + kill_daemons $dir || return 1 + + CEPH_ARGS="$CEPH_ARGS --osd_pool_default_size=2" \ + run_mon $dir a || return 1 + local size=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path mon.a) \ + config get osd_pool_default_size) + test "$size" = '{"osd_pool_default_size":"2"}' || return 1 + kill_daemons $dir || return 1 + + teardown $dir || return 1 +} + +function create_rbd_pool() { + ceph osd pool delete rbd rbd --yes-i-really-really-mean-it || return 1 + create_pool rbd $PG_NUM || return 1 + rbd pool init rbd +} + +function create_pool() { + ceph osd pool create "$@" + sleep 1 +} + +function delete_pool() { + local poolname=$1 + ceph osd pool delete $poolname $poolname --yes-i-really-really-mean-it +} + +####################################################################### + +function run_mgr() { + local dir=$1 + shift + local id=$1 + shift + local data=$dir/$id + + ceph config set mgr mgr_pool false --force + ceph-mgr \ + --id $id \ + $EXTRA_OPTS \ + --osd-failsafe-full-ratio=.99 \ + --debug-mgr 20 \ + --debug-objecter 20 \ + --debug-ms 20 \ + --debug-paxos 20 \ + --chdir= \ + --mgr-data=$data \ + --log-file=$dir/\$name.log \ + --admin-socket=$(get_asok_path) \ + --run-dir=$dir \ + --pid-file=$dir/\$name.pid \ + --mgr-module-path=$(realpath ${CEPH_ROOT}/src/pybind/mgr) \ + "$@" || return 1 +} + +function run_mds() { + local dir=$1 + shift + local id=$1 + shift + local data=$dir/$id + + ceph-mds \ + --id $id \ + $EXTRA_OPTS \ + --debug-mds 20 \ + --debug-objecter 20 \ + --debug-ms 20 \ + --chdir= \ + --mds-data=$data \ + --log-file=$dir/\$name.log \ + 
--admin-socket=$(get_asok_path) \ + --run-dir=$dir \ + --pid-file=$dir/\$name.pid \ + "$@" || return 1 +} + +####################################################################### + +## +# Create (prepare) and run (activate) an osd by the name osd.**id** +# with data in **dir**/**id**. The logs can be found in +# **dir**/osd.**id**.log, the pid file is **dir**/osd.**id**.pid and +# the admin socket is **dir**/**id**/ceph-osd.**id**.asok. +# +# The remaining arguments are passed verbatim to ceph-osd. +# +# Two mandatory arguments must be provided: --fsid and --mon-host +# Instead of adding them to every call to run_osd, they can be +# set in the CEPH_ARGS environment variable to be read implicitly +# by every ceph command. +# +# The CEPH_CONF variable is expected to be set to /dev/null to +# only rely on arguments for configuration. +# +# The run_osd function creates the OSD data directory on the **dir**/**id** +# directory and relies on the activate_osd function to run the daemon. +# +# Examples: +# +# CEPH_ARGS="--fsid=$(uuidgen) " +# CEPH_ARGS+="--mon-host=127.0.0.1:7018 " +# run_osd $dir 0 # prepare and activate an osd using the monitor listening on 7018 +# +# @param dir path name of the environment +# @param id osd identifier +# @param ... 
can be any option valid for ceph-osd +# @return 0 on success, 1 on error +# +function run_osd() { + local dir=$1 + shift + local id=$1 + shift + local osd_data=$dir/$id + + local ceph_args="$CEPH_ARGS" + ceph_args+=" --osd-failsafe-full-ratio=.99" + ceph_args+=" --osd-journal-size=100" + ceph_args+=" --osd-scrub-load-threshold=2000" + ceph_args+=" --osd-data=$osd_data" + ceph_args+=" --osd-journal=${osd_data}/journal" + ceph_args+=" --chdir=" + ceph_args+=$EXTRA_OPTS + ceph_args+=" --run-dir=$dir" + ceph_args+=" --admin-socket=$(get_asok_path)" + ceph_args+=" --debug-osd=20" + ceph_args+=" --debug-ms=1" + ceph_args+=" --debug-monc=20" + ceph_args+=" --log-file=$dir/\$name.log" + ceph_args+=" --pid-file=$dir/\$name.pid" + ceph_args+=" --osd-max-object-name-len=460" + ceph_args+=" --osd-max-object-namespace-len=64" + ceph_args+=" --enable-experimental-unrecoverable-data-corrupting-features=*" + ceph_args+=" --osd-mclock-profile=high_recovery_ops" + ceph_args+=" " + ceph_args+="$@" + mkdir -p $osd_data + + local uuid=`uuidgen` + echo "add osd$id $uuid" + OSD_SECRET=$(ceph-authtool --gen-print-key) + echo "{\"cephx_secret\": \"$OSD_SECRET\"}" > $osd_data/new.json + ceph osd new $uuid -i $osd_data/new.json + rm $osd_data/new.json + ceph-osd -i $id $ceph_args --mkfs --key $OSD_SECRET --osd-uuid $uuid + + local key_fn=$osd_data/keyring + cat > $key_fn<<EOF +[osd.$id] +key = $OSD_SECRET +EOF + echo adding osd$id key to auth repository + ceph -i "$key_fn" auth add osd.$id osd "allow *" mon "allow profile osd" mgr "allow profile osd" + echo start osd.$id + ceph-osd -i $id $ceph_args & + + # If noup is set, then can't wait for this osd + if ceph osd dump --format=json | jq '.flags_set[]' | grep -q '"noup"' ; then + return 0 + fi + wait_for_osd up $id || return 1 + +} + +function run_osd_filestore() { + local dir=$1 + shift + local id=$1 + shift + local osd_data=$dir/$id + + local ceph_args="$CEPH_ARGS" + ceph_args+=" --osd-failsafe-full-ratio=.99" + ceph_args+=" 
--osd-journal-size=100" + ceph_args+=" --osd-scrub-load-threshold=2000" + ceph_args+=" --osd-data=$osd_data" + ceph_args+=" --osd-journal=${osd_data}/journal" + ceph_args+=" --chdir=" + ceph_args+=$EXTRA_OPTS + ceph_args+=" --run-dir=$dir" + ceph_args+=" --admin-socket=$(get_asok_path)" + ceph_args+=" --debug-osd=20" + ceph_args+=" --debug-ms=1" + ceph_args+=" --debug-monc=20" + ceph_args+=" --log-file=$dir/\$name.log" + ceph_args+=" --pid-file=$dir/\$name.pid" + ceph_args+=" --osd-max-object-name-len=460" + ceph_args+=" --osd-max-object-namespace-len=64" + ceph_args+=" --enable-experimental-unrecoverable-data-corrupting-features=*" + ceph_args+=" " + ceph_args+="$@" + mkdir -p $osd_data + + local uuid=`uuidgen` + echo "add osd$osd $uuid" + OSD_SECRET=$(ceph-authtool --gen-print-key) + echo "{\"cephx_secret\": \"$OSD_SECRET\"}" > $osd_data/new.json + ceph osd new $uuid -i $osd_data/new.json + rm $osd_data/new.json + ceph-osd -i $id $ceph_args --mkfs --key $OSD_SECRET --osd-uuid $uuid --osd-objectstore=filestore + + local key_fn=$osd_data/keyring + cat > $key_fn<<EOF +[osd.$osd] +key = $OSD_SECRET +EOF + echo adding osd$id key to auth repository + ceph -i "$key_fn" auth add osd.$id osd "allow *" mon "allow profile osd" mgr "allow profile osd" + echo start osd.$id + ceph-osd -i $id $ceph_args & + + # If noup is set, then can't wait for this osd + if ceph osd dump --format=json | jq '.flags_set[]' | grep -q '"noup"' ; then + return 0 + fi + wait_for_osd up $id || return 1 + + +} + +function test_run_osd() { + local dir=$1 + + setup $dir || return 1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + + run_osd $dir 0 || return 1 + local backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + config get osd_max_backfills) + echo "$backfills" | grep --quiet 'osd_max_backfills' || return 1 + + run_osd $dir 1 --osd-max-backfills 20 || return 1 + local backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.1) \ + config get 
osd_max_backfills) + test "$backfills" = '{"osd_max_backfills":"20"}' || return 1 + + CEPH_ARGS="$CEPH_ARGS --osd-max-backfills 30" run_osd $dir 2 || return 1 + local backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.2) \ + config get osd_max_backfills) + test "$backfills" = '{"osd_max_backfills":"30"}' || return 1 + + teardown $dir || return 1 +} + +####################################################################### + +## +# Shutdown and remove all traces of the osd by the name osd.**id**. +# +# The OSD is shutdown with the TERM signal. It is then removed from +# the auth list, crush map, osd map etc and the files associated with +# it are also removed. +# +# @param dir path name of the environment +# @param id osd identifier +# @return 0 on success, 1 on error +# +function destroy_osd() { + local dir=$1 + local id=$2 + + ceph osd out osd.$id || return 1 + kill_daemons $dir TERM osd.$id || return 1 + ceph osd down osd.$id || return 1 + ceph osd purge osd.$id --yes-i-really-mean-it || return 1 + teardown $dir/$id || return 1 + rm -fr $dir/$id +} + +function test_destroy_osd() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + destroy_osd $dir 0 || return 1 + ! ceph osd dump | grep "osd.$id " || return 1 + teardown $dir || return 1 +} + +####################################################################### + +## +# Run (activate) an osd by the name osd.**id** with data in +# **dir**/**id**. The logs can be found in **dir**/osd.**id**.log, +# the pid file is **dir**/osd.**id**.pid and the admin socket is +# **dir**/**id**/ceph-osd.**id**.asok. +# +# The remaining arguments are passed verbatim to ceph-osd. +# +# Two mandatory arguments must be provided: --fsid and --mon-host +# Instead of adding them to every call to activate_osd, they can be +# set in the CEPH_ARGS environment variable to be read implicitly +# by every ceph command. 
#
# The CEPH_CONF variable is expected to be set to /dev/null to
# only rely on arguments for configuration.
#
# The activate_osd function expects a valid OSD data directory
# in **dir**/**id**, either just created via run_osd or re-using
# one left by a previous run of ceph-osd. The ceph-osd daemon is
# started in the background.
#
# The activate_osd function blocks until the monitor reports the osd
# up. If it fails to do so within $TIMEOUT seconds, activate_osd
# fails. When the noup flag is set on the cluster, activate_osd
# returns right after starting the daemon instead of waiting.
#
# Examples:
#
# CEPH_ARGS="--fsid=$(uuidgen) "
# CEPH_ARGS+="--mon-host=127.0.0.1:7018 "
# activate_osd $dir 0 # activate an osd using the monitor listening on 7018
#
# @param dir path name of the environment
# @param id osd identifier
# @param ... can be any option valid for ceph-osd
# @return 0 on success, 1 on error
#
function activate_osd() {
    local dir=$1
    shift
    local id=$1
    shift
    local osd_data=$dir/$id

    local ceph_args="$CEPH_ARGS"
    ceph_args+=" --osd-failsafe-full-ratio=.99"
    ceph_args+=" --osd-journal-size=100"
    ceph_args+=" --osd-scrub-load-threshold=2000"
    ceph_args+=" --osd-data=$osd_data"
    ceph_args+=" --osd-journal=${osd_data}/journal"
    ceph_args+=" --chdir="
    ceph_args+=$EXTRA_OPTS
    ceph_args+=" --run-dir=$dir"
    ceph_args+=" --admin-socket=$(get_asok_path)"
    ceph_args+=" --debug-osd=20"
    ceph_args+=" --log-file=$dir/\$name.log"
    ceph_args+=" --pid-file=$dir/\$name.pid"
    ceph_args+=" --osd-max-object-name-len=460"
    ceph_args+=" --osd-max-object-namespace-len=64"
    ceph_args+=" --enable-experimental-unrecoverable-data-corrupting-features=*"
    ceph_args+=" --osd-mclock-profile=high_recovery_ops"
    ceph_args+=" "
    ceph_args+="$@"
    mkdir -p "$osd_data"

    echo "start osd.$id"
    # $ceph_args is left unquoted on purpose: it is a flat option string
    # that must be split into individual arguments.
    ceph-osd -i "$id" $ceph_args &

    # The data directory must belong to the OSD we were asked to start.
    [ "$id" = "$(cat "$osd_data/whoami")" ] || return 1

    # If noup is set, then can't wait for this osd
    if ceph osd dump --format=json | jq '.flags_set[]' | grep -q '"noup"' ; then
        return 0
    fi
    wait_for_osd up "$id" || return 1
}

function test_activate_osd() {
    local dir=$1

    setup $dir || return 1

    run_mon $dir a || return 1
    run_mgr $dir x || return 1

    run_osd $dir 0 || return 1
    local backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
        config get osd_max_backfills)
    echo "$backfills" | grep --quiet 'osd_max_backfills' || return 1

    kill_daemons $dir TERM osd || return 1

    activate_osd $dir 0 --osd-max-backfills 20 || return 1
    local backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
        config get osd_max_backfills)
    test "$backfills" = '{"osd_max_backfills":"20"}' || return 1

    teardown $dir || return 1
}

function test_activate_osd_after_mark_down() {
    local dir=$1

    setup $dir || return 1

    run_mon $dir a || return 1
    run_mgr $dir x || return 1

    run_osd $dir 0 || return 1
    local backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
        config get osd_max_backfills)
    echo "$backfills" | grep --quiet 'osd_max_backfills' || return 1

    kill_daemons $dir TERM osd || return 1
    ceph osd down 0 || return 1
    wait_for_osd down 0 || return 1

    activate_osd $dir 0 --osd-max-backfills 20 || return 1
    local backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
        config get osd_max_backfills)
    test "$backfills" = '{"osd_max_backfills":"20"}' || return 1

    teardown $dir || return 1
}

function test_activate_osd_skip_benchmark() {
    local dir=$1

    setup $dir || return 1

    run_mon $dir a || return 1
    run_mgr $dir x || return 1

    # Skip the osd benchmark during first osd bring-up.
    run_osd $dir 0 --osd-op-queue=mclock_scheduler \
        --osd-mclock-skip-benchmark=true || return 1
    local max_iops_hdd_def=$(CEPH_ARGS='' ceph --format=json daemon \
        $(get_asok_path osd.0) config get osd_mclock_max_capacity_iops_hdd)
    local max_iops_ssd_def=$(CEPH_ARGS='' ceph --format=json daemon \
        $(get_asok_path osd.0) config get osd_mclock_max_capacity_iops_ssd)

    kill_daemons $dir TERM osd || return 1
    ceph osd down 0 || return 1
    wait_for_osd down 0 || return 1

    # Skip the osd benchmark during activation as well. Validate that
    # the max osd capacities are left unchanged.
    activate_osd $dir 0 --osd-op-queue=mclock_scheduler \
        --osd-mclock-skip-benchmark=true || return 1
    local max_iops_hdd_after_boot=$(CEPH_ARGS='' ceph --format=json daemon \
        $(get_asok_path osd.0) config get osd_mclock_max_capacity_iops_hdd)
    local max_iops_ssd_after_boot=$(CEPH_ARGS='' ceph --format=json daemon \
        $(get_asok_path osd.0) config get osd_mclock_max_capacity_iops_ssd)

    test "$max_iops_hdd_def" = "$max_iops_hdd_after_boot" || return 1
    test "$max_iops_ssd_def" = "$max_iops_ssd_after_boot" || return 1

    teardown $dir || return 1
}
#######################################################################

##
# Wait until the OSD **id** is either up or down, as specified by
# **state**. It fails after $TIMEOUT seconds. The attempt number and
# the matching line of ceph osd dump are written to stdout.
#
# @param state either up or down
# @param id osd identifier
# @return 0 on success, 1 on error
#
function wait_for_osd() {
    local state=$1
    local id=$2
    # Keep the result and the loop counter local so that a caller's own
    # $status / $i are not clobbered.
    local status=1
    local i

    for ((i=0; i < $TIMEOUT; i++)); do
        echo $i
        if ! ceph osd dump | grep "osd.$id $state"; then
            sleep 1
        else
            status=0
            break
        fi
    done
    return $status
}

function test_wait_for_osd() {
    local dir=$1
    setup $dir || return 1
    run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
    run_mgr $dir x || return 1
    run_osd $dir 0 || return 1
    run_osd $dir 1 || return 1
    wait_for_osd up 0 || return 1
    wait_for_osd up 1 || return 1
    kill_daemons $dir TERM osd.0 || return 1
    wait_for_osd down 0 || return 1
    ( TIMEOUT=1 ; ! wait_for_osd up 0 ) || return 1
    teardown $dir || return 1
}

#######################################################################

##
# Display the list of OSD ids supporting the **objectname** stored in
# **poolname**, as reported by ceph osd map.
#
# @param poolname an existing pool
# @param objectname an objectname (may or may not exist)
# @param STDOUT white space separated list of OSD ids
# @return 0 on success, 1 on error
#
function get_osds() {
    local poolname=$1
    local objectname=$2

    local osds=$(ceph --format json osd map $poolname $objectname 2>/dev/null | \
        jq '.acting | .[]')
    # Unquoted on purpose: word splitting turns the one-id-per-line jq
    # output into a single space separated line without trailing space.
    echo $osds
}

function test_get_osds() {
    local dir=$1

    setup $dir || return 1
    run_mon $dir a --osd_pool_default_size=2 || return 1
    run_mgr $dir x || return 1
    run_osd $dir 0 || return 1
    run_osd $dir 1 || return 1
    create_rbd_pool || return 1
    wait_for_clean || return 1
    create_rbd_pool || return 1
    get_osds rbd GROUP | grep --quiet '^[0-1] [0-1]$' || return 1
    teardown $dir || return 1
}

#######################################################################

##
# Wait for the monitor to form quorum (optionally, of size N)
#
# @param timeout duration (lower-bound) to wait for quorum to be formed,
#                defaults to 300 seconds
# @param quorumsize size of quorum to wait for
# @return 0 on success, 1 on error
#
function wait_for_quorum() {
    local timeout=$1
    local quorumsize=$2
    local no_quorum wait_until jqfilter jqinput res

    if [[ -z "$timeout" ]]; then
        timeout=300
    fi

    # Without an expected size, any answer to quorum_status is good enough.
    if [[ -z "$quorumsize" ]]; then
        timeout $timeout ceph quorum_status --format=json >&/dev/null || return 1
        return 0
    fi

    no_quorum=1
    wait_until=$(($(date +%s) + $timeout))
    while [[ $(date +%s) -lt $wait_until ]]; do
        jqfilter='.quorum | length == '$quorumsize
        jqinput="$(timeout $timeout ceph quorum_status --format=json 2>/dev/null)"
        res=$(echo $jqinput | jq "$jqfilter")
        if [[ "$res" == "true" ]]; then
            no_quorum=0
            break
        fi
    done
    return $no_quorum
}

#######################################################################

##
# Return the PG of supporting the **objectname** stored in
# **poolname**, as reported by ceph osd map.
#
# @param poolname an existing pool
# @param objectname an objectname (may or may not exist)
# @param STDOUT a PG
# @return 0 on success, 1 on error
#
function get_pg() {
    local poolname=$1
    local objectname=$2

    ceph --format json osd map $poolname $objectname 2>/dev/null | jq -r '.pgid'
}

function test_get_pg() {
    local dir=$1

    setup $dir || return 1
    run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
    run_mgr $dir x || return 1
    run_osd $dir 0 || return 1
    create_rbd_pool || return 1
    wait_for_clean || return 1
    get_pg rbd GROUP | grep --quiet '^[0-9]\.[0-9a-f][0-9a-f]*$' || return 1
    teardown $dir || return 1
}

#######################################################################

##
# Return the value of the **config**, obtained via the config get command
# of the admin socket of **daemon**.**id**.
#
# @param daemon mon or osd
# @param id mon or osd ID
# @param config the configuration variable name as found in config_opts.h
# @param STDOUT the config value
# @return 0 on success, 1 on error
#
function get_config() {
    local daemon=$1
    local id=$2
    local config=$3

    # CEPH_ARGS is emptied for this one command only; the admin socket
    # path is all the client needs.
    CEPH_ARGS='' \
        ceph --format json daemon $(get_asok_path $daemon.$id) \
        config get $config 2> /dev/null | \
        jq -r ".$config"
}

function test_get_config() {
    local dir=$1

    # override the default config using command line arg and check it
    setup $dir || return 1
    run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
    test "$(get_config mon a osd_pool_default_size)" = 1 || return 1
    run_mgr $dir x || return 1
    run_osd $dir 0 --osd_max_scrubs=3 || return 1
    test "$(get_config osd 0 osd_max_scrubs)" = 3 || return 1
    teardown $dir || return 1
}

#######################################################################

##
# Set the **config** to specified **value**, via the config set command
# of the admin socket of **daemon**.**id**
#
# @param daemon mon or osd
# @param id mon or osd ID
# @param config the configuration variable name as found in config_opts.h
# @param value the config value
# @return 0 on success, 1 on error
#
function set_config() {
    local daemon=$1
    local id=$2
    local config=$3
    local value=$4

    # Success is detected by the presence of a "success" key in the JSON
    # reply. The substitution is quoted so that an empty reply is a plain
    # comparison failure instead of a malformed test expression.
    test "$(env CEPH_ARGS='' ceph --format json daemon $(get_asok_path $daemon.$id) \
               config set $config $value 2> /dev/null | \
           jq 'has("success")')" == true
}

function test_set_config() {
    local dir=$1

    setup $dir || return 1
    run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
    test "$(get_config mon a ms_crc_header)" = true || return 1
    set_config mon a ms_crc_header false || return 1
    test "$(get_config mon a ms_crc_header)" = false || return 1
    set_config mon a ms_crc_header true || return 1
    test "$(get_config mon a ms_crc_header)" = true || return 1
    teardown $dir || return 1
}

#######################################################################

##
# Return the OSD id of the primary OSD supporting the **objectname**
# stored in **poolname**, as reported by ceph osd map.
#
# @param poolname an existing pool
# @param objectname an objectname (may or may not exist)
# @param STDOUT the primary OSD id
# @return 0 on success, 1 on error
#
function get_primary() {
    local poolname=$1
    local objectname=$2

    ceph --format json osd map $poolname $objectname 2>/dev/null | \
        jq '.acting_primary'
}

function test_get_primary() {
    local dir=$1

    setup $dir || return 1
    run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
    local osd=0
    run_mgr $dir x || return 1
    run_osd $dir $osd || return 1
    create_rbd_pool || return 1
    wait_for_clean || return 1
    test "$(get_primary rbd GROUP)" = "$osd" || return 1
    teardown $dir || return 1
}

#######################################################################

##
# Return the id of any OSD supporting the **objectname** stored in
# **poolname**, as reported by ceph osd map, except the primary.
#
# @param poolname an existing pool
# @param objectname an objectname (may or may not exist)
# @param STDOUT the OSD id
# @return 0 on success, 1 on error
#
function get_not_primary() {
    local poolname=$1
    local objectname=$2

    local primary=$(get_primary $poolname $objectname)
    # First member of the acting set that is not the primary.
    ceph --format json osd map $poolname $objectname 2>/dev/null | \
        jq ".acting | map(select (. != $primary)) | .[0]"
}

function test_get_not_primary() {
    local dir=$1

    setup $dir || return 1
    run_mon $dir a --osd_pool_default_size=2 || return 1
    run_mgr $dir x || return 1
    run_osd $dir 0 || return 1
    run_osd $dir 1 || return 1
    create_rbd_pool || return 1
    wait_for_clean || return 1
    local primary=$(get_primary rbd GROUP)
    local not_primary=$(get_not_primary rbd GROUP)
    test "$not_primary" != "$primary" || return 1
    [[ "$not_primary" = 0 || "$not_primary" = 1 ]] || return 1
    teardown $dir || return 1
}

#######################################################################

# Run ceph-objectstore-tool on the data path **dir**/**id** without
# stopping or restarting the OSD; the remaining arguments are passed
# through unchanged.
function _objectstore_tool_nodown() {
    local dir=$1
    shift
    local id=$1
    shift
    local osd_data=$dir/$id

    ceph-objectstore-tool \
        --data-path "$osd_data" \
        "$@" || return 1
}

# Stop osd.**id** with TERM, run ceph-objectstore-tool on its data path
# and restart it with $ceph_osd_args, without waiting for the cluster to
# become clean. Output of the stop/restart steps goes to stderr.
function _objectstore_tool_nowait() {
    local dir=$1
    shift
    local id=$1
    shift

    kill_daemons $dir TERM osd.$id >&2 < /dev/null || return 1

    _objectstore_tool_nodown "$dir" "$id" "$@" || return 1
    # $ceph_osd_args is an optional, caller-provided option string and is
    # intentionally left unquoted so it is split into arguments.
    activate_osd $dir $id $ceph_osd_args >&2 || return 1
}

##
# Run ceph-objectstore-tool against the OSD **id** using the data path
# **dir**. The OSD is killed with TERM prior to running
# ceph-objectstore-tool because access to the data path is
# exclusive. The OSD is restarted after the command completes. The
# objectstore_tool returns after all PG are active+clean again.
#
# @param dir the data path of the OSD
# @param id the OSD id
# @param ...
#            arguments to ceph-objectstore-tool
# @param STDIN the input of ceph-objectstore-tool
# @param STDOUT the output of ceph-objectstore-tool
# @return 0 on success, 1 on error
#
# The value of $ceph_osd_args will be passed to restarted osds
#
function objectstore_tool() {
    local dir=$1
    shift
    local id=$1
    shift

    _objectstore_tool_nowait $dir $id "$@" || return 1
    # Progress output goes to stderr so that stdout only carries the
    # output of ceph-objectstore-tool.
    wait_for_clean >&2
}

function test_objectstore_tool() {
    local dir=$1

    setup $dir || return 1
    run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
    local osd=0
    run_mgr $dir x || return 1
    run_osd $dir $osd || return 1
    create_rbd_pool || return 1
    wait_for_clean || return 1
    rados --pool rbd put GROUP /etc/group || return 1
    # The bytes read back must be identical to what was stored; a
    # mismatch fails the test.
    objectstore_tool $dir $osd GROUP get-bytes | \
        diff - /etc/group || return 1
    ! objectstore_tool $dir $osd NOTEXISTS get-bytes || return 1
    teardown $dir || return 1
}

#######################################################################

##
# Predicate checking if there is an ongoing recovery in the
# cluster. If any of the recovering_{keys,bytes,objects}_per_sec
# counters are reported by ceph status, it means recovery is in
# progress.
#
# @return 0 if recovery in progress, 1 otherwise
#
function get_is_making_recovery_progress() {
    local recovery_progress=""
    recovery_progress+=".recovering_keys_per_sec + "
    recovery_progress+=".recovering_bytes_per_sec + "
    recovery_progress+=".recovering_objects_per_sec"
    # The sum prints "null" only when none of the three counters is
    # present in the pgmap section.
    local progress=$(ceph --format json status 2>/dev/null | \
        jq -r ".pgmap | $recovery_progress")
    test "$progress" != null
}

function test_get_is_making_recovery_progress() {
    local dir=$1

    setup $dir || return 1
    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    ! get_is_making_recovery_progress || return 1
    teardown $dir || return 1
}

#######################################################################

##
# Return the number of active PGs in the cluster.
# A PG is active if
# ceph pg dump pgs reports it both **active** and **clean** and that
# not **stale**.
#
# @param STDOUT the number of active PGs
# @return 0 on success, 1 on error
#
function get_num_active_clean() {
    # Keep PG states that mention both "active" and "clean" ...
    local expression='select(contains("active") and contains("clean")) | '
    # ... and drop those that are also "stale".
    expression+='select(contains("stale") | not)'
    ceph --format json pg dump pgs 2>/dev/null | \
        jq ".pg_stats | [.[] | .state | $expression] | length"
}

function test_get_num_active_clean() {
    local dir=$1

    setup $dir || return 1
    run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
    run_mgr $dir x || return 1
    run_osd $dir 0 || return 1
    create_rbd_pool || return 1
    wait_for_clean || return 1
    local num_active_clean=$(get_num_active_clean)
    test "$num_active_clean" = "$PG_NUM" || return 1
    teardown $dir || return 1
}

##
# Return the number of active or peered PGs in the cluster. A PG matches if
# ceph pg dump pgs reports it is either **active** or **peered** and that
# not **stale**.
#
# @param STDOUT the number of active or peered PGs
# @return 0 on success, 1 on error
#
function get_num_active_or_peered() {
    # Keep PG states that mention "active" or "peered" ...
    local expression='select(contains("active") or contains("peered")) | '
    # ... and drop those that are also "stale".
    expression+='select(contains("stale") | not)'
    ceph --format json pg dump pgs 2>/dev/null | \
        jq ".pg_stats | [.[] | .state | $expression] | length"
}

function test_get_num_active_or_peered() {
    local dir=$1

    setup $dir || return 1
    run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
    run_mgr $dir x || return 1
    run_osd $dir 0 || return 1
    create_rbd_pool || return 1
    wait_for_clean || return 1
    local num_peered=$(get_num_active_or_peered)
    test "$num_peered" = "$PG_NUM" || return 1
    teardown $dir || return 1
}

#######################################################################

##
# Return the number of PGs in the cluster, according to the pgmap
# section of ceph status.
#
# @param STDOUT the number of PGs
# @return 0 on success, 1 on error
#
function get_num_pgs() {
    ceph --format json status 2>/dev/null | jq '.pgmap.num_pgs'
}

function test_get_num_pgs() {
    local dir=$1

    setup $dir || return 1
    run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
    run_mgr $dir x || return 1
    run_osd $dir 0 || return 1
    create_rbd_pool || return 1
    wait_for_clean || return 1
    local num_pgs=$(get_num_pgs)
    test "$num_pgs" -gt 0 || return 1
    teardown $dir || return 1
}

#######################################################################

##
# Return the OSD ids in use by at least one PG in the cluster (either
# in the up or the acting set), according to ceph pg dump pgs. Every
# OSD id shows as many times as they are used in up and acting sets.
# If an OSD id is in both the up and acting set of a given PG, it will
# show twice.
#
# @param STDOUT a sorted list of OSD ids
# @return 0 on success, 1 on error
#
function get_osd_id_used_by_pgs() {
    ceph --format json pg dump pgs 2>/dev/null | jq '.pg_stats | .[] | .up[], .acting[]' | sort
}

function test_get_osd_id_used_by_pgs() {
    local dir=$1

    setup $dir || return 1
    run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
    run_mgr $dir x || return 1
    run_osd $dir 0 || return 1
    create_rbd_pool || return 1
    wait_for_clean || return 1
    local osd_ids=$(get_osd_id_used_by_pgs | uniq)
    test "$osd_ids" = "0" || return 1
    teardown $dir || return 1
}

#######################################################################

##
# Wait until the OSD **id** shows **count** times in the
# PGs (see get_osd_id_used_by_pgs for more information about
# how OSD ids are counted). The check is repeated every 5 seconds,
# $TIMEOUT / 5 times at most.
#
# @param id the OSD id
# @param count the number of time it must show in the PGs
# @return 0 on success, 1 on error
#
function wait_osd_id_used_by_pgs() {
    local id=$1
    local count=$2
    # Keep the result and the loop counter local so that a caller's own
    # $status / $i are not clobbered.
    local status=1
    local i

    for ((i=0; i < $TIMEOUT / 5; i++)); do
        echo $i
        # -x -F: count only lines that are exactly the id, so that id 1
        # is not also counted for OSDs 10, 11, 21, ...
        if ! test "$(get_osd_id_used_by_pgs | grep -c -x -F -- "$id")" = "$count" ; then
            sleep 5
        else
            status=0
            break
        fi
    done
    return $status
}

function test_wait_osd_id_used_by_pgs() {
    local dir=$1

    setup $dir || return 1
    run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
    run_mgr $dir x || return 1
    run_osd $dir 0 || return 1
    create_rbd_pool || return 1
    wait_for_clean || return 1
    wait_osd_id_used_by_pgs 0 8 || return 1
    ! TIMEOUT=1 wait_osd_id_used_by_pgs 123 5 || return 1
    teardown $dir || return 1
}

#######################################################################

##
# Return the date and time of the last completed scrub for **pgid**,
# as reported by ceph pg dump pgs. Note that a repair also sets this
# date.
#
# @param pgid the id of the PG
# @param sname name of the stamp field to read, defaults to last_scrub_stamp
# @param STDOUT the date and time of the last scrub
# @return 0 on success, 1 on error
#
function get_last_scrub_stamp() {
    local pgid=$1
    local sname=${2:-last_scrub_stamp}
    ceph --format json pg dump pgs 2>/dev/null | \
        jq -r ".pg_stats | .[] | select(.pgid==\"$pgid\") | .$sname"
}

function test_get_last_scrub_stamp() {
    local dir=$1
    local stamp

    setup $dir || return 1
    run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
    run_mgr $dir x || return 1
    run_osd $dir 0 || return 1
    create_rbd_pool || return 1
    wait_for_clean || return 1
    stamp=$(get_last_scrub_stamp 1.0)
    test -n "$stamp" || return 1
    teardown $dir || return 1
}

#######################################################################

##
# Predicate checking if the cluster is clean, i.e. all of its PGs are
# in a clean state (see get_num_active_clean for a definition).
#
# @return 0 if the cluster is clean, 1 otherwise
#
function is_clean() {
    local num_pgs
    num_pgs=$(get_num_pgs)
    # No answer at all or no PG yet: the cluster cannot be called clean.
    [ -n "$num_pgs" ] || return 1
    [ "$num_pgs" != 0 ] || return 1
    test "$(get_num_active_clean)" = "$num_pgs" || return 1
}

function test_is_clean() {
    local dir=$1

    setup $dir || return 1
    run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
    run_mgr $dir x || return 1
    run_osd $dir 0 || return 1
    create_rbd_pool || return 1
    wait_for_clean || return 1
    is_clean || return 1
    teardown $dir || return 1
}

#######################################################################

# Print the value of the awk expression made of all arguments, e.g.
# "calc 1 + 2" prints 3 and a true comparison prints 1. The awk binary
# is taken from $AWK, falling back to plain awk when it is not set.
calc() { ${AWK:-awk} "BEGIN{print $*}"; }

##
# Return a list of numbers that are increasingly larger and whose
# total is **timeout** seconds. It can be used to have short sleep
# delay while waiting for an event on a fast machine. But if running
# very slowly the larger delays avoid stressing the machine even
# further or spamming the logs.
#
# @param timeout sum of all delays, in seconds
# @param first_step first (smallest) delay, defaults to 1
# @param max_timeout upper bound for a single delay, defaults to
#                    $MAX_TIMEOUT; 0 (or unset) means no upper bound
# @return a list of sleep delays
#
function get_timeout_delays() {
    # Tracing is switched off while computing: the loop is noisy.
    local trace=$(shopt -q -o xtrace && echo true || echo false)
    $trace && shopt -u -o xtrace
    local timeout=$1
    local first_step=${2:-1}
    local max_timeout=${3:-${MAX_TIMEOUT:-0}}

    local i
    local total="0"
    i=$first_step
    while test "$(calc $total + $i \<= $timeout)" = "1"; do
        echo -n "$(calc $i) "
        total=$(calc $total + $i)
        i=$(calc $i \* 2)
        # Cap the next delay at max_timeout. The comparison goes through
        # calc so that fractional values are compared numerically; only
        # "<" is used because an unparenthesized ">" is an output
        # redirection in an awk print statement.
        if test "$(calc 0 \< $max_timeout)" = "1" && \
           test "$(calc $max_timeout \< $i)" = "1"; then
            i=$max_timeout
        fi
    done
    # Whatever is left to reach exactly **timeout** becomes the last delay.
    if test "$(calc $total \< $timeout)" = "1"; then
        echo -n "$(calc $timeout - $total) "
    fi
    $trace && shopt -s -o xtrace
    return 0
}

function test_get_timeout_delays() {
    test "$(get_timeout_delays 1)" = "1 " || return 1
    test "$(get_timeout_delays 5)" = "1 2 2 " || return 1
    test "$(get_timeout_delays 6)" = "1 2 3 " || return 1
    test "$(get_timeout_delays 7)" = "1 2 4 " || return 1
    test "$(get_timeout_delays 8)" = "1 2 4 1 " || return 1
    test "$(get_timeout_delays 1 .1)" = "0.1 0.2 0.4 0.3 " || return 1
    test "$(get_timeout_delays 1.5 .1)" = "0.1 0.2 0.4 0.8 " || return 1
    test "$(get_timeout_delays 5 .1)" = "0.1 0.2 0.4 0.8 1.6 1.9 " || return 1
    test "$(get_timeout_delays 6 .1)" = "0.1 0.2 0.4 0.8 1.6 2.9 " || return 1
    test "$(get_timeout_delays 6.3 .1)" = "0.1 0.2 0.4 0.8 1.6 3.2 " || return 1
    test "$(get_timeout_delays 20 .1)" = "0.1 0.2 0.4 0.8 1.6 3.2 6.4 7.3 " || return 1
    test "$(get_timeout_delays 300 .1 0)" = "0.1 0.2 0.4 0.8 1.6 3.2 6.4 12.8 25.6 51.2 102.4 95.3 " || return 1
    test "$(get_timeout_delays 300 .1 10)" = "0.1 0.2 0.4 0.8 1.6 3.2 6.4 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 7.3 " || return 1
}

#######################################################################

##
# Wait until the cluster becomes clean or if it does not make progress
# for $WAIT_FOR_CLEAN_TIMEOUT seconds.
# Progress is measured either via the **get_is_making_recovery_progress**
# predicate or if the number of clean PGs changes (as returned by get_num_active_clean)
#
# @param cmd optional shell snippet evaluated after every unsuccessful check
# @return 0 if the cluster is clean, 1 otherwise
#
function wait_for_clean() {
    local cmd=$1
    local num_active_clean=-1
    local cur_active_clean
    local -a delays=($(get_timeout_delays $WAIT_FOR_CLEAN_TIMEOUT .1))
    local -i loop=0

    flush_pg_stats || return 1
    # Nothing can be clean before at least one PG exists.
    while test "$(get_num_pgs)" == 0 ; do
        sleep 1
    done

    while true ; do
        # Comparing get_num_active_clean & get_num_pgs is used to determine
        # if the cluster is clean. That's almost an inline of is_clean() to
        # get more performance by avoiding multiple calls of get_num_active_clean.
        cur_active_clean=$(get_num_active_clean)
        test "$cur_active_clean" = "$(get_num_pgs)" && break
        if test "$cur_active_clean" != "$num_active_clean" ; then
            # The number of clean PGs changed: restart the back-off.
            loop=0
            num_active_clean=$cur_active_clean
        elif get_is_making_recovery_progress ; then
            loop=0
        elif (( $loop >= ${#delays[*]} )) ; then
            # All delays were used without any progress: give up.
            ceph report
            return 1
        fi
        # eval is a no-op if cmd is empty
        eval "$cmd"
        sleep "${delays[$loop]}"
        # loop has the integer attribute, so += is an arithmetic increment
        loop+=1
    done
    return 0
}

function test_wait_for_clean() {
    local dir=$1

    setup $dir || return 1
    run_mon $dir a --osd_pool_default_size=2 || return 1
    run_osd $dir 0 || return 1
    run_mgr $dir x || return 1
    create_rbd_pool || return 1
    ! WAIT_FOR_CLEAN_TIMEOUT=1 wait_for_clean || return 1
    run_osd $dir 1 || return 1
    wait_for_clean || return 1
    teardown $dir || return 1
}

##
# Wait until the cluster becomes peered or if it does not make progress
# for $WAIT_FOR_CLEAN_TIMEOUT seconds.
# Progress is measured either via the **get_is_making_recovery_progress**
# predicate or if the number of peered PGs changes (as returned by get_num_active_or_peered)
#
# @param cmd optional shell snippet evaluated after every unsuccessful check
# @return 0 if the cluster is peered, 1 otherwise
#
function wait_for_peered() {
    local cmd=$1
    local num_peered=-1
    local cur_peered
    local -a delays=($(get_timeout_delays $WAIT_FOR_CLEAN_TIMEOUT .1))
    local -i loop=0

    flush_pg_stats || return 1
    # Nothing can be peered before at least one PG exists.
    while test "$(get_num_pgs)" == 0 ; do
        sleep 1
    done

    while true ; do
        # The cluster is peered once every PG is counted by
        # get_num_active_or_peered, i.e. the count equals get_num_pgs.
        cur_peered=$(get_num_active_or_peered)
        test "$cur_peered" = "$(get_num_pgs)" && break
        if test "$cur_peered" != "$num_peered" ; then
            # The number of peered PGs changed: restart the back-off.
            loop=0
            num_peered=$cur_peered
        elif get_is_making_recovery_progress ; then
            loop=0
        elif (( $loop >= ${#delays[*]} )) ; then
            # All delays were used without any progress: give up.
            ceph report
            return 1
        fi
        # eval is a no-op if cmd is empty
        eval "$cmd"
        sleep "${delays[$loop]}"
        # loop has the integer attribute, so += is an arithmetic increment
        loop+=1
    done
    return 0
}

function test_wait_for_peered() {
    local dir=$1

    setup $dir || return 1
    run_mon $dir a --osd_pool_default_size=2 || return 1
    run_osd $dir 0 || return 1
    run_mgr $dir x || return 1
    create_rbd_pool || return 1
    ! WAIT_FOR_CLEAN_TIMEOUT=1 wait_for_clean || return 1
    run_osd $dir 1 || return 1
    wait_for_peered || return 1
    teardown $dir || return 1
}


#######################################################################

##
# Wait until the cluster's health condition disappeared.
# $TIMEOUT default
#
# @param string to grep for in health detail
# @return 0 if the cluster health doesn't match the request,
# 1 otherwise if after $TIMEOUT seconds health condition remains.
#
function wait_for_health_gone() {
    local grepstr=$1
    local -a delays=($(get_timeout_delays $TIMEOUT .1))
    local -i loop=0

    # -e keeps a pattern that starts with "-" from being read as an option.
    while ceph health detail | grep -e "$grepstr" ; do
        if (( $loop >= ${#delays[*]} )) ; then
            # Out of delays: show the current health and give up.
            ceph health detail
            return 1
        fi
        sleep "${delays[$loop]}"
        # loop has the integer attribute, so += is an arithmetic increment
        loop+=1
    done
}

##
# Wait, for $TIMEOUT seconds at most, until the cluster has the health
# condition passed as argument.
#
# @param string to grep for in health detail
# @return 0 if the cluster health matches request, 1 otherwise
#
function wait_for_health() {
    local grepstr=$1
    local -a delays=($(get_timeout_delays $TIMEOUT .1))
    local -i loop=0

    # -e keeps a pattern that starts with "-" from being read as an option.
    while ! ceph health detail | grep -e "$grepstr" ; do
        if (( $loop >= ${#delays[*]} )) ; then
            # Out of delays: show the current health and give up.
            ceph health detail
            return 1
        fi
        sleep "${delays[$loop]}"
        # loop has the integer attribute, so += is an arithmetic increment
        loop+=1
    done
}

##
# Wait until the cluster becomes HEALTH_OK again or if it does not make progress
# for $TIMEOUT seconds.
#
# @return 0 if the cluster is HEALTHY, 1 otherwise
#
function wait_for_health_ok() {
    wait_for_health "HEALTH_OK" || return 1
}

function test_wait_for_health_ok() {
    local dir=$1

    setup $dir || return 1
    run_mon $dir a --osd_failsafe_full_ratio=.99 --mon_pg_warn_min_per_osd=0 || return 1
    run_mgr $dir x --mon_pg_warn_min_per_osd=0 || return 1
    # start osd_pool_default_size OSDs
    run_osd $dir 0 || return 1
    run_osd $dir 1 || return 1
    run_osd $dir 2 || return 1
    kill_daemons $dir TERM osd || return 1
    ceph osd down 0 || return 1
    # expect TOO_FEW_OSDS warning
    ! TIMEOUT=1 wait_for_health_ok || return 1
    # resurrect all OSDs
    activate_osd $dir 0 || return 1
    activate_osd $dir 1 || return 1
    activate_osd $dir 2 || return 1
    wait_for_health_ok || return 1
    teardown $dir || return 1
}


#######################################################################

##
# Run repair on **pgid** and wait until it completes. The repair
# function will fail if repair does not complete within $TIMEOUT
# seconds.
#
# @param pgid the id of the PG
# @return 0 on success, 1 on error
#
function repair() {
    local pgid=$1
    local last_scrub
    # Remember the stamp before asking for the repair, so that
    # wait_for_scrub can detect that it moved.
    last_scrub=$(get_last_scrub_stamp "$pgid")
    ceph pg repair "$pgid"
    wait_for_scrub "$pgid" "$last_scrub"
}

function test_repair() {
    local dir=$1

    setup $dir || return 1
    run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
    run_mgr $dir x || return 1
    run_osd $dir 0 || return 1
    create_rbd_pool || return 1
    wait_for_clean || return 1
    repair 1.0 || return 1
    kill_daemons $dir KILL osd || return 1
    ! TIMEOUT=1 repair 1.0 || return 1
    teardown $dir || return 1
}
#######################################################################

##
# Run scrub on **pgid** and wait until it completes. The pg_scrub
# function will fail if the scrub does not complete within $TIMEOUT
# seconds. Completion is detected by wait_for_scrub, which is given
# the **get_last_scrub_stamp** value stored before starting the scrub.
#
# @param pgid the id of the PG
# @return 0 on success, 1 on error
#
function pg_scrub() {
    local pgid=$1
    local last_scrub
    last_scrub=$(get_last_scrub_stamp "$pgid")
    ceph pg scrub "$pgid"
    wait_for_scrub "$pgid" "$last_scrub"
}

# Same as pg_scrub, for a deep scrub: the stamp that is watched is
# last_deep_scrub_stamp.
function pg_deep_scrub() {
    local pgid=$1
    local last_scrub
    last_scrub=$(get_last_scrub_stamp "$pgid" last_deep_scrub_stamp)
    ceph pg deep-scrub "$pgid"
    wait_for_scrub "$pgid" "$last_scrub" last_deep_scrub_stamp
}

function test_pg_scrub() {
    local dir=$1

    setup $dir || return 1
    run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
    run_mgr $dir x || return 1
    run_osd $dir 0 || return 1
    create_rbd_pool || return 1
    wait_for_clean || return 1
    pg_scrub 1.0 || return 1
    kill_daemons $dir KILL osd || return 1
    ! TIMEOUT=1 pg_scrub 1.0 || return 1
    teardown $dir || return 1
}

#######################################################################

##
# Run the *command* and expect it to fail (i.e.
# return a non zero status).
# The output (stderr and stdout) is stored in a temporary file in *dir*
# and is expected to contain the string *expected*.
#
# Return 0 if the command failed and the string was found. Otherwise
# return 1 and cat the full output of the command on stderr for debug.
#
# @param dir temporary directory to store the output
# @param expected string to look for in the output
# @param command ... the command and its arguments
# @return 0 on success, 1 on error
#

function expect_failure() {
    local dir=$1
    shift
    local expected="$1"
    shift
    local success

    if "$@" > "$dir/out" 2>&1 ; then
        success=true
    else
        success=false
    fi

    # Both conditions are required: the command must have failed and
    # its output must contain the expected string.
    if $success || ! grep --quiet -- "$expected" "$dir/out" ; then
        cat "$dir/out" >&2
        return 1
    else
        return 0
    fi
}

function test_expect_failure() {
    local dir=$1

    setup $dir || return 1
    expect_failure $dir FAIL bash -c 'echo FAIL ; exit 1' || return 1
    # the command did not fail
    ! expect_failure $dir FAIL bash -c 'echo FAIL ; exit 0' > "$dir/out" || return 1
    grep --quiet FAIL "$dir/out" || return 1
    # the command failed but the output does not contain the expected string
    ! expect_failure $dir FAIL bash -c 'echo UNEXPECTED ; exit 1' > "$dir/out" || return 1
    ! grep --quiet FAIL "$dir/out" || return 1
    teardown $dir || return 1
}

#######################################################################

##
# Given the *last_scrub*, wait for scrub to happen on **pgid**. It
# will fail if scrub does not complete within $TIMEOUT seconds. The
# scrub is complete whenever the **get_last_scrub_stamp** function
# reports a timestamp greater than the one given in argument.
#
# @param pgid the id of the PG
# @param last_scrub timestamp of the last scrub for *pgid*
# @param sname name of the stamp field to watch, defaults to last_scrub_stamp
# @return 0 on success, 1 on error
#
function wait_for_scrub() {
    local pgid=$1
    local last_scrub="$2"
    local sname=${3:-last_scrub_stamp}
    # Keep the loop counter local so that a caller's own $i is not clobbered.
    local i

    for ((i=0; i < $TIMEOUT; i++)); do
        # String comparison: the stamp must sort after the previous one.
        if test "$(get_last_scrub_stamp $pgid $sname)" '>' "$last_scrub" ; then
            return 0
        fi
        sleep 1
    done
    return 1
}

function test_wait_for_scrub() {
    local dir=$1

    setup $dir || return 1
    run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
    run_mgr $dir x || return 1
    run_osd $dir 0 || return 1
    create_rbd_pool || return 1
    wait_for_clean || return 1
    local pgid=1.0
    ceph pg repair $pgid
    local last_scrub=$(get_last_scrub_stamp $pgid)
    wait_for_scrub $pgid "$last_scrub" || return 1
    kill_daemons $dir KILL osd || return 1
    last_scrub=$(get_last_scrub_stamp $pgid)
    ! TIMEOUT=1 wait_for_scrub $pgid "$last_scrub" || return 1
    teardown $dir || return 1
}

#######################################################################

##
# Return 0 if the erasure code *plugin* is available, 1 otherwise.
#
# @param plugin erasure code plugin
# @return 0 on success, 1 on error
#

function erasure_code_plugin_exists() {
    local plugin=$1
    local status
    local grepstr
    local s
    # The "plugin cannot be loaded" message differs between platforms.
    case $(uname) in
        FreeBSD) grepstr="Cannot open.*$plugin" ;;
        *) grepstr="$plugin.*No such file" ;;
    esac

    # Probe by creating a throw-away profile that uses the plugin.
    s=$(ceph osd erasure-code-profile set TESTPROFILE plugin=$plugin 2>&1)
    status=$?
    if [ $status -eq 0 ]; then
        ceph osd erasure-code-profile rm TESTPROFILE
    elif ! echo $s | grep --quiet "$grepstr" ; then
        status=1
        # display why the string was rejected.
        echo $s
    fi
    return $status
}

function test_erasure_code_plugin_exists() {
    local dir=$1

    setup $dir || return 1
    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    erasure_code_plugin_exists jerasure || return 1
    ! erasure_code_plugin_exists FAKE || return 1
    teardown $dir || return 1
}

#######################################################################

##
# Display all log files from **dir** on stdout.
#
# @param dir directory in which all data is stored
#

function display_logs() {
    local dir=$1
    local file

    # Only the *.log files directly in **dir** are shown, each one
    # preceded by a separator line carrying its path.
    find "$dir" -maxdepth 1 -name '*.log' | \
        while IFS= read -r file ; do
            echo "======================= $file"
            cat "$file"
        done
}

function test_display_logs() {
    local dir=$1

    setup $dir || return 1
    run_mon $dir a || return 1
    kill_daemons $dir || return 1
    display_logs $dir > $dir/log.out
    grep --quiet mon.a.log $dir/log.out || return 1
    teardown $dir || return 1
}

#######################################################################
##
# Spawn a command in background and save the pid in the variable name
# passed in argument. To make the output reading easier, the output is
# prepend with the process id.
#
# Example:
# pids1=""
# run_in_background pids1 bash -c 'sleep 1; exit 1'
#
# @param pid_variable the variable name (not value) where the pids will be stored
# @param ... the command to execute
# @return only the pid_variable output should be considered and used with **wait_background**
#
function run_in_background() {
    local pid_variable=$1
    shift
    # Execute the command and prepend the output with its pid
    # We enforce to return the exit status of the command and not the sed one.
    ("$@" |& sed 's/^/'$BASHPID': /'; return "${PIPESTATUS[0]}") >&2 &
    # Append the pid of the background subshell to the caller's variable.
    eval "$pid_variable+=\" $!\""
}

# Run the command given after **out** with its stdout written to the
# file **out**.
function save_stdout {
    local out="$1"
    shift
    "$@" > "$out"
}

function test_run_in_background() {
    local pids
    run_in_background pids sleep 1
    run_in_background pids sleep 1
    test $(echo $pids | wc -w) = 2 || return 1
    wait $pids || return 1
}

#######################################################################
##
# Wait for pids running in background to complete.
# This function is usually used after a **run_in_background** call
# Example:
#   pids1=""
#   run_in_background pids1 bash -c 'sleep 1; exit 1'
#   wait_background pids1
#
# @param pids The variable name that contains the active PIDS. Set as empty at the end of the function.
# @return returns 1 if at least one process exits in error, otherwise returns 0
#
function wait_background() {
    # The locals carry a prefix on purpose: callers typically pass a
    # variable named "pids", and with dynamic scoping a local of the same
    # name would shadow the caller's variable when it is emptied below.
    local __wb_pid
    local __wb_status=0

    # ${!1} reads the PIDs from the variable whose name is in $1
    for __wb_pid in ${!1}; do
        if ! wait "$__wb_pid"; then
            # If one process failed then return 1
            __wb_status=1
        fi
    done

    # We empty the variable reporting that all process ended
    printf -v "$1" '%s' ''

    return $__wb_status
}


function test_wait_background() {
    local pids=""
    run_in_background pids bash -c "sleep 1; exit 1"
    run_in_background pids bash -c "sleep 2; exit 0"
    wait_background pids
    if [ $? -ne 1 ]; then return 1; fi

    run_in_background pids bash -c "sleep 1; exit 0"
    run_in_background pids bash -c "sleep 2; exit 0"
    wait_background pids
    if [ $? -ne 0 ]; then return 1; fi

    if [ ! -z "$pids" ]; then return 1; fi
}

##
# Ask every OSD listed by "ceph osd ls" to flush its PG stats, then poll
# "ceph osd last-stat-seq" once per second until each OSD reports a
# sequence number at least as large as the one returned by its flush.
#
# @param timeout number of one-second polls allowed in total
#        (defaults to $TIMEOUT)
# @return 0 on success, 1 if the polls are exhausted
#
function flush_pg_stats()
{
    local timeout=${1:-$TIMEOUT}
    # all work variables are local so that callers iterating over their
    # own $osd / $seq are not disturbed
    local ids seqs osd seq s

    ids=$(ceph osd ls)
    seqs=''
    for osd in $ids; do
        seq=$(ceph tell osd.$osd flush_pg_stats)
        if test -z "$seq"
        then
            continue
        fi
        seqs="$seqs $osd-$seq"
    done

    for s in $seqs; do
        # split "<osd>-<seq>" (first and second dash separated field)
        osd=${s%%-*}
        seq=${s#*-}
        seq=${seq%%-*}
        echo "waiting osd.$osd seq $seq"
        while test $(ceph osd last-stat-seq $osd) -lt $seq; do
            sleep 1
            if [ $((timeout--)) -eq 0 ]; then
                return 1
            fi
        done
    done
}

function test_flush_pg_stats()
{
    local dir=$1
    local stored stored_raw

    setup $dir || return 1
    run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
    run_mgr $dir x || return 1
    run_osd $dir 0 || return 1
    create_rbd_pool || return 1
    rados -p rbd put obj /etc/group
    flush_pg_stats || return 1
    local jq_filter='.pools | .[] | select(.name == "rbd") | .stats'
    stored=$(ceph df detail --format=json | jq "$jq_filter.stored")
    stored_raw=$(ceph df detail --format=json | jq "$jq_filter.stored_raw")
    test $stored -gt 0 || return 1
    test $stored == $stored_raw || return 1
    teardown $dir
}

########################################################################
##
# Get the current op scheduler enabled on an osd by reading the
# osd_op_queue config option
#
# Example:
#   get_op_scheduler $osdid
#
# @param id the id of the OSD
# @return the name of the op scheduler enabled for the OSD
#
function get_op_scheduler() {
    local id=$1

    get_config osd $id osd_op_queue
}

function test_get_op_scheduler() {
    local dir=$1

    setup $dir || return 1

    run_mon $dir a || return 1
    run_mgr $dir x || return 1

    run_osd $dir 0 --osd_op_queue=wpq || return 1
    test $(get_op_scheduler 0) = "wpq" || return 1

    run_osd $dir 1 --osd_op_queue=mclock_scheduler || return 1
    test $(get_op_scheduler 1) = "mclock_scheduler" || return 1
    teardown $dir || return 1
}

#######################################################################

##
# Call the **run** function (which must be defined by the caller) with
# the **dir** argument followed by the caller argument list.
#
# If the **run** function returns on error, all logs found in **dir**
# are displayed for diagnostic purposes.
#
# **teardown** function is called when the **run** function returns
# (on success or on error), to cleanup leftovers. The CEPH_CONF is set
# to /dev/null and CEPH_ARGS is unset so that the tests are protected from
# external interferences.
#
# It is the responsibility of the **run** function to call the
# **setup** function to prepare the test environment (create a temporary
# directory etc.).
#
# The shell is required (via PS4) to display the function and line
# number whenever a statement is executed to help debugging.
#
# @param dir directory in which all data is stored
# @param ... arguments passed transparently to **run**
# @return 0 on success, 1 on error
#
function main() {
    local dir=td/$1
    shift

    shopt -s -o xtrace
    PS4='${BASH_SOURCE[0]}:$LINENO: ${FUNCNAME[0]}: '

    export PATH=.:$PATH # make sure program from sources are preferred
    export PYTHONWARNINGS=ignore
    export CEPH_CONF=/dev/null
    unset CEPH_ARGS

    local code
    if run "$dir" "$@" ; then
        code=0
    else
        code=1
    fi
    teardown "$dir" $code || return 1
    return $code
}

#######################################################################

##
# Run the self tests of the helpers: every function given in argument
# (or, by default, every function whose name starts with test_) is
# called with the td/ceph-helpers directory. The first failure calls
# **teardown** and aborts the run.
#
# @param ... names of the test functions to run (optional)
# @return 0 on success, 1 on error
#
function run_tests() {
    shopt -s -o xtrace
    PS4='${BASH_SOURCE[0]}:$LINENO: ${FUNCNAME[0]}: '

    export PATH=.:$PATH # make sure program from sources are preferred

    export CEPH_MON="127.0.0.1:7109" # git grep '\<7109\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+=" --fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "
    export CEPH_CONF=/dev/null

    local funcs=${@:-$(set | sed -n -e 's/^\(test_[0-9a-z_]*\) .*/\1/p')}
    local dir=td/ceph-helpers
    local func

    for func in $funcs ; do
        if ! $func $dir; then
            teardown $dir 1
            return 1
        fi
    done
}

if test "$1" = TESTS ; then
    shift
    run_tests "$@"
    exit $?
fi

# NOTE:
# jq only support --exit-status|-e from version 1.4 forwards, which makes
# returning on error waaaay prettier and straightforward.
# However, the current automated upstream build is running with v1.3,
# which has no idea what -e is. Hence the convoluted error checking we
# need. Sad.
# The next time someone changes this code, please check if v1.4 is now
# a thing, and, if so, please change these to use -e. Thanks.

# jq '.all.supported | select([.[] == "foo"] | any)'
##
# Apply the jq *filter* to *input* and succeed when the result is either
# the literal true or the JSON string *expects* (i.e. "expects",
# including the double quotes).
#
# @param input JSON document
# @param filter jq filter
# @param expects expected string value (optional)
# @return 0 when the result matches, 1 otherwise
#
function jq_success() {
    local input="$1"
    local filter="$2"
    # jq prints strings with their double quotes
    local expects="\"$3\""
    local ret

    # input and filter are handed to jq verbatim: no eval is involved, so
    # shell-escaping single quotes would only corrupt them
    ret=$(printf '%s\n' "$input" | jq "$filter")
    if [[ "$ret" == "true" || "$ret" == "$expects" ]]; then
        return 0
    fi
    return 1
}

##
# Make the OSD holding shard *shard_id* of *objname* return a read
# error: enable <objectstore type>_debug_inject_read_err on that OSD and
# call inject<which>err through its admin socket, retrying once per
# second (10 attempts) while the reply contains "Invalid".
#
# @param pooltype "ec" for erasure coded pools, anything else otherwise
# @param which kind of error, spliced into the inject<which>err command
# @param poolname name of the pool
# @param objname name of the object
# @param dir directory in which all data is stored
# @param shard_id index into the list printed by **get_osds**
# @return 0 on success, 1 on error
#
function inject_eio() {
    local pooltype=$1
    shift
    local which=$1
    shift
    local poolname=$1
    shift
    local objname=$1
    shift
    local dir=$1
    shift
    local shard_id=$1
    shift

    local -a initial_osds=($(get_osds $poolname $objname))
    local osd_id=${initial_osds[$shard_id]}
    if [ "$pooltype" != "ec" ]; then
        shard_id=""
    fi
    local osd_type
    osd_type=$(cat $dir/$osd_id/type)
    set_config osd $osd_id ${osd_type}_debug_inject_read_err true || return 1
    local loop=0
    # $shard_id stays unquoted: when empty it must vanish from the
    # command line instead of becoming an empty argument
    while ( CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.$osd_id) \
             inject${which}err $poolname $objname $shard_id | grep -q Invalid ); do
        loop=$((loop + 1))
        if [ $loop = "10" ]; then
            return 1
        fi
        sleep 1
    done
}

##
# diff the arguments; when they differ and $DIFFCOLOPTS is set, run diff
# again with these extra options to show the differences.
#
# @param ... arguments passed to diff
# @return 0 if identical, otherwise 1 ($DIFFCOLOPTS empty) or the
#         status of the second diff
#
function multidiff() {
    if ! diff "$@" ; then
        if [ "$DIFFCOLOPTS" = "" ]; then
            return 1
        fi
        # DIFFCOLOPTS is split on purpose: it may hold several options
        diff $DIFFCOLOPTS "$@"
    fi
}

##
# Create the erasure code profile "myprofile" (failure domain osd plus
# the extra key=value arguments), create an erasure coded pool using it
# and wait for the cluster to be clean.
#
# @param pool_name name of the pool to create
# @param allow_overwrites "true" to enable allow_ec_overwrites
# @param ... additional arguments for the erasure code profile
# @return 0 on success, 1 on error
#
function create_ec_pool() {
    local pool_name=$1
    shift
    local allow_overwrites=$1
    shift

    ceph osd erasure-code-profile set myprofile crush-failure-domain=osd "$@" || return 1

    create_pool "$pool_name" 1 1 erasure myprofile || return 1

    if [ "$allow_overwrites" = "true" ]; then
        ceph osd pool set "$pool_name" allow_ec_overwrites true || return 1
    fi

    wait_for_clean || return 1
    return 0
}

# Local Variables:
# compile-command: "cd ../../src ; make -j4 && ../qa/standalone/ceph-helpers.sh TESTS # test_get_config"
# End:
# diff --git a/qa/standalone/crush/crush-choose-args.sh b/qa/standalone/crush/crush-choose-args.sh new file mode 100755 index 000000000..ee548db12 --- /dev/null +++ b/qa/standalone/crush/crush-choose-args.sh @@ -0,0 +1,243 @@
#!/usr/bin/env bash
#
# Copyright (C) 2017 Red Hat <contact@redhat.com>
#
# Author: Loic Dachary <loic@dachary.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Library Public License for more details.
#

source $CEPH_ROOT/qa/standalone/ceph-helpers.sh

function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7131" # git grep '\<7131\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "
    CEPH_ARGS+="--crush-location=root=default,host=HOST "
    CEPH_ARGS+="--osd-crush-initial-weight=3 "
    #
    # Disable device auto class feature for now.
    # The device class is non-deterministic and will
    # crash the crushmap comparison below.
    #
    CEPH_ARGS+="--osd-class-update-on-start=false "

    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    local func
    for func in $funcs ; do
        setup $dir || return 1
        $func $dir || return 1
        teardown $dir || return 1
    done
}

function TEST_choose_args_update() {
    #
    # adding a weighted OSD updates the weight up to the top
    #
    local dir=$1

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    run_osd $dir 0 || return 1

    ceph osd set-require-min-compat-client luminous
    ceph osd getcrushmap > $dir/map || return 1
    crushtool -d $dir/map -o $dir/map.txt || return 1
    sed -i -e '/end crush map/d' $dir/map.txt
    # map.txt is diffed against a decompiled map at the end of the test,
    # the text appended here must therefore keep crushtool's own layout
    cat >> $dir/map.txt <<EOF
# choose_args
choose_args 0 {
  {
    bucket_id -1
    weight_set [
      [ 2.00000 ]
      [ 2.00000 ]
    ]
    ids [ -10 ]
  }
  {
    bucket_id -2
    weight_set [
      [ 2.00000 ]
      [ 2.00000 ]
    ]
    ids [ -20 ]
  }
}

# end crush map
EOF
    crushtool -c $dir/map.txt -o $dir/map-new || return 1
    ceph osd setcrushmap -i $dir/map-new || return 1
    ceph osd crush tree

    run_osd $dir 1 || return 1
    ceph osd crush tree
    ceph osd getcrushmap > $dir/map-one-more || return 1
    crushtool -d $dir/map-one-more -o $dir/map-one-more.txt || return 1
    cat $dir/map-one-more.txt
    diff -u $dir/map-one-more.txt $CEPH_ROOT/src/test/crush/crush-choose-args-expected-one-more-3.txt || return 1

    destroy_osd $dir 1 || return 1
    ceph osd crush tree
    ceph osd getcrushmap > $dir/map-one-less || return 1
    crushtool -d $dir/map-one-less -o $dir/map-one-less.txt || return 1
    diff -u $dir/map-one-less.txt $dir/map.txt || return 1
}

function TEST_no_update_weight_set() {
    #
    # adding a zero weight OSD does not update the weight set at all
    #
    local dir=$1

    local ORIG_CEPH_ARGS="$CEPH_ARGS"
    CEPH_ARGS+="--osd-crush-update-weight-set=false "

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    run_osd $dir 0 || return 1

    ceph osd set-require-min-compat-client luminous
    ceph osd crush tree
    ceph osd getcrushmap > $dir/map || return 1
    crushtool -d $dir/map -o $dir/map.txt || return 1
    sed -i -e '/end crush map/d' $dir/map.txt
    # map.txt is diffed against a decompiled map at the end of the test,
    # the text appended here must therefore keep crushtool's own layout
    cat >> $dir/map.txt <<EOF
# choose_args
choose_args 0 {
  {
    bucket_id -1
    weight_set [
      [ 2.00000 ]
      [ 1.00000 ]
    ]
    ids [ -10 ]
  }
  {
    bucket_id -2
    weight_set [
      [ 2.00000 ]
      [ 1.00000 ]
    ]
    ids [ -20 ]
  }
}

# end crush map
EOF
    crushtool -c $dir/map.txt -o $dir/map-new || return 1
    ceph osd setcrushmap -i $dir/map-new || return 1
    ceph osd crush tree


    run_osd $dir 1 || return 1
    ceph osd crush tree
    ceph osd getcrushmap > $dir/map-one-more || return 1
    crushtool -d $dir/map-one-more -o $dir/map-one-more.txt || return 1
    cat $dir/map-one-more.txt
    diff -u $dir/map-one-more.txt $CEPH_ROOT/src/test/crush/crush-choose-args-expected-one-more-0.txt || return 1

    destroy_osd $dir 1 || return 1
    ceph osd crush tree
    ceph osd getcrushmap > $dir/map-one-less || return 1
    crushtool -d $dir/map-one-less -o $dir/map-one-less.txt || return 1
    diff -u $dir/map-one-less.txt $dir/map.txt || return 1

    CEPH_ARGS="$ORIG_CEPH_ARGS"
}

function TEST_reweight() {
    # reweight and reweight-compat behave appropriately
    local dir=$1

    local ORIG_CEPH_ARGS="$CEPH_ARGS"
    CEPH_ARGS+="--osd-crush-update-weight-set=false "

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    run_osd $dir 0 || return 1
    run_osd $dir 1 || return 1

    ceph osd crush weight-set create-compat || return 1
    ceph osd crush tree

    ceph osd crush weight-set reweight-compat osd.0 2 || return 1
    ceph osd crush tree
    ceph osd crush tree | grep host | grep '6.00000 5.00000' || return 1

    run_osd $dir 2 || return 1
    ceph osd crush tree
    ceph osd crush tree | grep host | grep '9.00000 5.00000' || return 1

    ceph osd crush reweight osd.2 4
    ceph osd crush tree
    ceph osd crush tree | grep host | grep '10.00000 5.00000' || return 1

    ceph osd crush weight-set reweight-compat osd.2 4
    ceph osd crush tree
    ceph osd crush tree | grep host | grep '10.00000 9.00000' || return 1

    # same as TEST_no_update_weight_set: do not leave the extra option
    # behind for whatever runs next
    CEPH_ARGS="$ORIG_CEPH_ARGS"
}

function TEST_move_bucket() {
    local dir=$1

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    run_osd $dir 0 || return 1
    run_osd $dir 1 || return 1

    ceph osd crush weight-set create-compat || return 1
    ceph osd crush weight-set reweight-compat osd.0 2 || return 1
    ceph osd crush weight-set reweight-compat osd.1 2 || return 1
    ceph osd crush tree
    ceph osd crush tree | grep HOST | grep '6.00000 4.00000' || return 1

    # moving a bucket adjusts the weights
    ceph osd crush add-bucket RACK rack root=default || return 1
    ceph osd crush move HOST rack=RACK || return 1
    ceph osd crush tree
    ceph osd crush tree | grep HOST | grep '6.00000 4.00000' || return 1
    ceph osd crush tree | grep RACK | grep '6.00000 4.00000' || return 1

    # weight-set reweight adjusts containing buckets
    ceph osd crush weight-set reweight-compat osd.0 1 || return 1
    ceph osd crush tree
    ceph osd crush tree | grep HOST | grep '6.00000 3.00000' || return 1
    ceph osd crush tree | grep RACK | grep '6.00000 3.00000' || return 1

    # moving a leaf resets its weight-set to the canonical weight...
    ceph config set mon osd_crush_update_weight_set true || return 1
    ceph osd crush add-bucket FOO host root=default || return 1
    ceph osd crush move osd.0 host=FOO || return 1
    ceph osd crush tree
    ceph osd crush tree | grep osd.0 | grep '3.00000 3.00000' || return 1
    ceph osd crush tree | grep HOST | grep '3.00000 2.00000' || return 1
    ceph osd crush tree | grep RACK | grep '3.00000 2.00000' || return 1

    # ...or to zero.
    ceph config set mon osd_crush_update_weight_set false || return 1
    ceph osd crush move osd.1 host=FOO || return 1
    ceph osd crush tree
    ceph osd crush tree | grep osd.0 | grep '3.00000 3.00000' || return 1
    ceph osd crush tree | grep osd.1 | grep '3.00000 0' || return 1
    ceph osd crush tree | grep FOO | grep '6.00000 3.00000' || return 1
}

main crush-choose-args "$@"

# Local Variables:
# compile-command: "cd ../../../build ; ln -sf ../src/ceph-disk/ceph_disk/main.py bin/ceph-disk && make -j4 && ../src/test/crush/crush-choose-args.sh"
# End:
# diff --git a/qa/standalone/crush/crush-classes.sh b/qa/standalone/crush/crush-classes.sh new file mode 100755 index 000000000..558aabe6d --- /dev/null +++ b/qa/standalone/crush/crush-classes.sh @@ -0,0 +1,265 @@
#!/usr/bin/env bash
#
# Copyright (C) 2017 Red Hat <contact@redhat.com>
#
# Author: Loic Dachary <loic@dachary.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Library Public License for more details.
#

source $CEPH_ROOT/qa/standalone/ceph-helpers.sh

function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7130" # git grep '\<7130\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "
    #
    # Disable auto-class, so we can inject device class manually below
    #
    CEPH_ARGS+="--osd-class-update-on-start=false "

    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    local func
    for func in $funcs ; do
        setup $dir || return 1
        $func $dir || return 1
        teardown $dir || return 1
    done
}

##
# Store a small payload as object *obj* in the rbd pool, using
# $dir/ORIGINAL as scratch file.
#
# @param dir directory in which all data is stored
# @param obj object name (defaults to SOMETHING)
# @return 0 on success, 1 on error
#
function add_something() {
    local dir=$1
    local obj=${2:-SOMETHING}

    local payload=ABCDEF
    echo $payload > $dir/ORIGINAL
    rados --pool rbd put $obj $dir/ORIGINAL || return 1
}

##
# Print the space separated content of the //up/osd elements reported
# by "ceph osd map" for *objectname* in *poolname*.
#
# @param poolname name of the pool
# @param objectname name of the object
#
function get_osds_up() {
    local poolname=$1
    local objectname=$2

    local osds=$(ceph --format xml osd map $poolname $objectname 2>/dev/null | \
        $XMLSTARLET sel -t -m "//up/osd" -v . -o ' ')
    # get rid of the trailing space
    echo $osds
}

function TEST_reweight_vs_classes() {
    local dir=$1
    local h

    # CrushWrapper::update_item (and ceph osd crush set) must rebuild the shadow
    # tree too. https://tracker.ceph.com/issues/48065

    run_mon $dir a || return 1
    run_osd $dir 0 || return 1
    run_osd $dir 1 || return 1
    run_osd $dir 2 || return 1

    ceph osd crush set-device-class ssd osd.0 || return 1
    ceph osd crush class ls-osd ssd | grep 0 || return 1
    ceph osd crush set-device-class ssd osd.1 || return 1
    ceph osd crush class ls-osd ssd | grep 1 || return 1

    ceph osd crush reweight osd.0 1

    h=$(hostname -s)
    # every weight check must be able to fail the test, not only the last one
    ceph osd crush dump | jq ".buckets[] | select(.name==\"$h\") | .items[0].weight" | grep 65536 || return 1
    ceph osd crush dump | jq ".buckets[] | select(.name==\"$h~ssd\") | .items[0].weight" | grep 65536 || return 1

    ceph osd crush set 0 2 host=$h

    ceph osd crush dump | jq ".buckets[] | select(.name==\"$h\") | .items[0].weight" | grep 131072 || return 1
    ceph osd crush dump | jq ".buckets[] | select(.name==\"$h~ssd\") | .items[0].weight" | grep 131072 || return 1
}

function TEST_classes() {
    local dir=$1
    local ok
    local delay

    run_mon $dir a || return 1
    run_osd $dir 0 || return 1
    run_osd $dir 1 || return 1
    run_osd $dir 2 || return 1
    create_rbd_pool || return 1

    test "$(get_osds_up rbd SOMETHING)" == "1 2 0" || return 1
    add_something $dir SOMETHING || return 1

    #
    # osd.0 has class ssd and the rule is modified
    # to only take ssd devices.
    #
    ceph osd getcrushmap > $dir/map || return 1
    crushtool -d $dir/map -o $dir/map.txt || return 1
    ${SED} -i \
        -e '/device 0 osd.0/s/$/ class ssd/' \
        -e '/step take default/s/$/ class ssd/' \
        $dir/map.txt || return 1
    crushtool -c $dir/map.txt -o $dir/map-new || return 1
    ceph osd setcrushmap -i $dir/map-new || return 1

    #
    # There can only be one mapping since there only is
    # one device with ssd class.
    #
    ok=false
    for delay in 2 4 8 16 32 64 128 256 ; do
        if test "$(get_osds_up rbd SOMETHING_ELSE)" == "0" ; then
            ok=true
            break
        fi
        sleep $delay
        ceph osd dump # for debugging purposes
        ceph pg dump # for debugging purposes
    done
    $ok || return 1
    #
    # Writing keeps working because the pool is min_size 1 by
    # default.
    #
    add_something $dir SOMETHING_ELSE || return 1

    #
    # Sanity check that the rule indeed has ssd
    # generated bucket with a name including ~ssd.
    #
    ceph osd crush dump | grep -q '~ssd' || return 1
}

function TEST_set_device_class() {
    local dir=$1
    local ok
    local delay

    TEST_classes $dir || return 1

    ceph osd crush set-device-class ssd osd.0 || return 1
    ceph osd crush class ls-osd ssd | grep 0 || return 1
    ceph osd crush set-device-class ssd osd.1 || return 1
    ceph osd crush class ls-osd ssd | grep 1 || return 1
    ceph osd crush set-device-class ssd 0 1 || return 1 # should be idempotent

    ok=false
    for delay in 2 4 8 16 32 64 128 256 ; do
        if test "$(get_osds_up rbd SOMETHING_ELSE)" == "0 1" ; then
            ok=true
            break
        fi
        sleep $delay
        ceph osd crush dump
        ceph osd dump # for debugging purposes
        ceph pg dump # for debugging purposes
    done
    $ok || return 1
}

function TEST_mon_classes() {
    local dir=$1
    local out

    run_mon $dir a || return 1
    run_osd $dir 0 || return 1
    run_osd $dir 1 || return 1
    run_osd $dir 2 || return 1
    create_rbd_pool || return 1

    test "$(get_osds_up rbd SOMETHING)" == "1 2 0" || return 1
    add_something $dir SOMETHING || return 1

    # test create and remove class
    ceph osd crush class create CLASS || return 1
    ceph osd crush class create CLASS || return 1 # idempotent
    ceph osd crush class ls | grep CLASS || return 1
    ceph osd crush class rename CLASS TEMP || return 1
    ceph osd crush class ls | grep TEMP || return 1
    ceph osd crush class rename TEMP CLASS || return 1
    ceph osd crush class ls | grep CLASS || return 1
    ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd crush-device-class=CLASS || return 1
    expect_failure $dir EBUSY ceph osd crush class rm CLASS || return 1
    ceph osd erasure-code-profile rm myprofile || return 1
    ceph osd crush class rm CLASS || return 1
    ceph osd crush class rm CLASS || return 1 # test idempotence

    # test rm-device-class
    ceph osd crush set-device-class aaa osd.0 || return 1
    ceph osd tree | grep -q 'aaa' || return 1
    ceph osd crush dump | grep -q '~aaa' || return 1
    ceph osd crush tree --show-shadow | grep -q '~aaa' || return 1
    ceph osd crush set-device-class bbb osd.1 || return 1
    ceph osd tree | grep -q 'bbb' || return 1
    ceph osd crush dump | grep -q '~bbb' || return 1
    ceph osd crush tree --show-shadow | grep -q '~bbb' || return 1
    ceph osd crush set-device-class ccc osd.2 || return 1
    ceph osd tree | grep -q 'ccc' || return 1
    ceph osd crush dump | grep -q '~ccc' || return 1
    ceph osd crush tree --show-shadow | grep -q '~ccc' || return 1
    ceph osd crush rm-device-class 0 || return 1
    ceph osd tree | grep -q 'aaa' && return 1
    ceph osd crush class ls | grep -q 'aaa' && return 1 # class 'aaa' should gone
    ceph osd crush rm-device-class 1 || return 1
    ceph osd tree | grep -q 'bbb' && return 1
    ceph osd crush class ls | grep -q 'bbb' && return 1 # class 'bbb' should gone
    ceph osd crush rm-device-class 2 || return 1
    ceph osd tree | grep -q 'ccc' && return 1
    ceph osd crush class ls | grep -q 'ccc' && return 1 # class 'ccc' should gone
    ceph osd crush set-device-class asdf all || return 1
    ceph osd tree | grep -q 'asdf' || return 1
    ceph osd crush dump | grep -q '~asdf' || return 1
    ceph osd crush tree --show-shadow | grep -q '~asdf' || return 1
    ceph osd crush rule create-replicated asdf-rule default host asdf || return 1
    ceph osd crush rm-device-class all || return 1
    ceph osd tree | grep -q 'asdf' && return 1
    ceph osd crush class ls | grep -q 'asdf' || return 1 # still referenced by asdf-rule

    ceph osd crush set-device-class abc osd.2 || return 1
    ceph osd crush move osd.2 root=foo rack=foo-rack host=foo-host || return 1
    out=$(ceph osd tree |awk '$1 == 2 && $2 == "abc" {print $0}')
    if [ "$out" == "" ]; then
        return 1
    fi

    # verify 'crush move' too
    ceph osd crush dump | grep -q 'foo~abc' || return 1
    ceph osd crush tree --show-shadow | grep -q 'foo~abc' || return 1
    ceph osd crush dump | grep -q 'foo-rack~abc' || return 1
    ceph osd crush tree --show-shadow | grep -q 'foo-rack~abc' || return 1
    ceph osd crush dump | grep -q 'foo-host~abc' || return 1
    ceph osd crush tree --show-shadow | grep -q 'foo-host~abc' || return 1
    ceph osd crush rm-device-class osd.2 || return 1
    # restore class, so we can continue to test create-replicated
    ceph osd crush set-device-class abc osd.2 || return 1

    ceph osd crush rule create-replicated foo-rule foo host abc || return 1

    # test set-device-class implicitly change class
    ceph osd crush set-device-class hdd osd.0 || return 1
    expect_failure $dir EBUSY ceph osd crush set-device-class nvme osd.0 || return 1

    # test class rename
    ceph osd crush rm-device-class all || return 1
    ceph osd crush set-device-class class_1 all || return 1
    ceph osd crush class ls | grep 'class_1' || return 1
    ceph osd crush tree --show-shadow | grep 'class_1' || return 1
    ceph osd crush rule create-replicated class_1_rule default host class_1 || return 1
    ceph osd crush class rename class_1 class_2
    ceph osd crush class rename class_1 class_2 # idempotent
    ceph osd crush class ls | grep 'class_1' && return 1
    ceph osd crush tree --show-shadow | grep 'class_1' && return 1
    ceph osd crush class ls | grep 'class_2' || return 1
    ceph osd crush tree --show-shadow | grep 'class_2' || return 1
}

main crush-classes "$@"

# Local Variables:
# compile-command: "cd ../../../build ; ln -sf ../src/ceph-disk/ceph_disk/main.py bin/ceph-disk && make -j4 && ../src/test/crush/crush-classes.sh"
# End:
# diff --git
# a/qa/standalone/erasure-code/test-erasure-code-plugins.sh b/qa/standalone/erasure-code/test-erasure-code-plugins.sh new file mode 100755 index 000000000..b5648d472 --- /dev/null +++ b/qa/standalone/erasure-code/test-erasure-code-plugins.sh @@ -0,0 +1,118 @@
#!/usr/bin/env bash
set -x

source $CEPH_ROOT/qa/standalone/ceph-helpers.sh

##
# Set the global arrays legacy_jerasure_plugins, legacy_shec_plugins and
# plugins according to the machine architecture.
#
# @param arch machine name as printed by uname -m
# @return 0 on success, 1 if the architecture is not supported
#
function set_plugins_for_arch() {
    case $1 in
        # i[3456]86: a single bracket expression, so that i386 ... i686 match
        i[3456]86*|x86_64*|amd64*)
            legacy_jerasure_plugins=(jerasure_generic jerasure_sse3 jerasure_sse4)
            legacy_shec_plugins=(shec_generic shec_sse3 shec_sse4)
            plugins=(jerasure shec lrc isa)
            ;;
        aarch64*|arm*)
            legacy_jerasure_plugins=(jerasure_generic jerasure_neon)
            legacy_shec_plugins=(shec_generic shec_neon)
            plugins=(jerasure shec lrc)
            ;;
        *)
            return 1
            ;;
    esac
}

arch=$(uname -m)

if ! set_plugins_for_arch "$arch" ; then
    echo "unsupported platform ${arch}."
    # "return" only works when the file is sourced; fall back to exit
    # when it is executed so that the tests do not run without plugins
    return 1 2>/dev/null || exit 1
fi

function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:17110" # git grep '\<17110\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "

    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    local func
    for func in $funcs ; do
        $func $dir || return 1
    done
}

function TEST_preload_warning() {
    local dir=$1
    local plugin

    for plugin in ${legacy_jerasure_plugins[*]} ${legacy_shec_plugins[*]}; do
        setup $dir || return 1
        run_mon $dir a --osd_erasure_code_plugins="${plugin}" || return 1
        run_mgr $dir x || return 1
        CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1
        run_osd $dir 0 --osd_erasure_code_plugins="${plugin}" || return 1
        CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log flush || return 1
        grep "WARNING: osd_erasure_code_plugins contains plugin ${plugin}" $dir/mon.a.log || return 1
        grep "WARNING: osd_erasure_code_plugins contains plugin ${plugin}" $dir/osd.0.log || return 1
        teardown $dir || return 1
    done
    return 0
}

function TEST_preload_no_warning() {
    local dir=$1
    local plugin

    for plugin in ${plugins[*]}; do
        setup $dir || return 1
        run_mon $dir a --osd_erasure_code_plugins="${plugin}" || return 1
        run_mgr $dir x || return 1
        CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1
        run_osd $dir 0 --osd_erasure_code_plugins="${plugin}" || return 1
        CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log flush || return 1
        ! grep "WARNING: osd_erasure_code_plugins contains plugin" $dir/mon.a.log || return 1
        ! grep "WARNING: osd_erasure_code_plugins contains plugin" $dir/osd.0.log || return 1
        teardown $dir || return 1
    done

    return 0
}

function TEST_preload_no_warning_default() {
    local dir=$1

    setup $dir || return 1
    run_mon $dir a || return 1
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1
    run_mgr $dir x || return 1
    run_osd $dir 0 || return 1
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log flush || return 1
    ! grep "WARNING: osd_erasure_code_plugins" $dir/mon.a.log || return 1
    ! grep "WARNING: osd_erasure_code_plugins" $dir/osd.0.log || return 1
    teardown $dir || return 1

    return 0
}

function TEST_ec_profile_warning() {
    local dir=$1
    local id
    local plugin

    setup $dir || return 1
    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    for id in $(seq 0 2) ; do
        run_osd $dir $id || return 1
    done
    create_rbd_pool || return 1
    wait_for_clean || return 1

    for plugin in ${legacy_jerasure_plugins[*]}; do
        ceph osd erasure-code-profile set prof-${plugin} crush-failure-domain=osd technique=reed_sol_van plugin=${plugin} || return 1
        CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1
        grep "WARNING: erasure coding profile prof-${plugin} uses plugin ${plugin}" $dir/mon.a.log || return 1
    done

    for plugin in ${legacy_shec_plugins[*]}; do
        ceph osd erasure-code-profile set prof-${plugin} crush-failure-domain=osd plugin=${plugin} || return 1
        CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1
        grep "WARNING: erasure coding profile prof-${plugin} uses plugin ${plugin}" $dir/mon.a.log || return 1
    done

    teardown $dir || return 1
}

main test-erasure-code-plugins "$@"
# diff --git a/qa/standalone/erasure-code/test-erasure-code.sh b/qa/standalone/erasure-code/test-erasure-code.sh new file mode 100755 index 000000000..b93151233 --- /dev/null +++ b/qa/standalone/erasure-code/test-erasure-code.sh @@ -0,0 +1,337 @@
#!/usr/bin/env bash
#
# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com>
#
# Author: Loic Dachary <loic@dachary.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Library Public License for more details.
+# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7101" # git grep '\<7101\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON --mon-osd-prime-pg-temp=false" + + setup $dir || return 1 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + # check that erasure code plugins are preloaded + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1 + grep 'load: jerasure.*lrc' $dir/mon.a.log || return 1 + for id in $(seq 0 10) ; do + run_osd $dir $id || return 1 + done + create_rbd_pool || return 1 + wait_for_clean || return 1 + # check that erasure code plugins are preloaded + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log flush || return 1 + grep 'load: jerasure.*lrc' $dir/osd.0.log || return 1 + create_erasure_coded_pool ecpool || return 1 + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + $func $dir || return 1 + done + + delete_pool ecpool || return 1 + teardown $dir || return 1 +} + +function create_erasure_coded_pool() { + local poolname=$1 + + ceph osd erasure-code-profile set myprofile \ + crush-failure-domain=osd || return 1 + create_pool $poolname 12 12 erasure myprofile \ + || return 1 + wait_for_clean || return 1 +} + +function rados_put_get() { + local dir=$1 + local poolname=$2 + local objname=${3:-SOMETHING} + + + for marker in AAA BBB CCCC DDDD ; do + printf "%*s" 1024 $marker + done > $dir/ORIGINAL + + # + # get and put an object, compare they are equal + # + rados --pool $poolname put $objname $dir/ORIGINAL || return 1 + rados --pool $poolname get $objname $dir/COPY || return 1 + diff $dir/ORIGINAL $dir/COPY || return 1 + rm $dir/COPY + + # + # take out an OSD used to store the object and + # check the object can still be retrieved, which implies + # recovery + # + local -a initial_osds=($(get_osds 
$poolname $objname)) + local last=$((${#initial_osds[@]} - 1)) + ceph osd out ${initial_osds[$last]} || return 1 + + # give the osdmap up to 5 seconds to refresh + sleep 5 + ! get_osds $poolname $objname | grep '\<'${initial_osds[$last]}'\>' || return 1 + + rados --pool $poolname get $objname $dir/COPY || return 1 + diff $dir/ORIGINAL $dir/COPY || return 1 + ceph osd in ${initial_osds[$last]} || return 1 + + rm $dir/ORIGINAL +} + +function rados_osds_out_in() { + local dir=$1 + local poolname=$2 + local objname=${3:-SOMETHING} + + + for marker in FFFF GGGG HHHH IIII ; do + printf "%*s" 1024 $marker + done > $dir/ORIGINAL + + # + # get and put an object, compare they are equal + # + rados --pool $poolname put $objname $dir/ORIGINAL || return 1 + rados --pool $poolname get $objname $dir/COPY || return 1 + diff $dir/ORIGINAL $dir/COPY || return 1 + rm $dir/COPY + + # + # take out two OSDs used to store the object, wait for the cluster + # to be clean (i.e. all PG are clean and active) again which + # implies the PG have been moved to use the remaining OSDs. Check + # the object can still be retrieved. + # + wait_for_clean || return 1 + local osds_list=$(get_osds $poolname $objname) + local -a osds=($osds_list) + for osd in 0 1 ; do + ceph osd out ${osds[$osd]} || return 1 + done + wait_for_clean || return 1 + # + # verify the object is no longer mapped to the osds that are out + # + for osd in 0 1 ; do + ! get_osds $poolname $objname | grep '\<'${osds[$osd]}'\>' || return 1 + done + rados --pool $poolname get $objname $dir/COPY || return 1 + diff $dir/ORIGINAL $dir/COPY || return 1 + # + # bring the osds back in, , wait for the cluster + # to be clean (i.e. 
all PG are clean and active) again which + # implies the PG go back to using the same osds as before + # + for osd in 0 1 ; do + ceph osd in ${osds[$osd]} || return 1 + done + wait_for_clean || return 1 + test "$osds_list" = "$(get_osds $poolname $objname)" || return 1 + rm $dir/ORIGINAL +} + +function TEST_rados_put_get_lrc_advanced() { + local dir=$1 + local poolname=pool-lrc-a + local profile=profile-lrc-a + + ceph osd erasure-code-profile set $profile \ + plugin=lrc \ + mapping=DD_ \ + crush-steps='[ [ "chooseleaf", "osd", 0 ] ]' \ + layers='[ [ "DDc", "" ] ]' || return 1 + create_pool $poolname 12 12 erasure $profile \ + || return 1 + + rados_put_get $dir $poolname || return 1 + + delete_pool $poolname + ceph osd erasure-code-profile rm $profile +} + +function TEST_rados_put_get_lrc_kml() { + local dir=$1 + local poolname=pool-lrc + local profile=profile-lrc + + ceph osd erasure-code-profile set $profile \ + plugin=lrc \ + k=4 m=2 l=3 \ + crush-failure-domain=osd || return 1 + create_pool $poolname 12 12 erasure $profile \ + || return 1 + + rados_put_get $dir $poolname || return 1 + + delete_pool $poolname + ceph osd erasure-code-profile rm $profile +} + +function TEST_rados_put_get_isa() { + if ! 
erasure_code_plugin_exists isa ; then + echo "SKIP because plugin isa has not been built" + return 0 + fi + local dir=$1 + local poolname=pool-isa + + ceph osd erasure-code-profile set profile-isa \ + plugin=isa \ + crush-failure-domain=osd || return 1 + create_pool $poolname 1 1 erasure profile-isa \ + || return 1 + + rados_put_get $dir $poolname || return 1 + + delete_pool $poolname +} + +function TEST_rados_put_get_jerasure() { + local dir=$1 + + rados_put_get $dir ecpool || return 1 + + local poolname=pool-jerasure + local profile=profile-jerasure + + ceph osd erasure-code-profile set $profile \ + plugin=jerasure \ + k=4 m=2 \ + crush-failure-domain=osd || return 1 + create_pool $poolname 12 12 erasure $profile \ + || return 1 + + rados_put_get $dir $poolname || return 1 + rados_osds_out_in $dir $poolname || return 1 + + delete_pool $poolname + ceph osd erasure-code-profile rm $profile +} + +function TEST_rados_put_get_shec() { + local dir=$1 + + local poolname=pool-shec + local profile=profile-shec + + ceph osd erasure-code-profile set $profile \ + plugin=shec \ + k=2 m=1 c=1 \ + crush-failure-domain=osd || return 1 + create_pool $poolname 12 12 erasure $profile \ + || return 1 + + rados_put_get $dir $poolname || return 1 + + delete_pool $poolname + ceph osd erasure-code-profile rm $profile +} + +function TEST_alignment_constraints() { + local payload=ABC + echo "$payload" > $dir/ORIGINAL + # + # Verify that the rados command enforces alignment constraints + # imposed by the stripe width + # See http://tracker.ceph.com/issues/8622 + # + local stripe_unit=$(ceph-conf --show-config-value osd_pool_erasure_code_stripe_unit) + eval local $(ceph osd erasure-code-profile get myprofile | grep k=) + local block_size=$((stripe_unit * k - 1)) + dd if=/dev/zero of=$dir/ORIGINAL bs=$block_size count=2 + rados --block-size=$block_size \ + --pool ecpool put UNALIGNED $dir/ORIGINAL || return 1 + rm $dir/ORIGINAL +} + +function chunk_size() { + echo $(ceph-conf 
--show-config-value osd_pool_erasure_code_stripe_unit) +} + +# +# By default an object will be split in two (k=2) with the first part +# of the object in the first OSD of the up set and the second part in +# the next OSD in the up set. This layout is defined by the mapping +# parameter and this function helps verify that the first and second +# part of the object are located in the OSD where they should be. +# +function verify_chunk_mapping() { + local dir=$1 + local poolname=$2 + local first=$3 + local second=$4 + + local payload=$(printf '%*s' $(chunk_size) FIRST$poolname ; printf '%*s' $(chunk_size) SECOND$poolname) + echo -n "$payload" > $dir/ORIGINAL + + rados --pool $poolname put SOMETHING$poolname $dir/ORIGINAL || return 1 + rados --pool $poolname get SOMETHING$poolname $dir/COPY || return 1 + local -a osds=($(get_osds $poolname SOMETHING$poolname)) + for (( i = 0; i < ${#osds[@]}; i++ )) ; do + ceph daemon osd.${osds[$i]} flush_journal + done + diff $dir/ORIGINAL $dir/COPY || return 1 + rm $dir/COPY + + local -a osds=($(get_osds $poolname SOMETHING$poolname)) + objectstore_tool $dir ${osds[$first]} SOMETHING$poolname get-bytes | grep --quiet FIRST$poolname || return 1 + objectstore_tool $dir ${osds[$second]} SOMETHING$poolname get-bytes | grep --quiet SECOND$poolname || return 1 +} + +function TEST_chunk_mapping() { + local dir=$1 + + # + # mapping=DD_ is the default: + # first OSD (i.e. 0) in the up set has the first part of the object + # second OSD (i.e. 1) in the up set has the second part of the object + # + verify_chunk_mapping $dir ecpool 0 1 || return 1 + + ceph osd erasure-code-profile set remap-profile \ + plugin=lrc \ + layers='[ [ "cDD", "" ] ]' \ + mapping='_DD' \ + crush-steps='[ [ "choose", "osd", 0 ] ]' || return 1 + ceph osd erasure-code-profile get remap-profile + create_pool remap-pool 12 12 erasure remap-profile \ + || return 1 + + # + # mapping=_DD + # second OSD (i.e. 
1) in the up set has the first part of the object + # third OSD (i.e. 2) in the up set has the second part of the object + # + verify_chunk_mapping $dir remap-pool 1 2 || return 1 + + delete_pool remap-pool + ceph osd erasure-code-profile rm remap-profile +} + +main test-erasure-code "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/erasure-code/test-erasure-code.sh" +# End: diff --git a/qa/standalone/erasure-code/test-erasure-eio.sh b/qa/standalone/erasure-code/test-erasure-eio.sh new file mode 100755 index 000000000..42c538eb9 --- /dev/null +++ b/qa/standalone/erasure-code/test-erasure-eio.sh @@ -0,0 +1,700 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2015 Red Hat <contact@redhat.com> +# +# +# Author: Kefu Chai <kchai@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
+# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7112" # git grep '\<7112\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--osd_mclock_override_recovery_settings=true " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + create_pool rbd 4 || return 1 + + # check that erasure code plugins are preloaded + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1 + grep 'load: jerasure.*lrc' $dir/mon.a.log || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function setup_osds() { + local count=$1 + shift + + for id in $(seq 0 $(expr $count - 1)) ; do + run_osd $dir $id || return 1 + done + + # check that erasure code plugins are preloaded + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log flush || return 1 + grep 'load: jerasure.*lrc' $dir/osd.0.log || return 1 +} + +function get_state() { + local pgid=$1 + local sname=state + ceph --format json pg dump pgs 2>/dev/null | \ + jq -r ".pg_stats | .[] | select(.pgid==\"$pgid\") | .$sname" +} + +function create_erasure_coded_pool() { + local poolname=$1 + shift + local k=$1 + shift + local m=$1 + shift + + ceph osd erasure-code-profile set myprofile \ + plugin=jerasure \ + k=$k m=$m \ + crush-failure-domain=osd || return 1 + create_pool $poolname 1 1 erasure myprofile \ + || return 1 + wait_for_clean || return 1 +} + +function delete_erasure_coded_pool() { + local poolname=$1 + ceph osd pool delete $poolname $poolname --yes-i-really-really-mean-it + ceph osd erasure-code-profile rm myprofile +} + +function rados_put() { + local dir=$1 + local poolname=$2 + local objname=${3:-SOMETHING} + + for marker in AAA BBB CCCC DDDD ; do + printf "%*s" 1024 $marker 
+ done > $dir/ORIGINAL + # + # get and put an object, compare they are equal + # + rados --pool $poolname put $objname $dir/ORIGINAL || return 1 +} + +function rados_get() { + local dir=$1 + local poolname=$2 + local objname=${3:-SOMETHING} + local expect=${4:-ok} + + # + # Expect a failure to get object + # + if [ $expect = "fail" ]; + then + ! rados --pool $poolname get $objname $dir/COPY + return + fi + # + # get an object, compare with $dir/ORIGINAL + # + rados --pool $poolname get $objname $dir/COPY || return 1 + diff $dir/ORIGINAL $dir/COPY || return 1 + rm $dir/COPY +} + + +function inject_remove() { + local pooltype=$1 + shift + local which=$1 + shift + local poolname=$1 + shift + local objname=$1 + shift + local dir=$1 + shift + local shard_id=$1 + shift + + local -a initial_osds=($(get_osds $poolname $objname)) + local osd_id=${initial_osds[$shard_id]} + objectstore_tool $dir $osd_id $objname remove || return 1 +} + +# Test with an inject error +function rados_put_get_data() { + local inject=$1 + shift + local dir=$1 + shift + local shard_id=$1 + shift + local arg=$1 + + # inject eio to speificied shard + # + local poolname=pool-jerasure + local objname=obj-$inject-$$-$shard_id + rados_put $dir $poolname $objname || return 1 + inject_$inject ec data $poolname $objname $dir $shard_id || return 1 + rados_get $dir $poolname $objname || return 1 + + if [ "$arg" = "recovery" ]; + then + # + # take out the last OSD used to store the object, + # bring it back, and check for clean PGs which means + # recovery didn't crash the primary. + # + local -a initial_osds=($(get_osds $poolname $objname)) + local last_osd=${initial_osds[-1]} + # Kill OSD + kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1 + ceph osd out ${last_osd} || return 1 + ! 
get_osds $poolname $objname | grep '\<'${last_osd}'\>' || return 1 + ceph osd in ${last_osd} || return 1 + activate_osd $dir ${last_osd} || return 1 + wait_for_clean || return 1 + # Won't check for eio on get here -- recovery above might have fixed it + else + shard_id=$(expr $shard_id + 1) + inject_$inject ec data $poolname $objname $dir $shard_id || return 1 + rados_get $dir $poolname $objname fail || return 1 + rm $dir/ORIGINAL + fi + +} + +# Change the size of speificied shard +# +function set_size() { + local objname=$1 + shift + local dir=$1 + shift + local shard_id=$1 + shift + local bytes=$1 + shift + local mode=${1} + + local poolname=pool-jerasure + local -a initial_osds=($(get_osds $poolname $objname)) + local osd_id=${initial_osds[$shard_id]} + ceph osd set noout + if [ "$mode" = "add" ]; + then + objectstore_tool $dir $osd_id $objname get-bytes $dir/CORRUPT || return 1 + dd if=/dev/urandom bs=$bytes count=1 >> $dir/CORRUPT + elif [ "$bytes" = "0" ]; + then + touch $dir/CORRUPT + else + dd if=/dev/urandom bs=$bytes count=1 of=$dir/CORRUPT + fi + objectstore_tool $dir $osd_id $objname set-bytes $dir/CORRUPT || return 1 + rm -f $dir/CORRUPT + ceph osd unset noout +} + +function rados_get_data_bad_size() { + local dir=$1 + shift + local shard_id=$1 + shift + local bytes=$1 + shift + local mode=${1:-set} + + local poolname=pool-jerasure + local objname=obj-size-$$-$shard_id-$bytes + rados_put $dir $poolname $objname || return 1 + + # Change the size of speificied shard + # + set_size $objname $dir $shard_id $bytes $mode || return 1 + + rados_get $dir $poolname $objname || return 1 + + # Leave objname and modify another shard + shard_id=$(expr $shard_id + 1) + set_size $objname $dir $shard_id $bytes $mode || return 1 + rados_get $dir $poolname $objname fail || return 1 + rm $dir/ORIGINAL +} + +# +# These two test cases try to validate the following behavior: +# For object on EC pool, if there is one shard having read error ( +# either primary or replica), 
client can still read object. +# +# If 2 shards have read errors the client will get an error. +# +function TEST_rados_get_subread_eio_shard_0() { + local dir=$1 + setup_osds 4 || return 1 + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 2 1 || return 1 + # inject eio on primary OSD (0) and replica OSD (1) + local shard_id=0 + rados_put_get_data eio $dir $shard_id || return 1 + delete_erasure_coded_pool $poolname +} + +function TEST_rados_get_subread_eio_shard_1() { + local dir=$1 + setup_osds 4 || return 1 + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 2 1 || return 1 + # inject eio into replicas OSD (1) and OSD (2) + local shard_id=1 + rados_put_get_data eio $dir $shard_id || return 1 + delete_erasure_coded_pool $poolname +} + +# We don't remove the object from the primary because +# that just causes it to appear to be missing + +function TEST_rados_get_subread_missing() { + local dir=$1 + setup_osds 4 || return 1 + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 2 1 || return 1 + # inject remove into replicas OSD (1) and OSD (2) + local shard_id=1 + rados_put_get_data remove $dir $shard_id || return 1 + delete_erasure_coded_pool $poolname +} + +# +# +# These two test cases try to validate that following behavior: +# For object on EC pool, if there is one shard which an incorrect +# size this will cause an internal read error, client can still read object. +# +# If 2 shards have incorrect size the client will get an error. 
+# +function TEST_rados_get_bad_size_shard_0() { + local dir=$1 + setup_osds 4 || return 1 + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 2 1 || return 1 + # Set incorrect size into primary OSD (0) and replica OSD (1) + local shard_id=0 + rados_get_data_bad_size $dir $shard_id 10 || return 1 + rados_get_data_bad_size $dir $shard_id 0 || return 1 + rados_get_data_bad_size $dir $shard_id 256 add || return 1 + delete_erasure_coded_pool $poolname +} + +function TEST_rados_get_bad_size_shard_1() { + local dir=$1 + setup_osds 4 || return 1 + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 2 1 || return 1 + # Set incorrect size into replicas OSD (1) and OSD (2) + local shard_id=1 + rados_get_data_bad_size $dir $shard_id 10 || return 1 + rados_get_data_bad_size $dir $shard_id 0 || return 1 + rados_get_data_bad_size $dir $shard_id 256 add || return 1 + delete_erasure_coded_pool $poolname +} + +function TEST_rados_get_with_subreadall_eio_shard_0() { + local dir=$1 + local shard_id=0 + + setup_osds 4 || return 1 + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 2 1 || return 1 + # inject eio on primary OSD (0) + rados_put_get_data eio $dir $shard_id recovery || return 1 + + delete_erasure_coded_pool $poolname +} + +function TEST_rados_get_with_subreadall_eio_shard_1() { + local dir=$1 + local shard_id=1 + + setup_osds 4 || return 1 + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 2 1 || return 1 + # inject eio on replica OSD (1) + rados_put_get_data eio $dir $shard_id recovery || return 1 + + delete_erasure_coded_pool $poolname +} + +# Test recovery the object attr read error +function TEST_ec_object_attr_read_error() { + local dir=$1 + local objname=myobject + + setup_osds 7 || return 1 + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 3 2 || return 1 + + local primary_osd=$(get_primary $poolname $objname) + # Kill primary OSD + kill_daemons $dir TERM 
osd.${primary_osd} >&2 < /dev/null || return 1 + + # Write data + rados_put $dir $poolname $objname || return 1 + + # Inject eio, shard 1 is the one read attr + inject_eio ec mdata $poolname $objname $dir 1 || return 1 + + # Restart OSD + activate_osd $dir ${primary_osd} || return 1 + + # Cluster should recover this object + wait_for_clean || return 1 + + rados_get $dir $poolname myobject || return 1 + + delete_erasure_coded_pool $poolname +} + +# Test recovery the first k copies aren't all available +function TEST_ec_single_recovery_error() { + local dir=$1 + local objname=myobject + + setup_osds 7 || return 1 + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 3 2 || return 1 + + rados_put $dir $poolname $objname || return 1 + inject_eio ec data $poolname $objname $dir 0 || return 1 + + local -a initial_osds=($(get_osds $poolname $objname)) + local last_osd=${initial_osds[-1]} + # Kill OSD + kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1 + ceph osd down ${last_osd} || return 1 + ceph osd out ${last_osd} || return 1 + + # Cluster should recover this object + wait_for_clean || return 1 + + rados_get $dir $poolname myobject || return 1 + + delete_erasure_coded_pool $poolname +} + +# Test recovery when repeated reads are needed due to EIO +function TEST_ec_recovery_multiple_errors() { + local dir=$1 + local objname=myobject + + setup_osds 9 || return 1 + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 4 4 || return 1 + + rados_put $dir $poolname $objname || return 1 + inject_eio ec data $poolname $objname $dir 0 || return 1 + # first read will try shards 0,1,2 when 0 gets EIO, shard 3 gets + # tried as well. Make that fail to test multiple-EIO handling. 
+ inject_eio ec data $poolname $objname $dir 3 || return 1 + inject_eio ec data $poolname $objname $dir 4 || return 1 + + local -a initial_osds=($(get_osds $poolname $objname)) + local last_osd=${initial_osds[-1]} + # Kill OSD + kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1 + ceph osd down ${last_osd} || return 1 + ceph osd out ${last_osd} || return 1 + + # Cluster should recover this object + wait_for_clean || return 1 + + rados_get $dir $poolname myobject || return 1 + + delete_erasure_coded_pool $poolname +} + +# Test recovery when there's only one shard to recover, but multiple +# objects recovering in one RecoveryOp +function TEST_ec_recovery_multiple_objects() { + local dir=$1 + local objname=myobject + + ORIG_ARGS=$CEPH_ARGS + CEPH_ARGS+=' --osd-recovery-max-single-start 3 --osd-recovery-max-active 3 ' + setup_osds 7 || return 1 + CEPH_ARGS=$ORIG_ARGS + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 3 2 || return 1 + + rados_put $dir $poolname test1 + rados_put $dir $poolname test2 + rados_put $dir $poolname test3 + + ceph osd out 0 || return 1 + + # Cluster should recover these objects all at once + wait_for_clean || return 1 + + rados_get $dir $poolname test1 + rados_get $dir $poolname test2 + rados_get $dir $poolname test3 + + delete_erasure_coded_pool $poolname +} + +# test multi-object recovery when the one missing shard gets EIO +function TEST_ec_recovery_multiple_objects_eio() { + local dir=$1 + local objname=myobject + + ORIG_ARGS=$CEPH_ARGS + CEPH_ARGS+=' --osd-recovery-max-single-start 3 --osd-recovery-max-active 3 ' + setup_osds 7 || return 1 + CEPH_ARGS=$ORIG_ARGS + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 3 2 || return 1 + + rados_put $dir $poolname test1 + rados_put $dir $poolname test2 + rados_put $dir $poolname test3 + + # can't read from this shard anymore + inject_eio ec data $poolname $objname $dir 0 || return 1 + ceph osd out 0 || return 1 + + # Cluster should recover 
these objects all at once + wait_for_clean || return 1 + + rados_get $dir $poolname test1 + rados_get $dir $poolname test2 + rados_get $dir $poolname test3 + + delete_erasure_coded_pool $poolname +} + +# Test backfill with unfound object +function TEST_ec_backfill_unfound() { + local dir=$1 + local objname=myobject + local lastobj=300 + # Must be between 1 and $lastobj + local testobj=obj250 + + ORIG_ARGS=$CEPH_ARGS + CEPH_ARGS+=' --osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10' + setup_osds 5 || return 1 + CEPH_ARGS=$ORIG_ARGS + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 3 2 || return 1 + + ceph pg dump pgs + + rados_put $dir $poolname $objname || return 1 + local primary=$(get_primary $poolname $objname) + + local -a initial_osds=($(get_osds $poolname $objname)) + local last_osd=${initial_osds[-1]} + kill_daemons $dir TERM osd.${last_osd} 2>&2 < /dev/null || return 1 + ceph osd down ${last_osd} || return 1 + ceph osd out ${last_osd} || return 1 + + ceph pg dump pgs + + dd if=/dev/urandom of=${dir}/ORIGINAL bs=1024 count=4 + for i in $(seq 1 $lastobj) + do + rados --pool $poolname put obj${i} $dir/ORIGINAL || return 1 + done + + inject_eio ec data $poolname $testobj $dir 0 || return 1 + inject_eio ec data $poolname $testobj $dir 1 || return 1 + + activate_osd $dir ${last_osd} || return 1 + ceph osd in ${last_osd} || return 1 + + sleep 15 + + for tmp in $(seq 1 240); do + state=$(get_state 2.0) + echo $state | grep backfill_unfound + if [ "$?" 
= "0" ]; then + break + fi + echo $state + sleep 1 + done + + ceph pg dump pgs + kill_daemons $dir TERM osd.${last_osd} 2>&2 < /dev/null || return 1 + sleep 5 + + ceph pg dump pgs + ceph pg 2.0 list_unfound + ceph pg 2.0 query + + ceph pg 2.0 list_unfound | grep -q $testobj || return 1 + + check=$(ceph pg 2.0 list_unfound | jq ".available_might_have_unfound") + test "$check" == "true" || return 1 + + eval check=$(ceph pg 2.0 list_unfound | jq .might_have_unfound[0].status) + test "$check" == "osd is down" || return 1 + + eval check=$(ceph pg 2.0 list_unfound | jq .might_have_unfound[0].osd) + test "$check" == "2(4)" || return 1 + + activate_osd $dir ${last_osd} || return 1 + + # Command should hang because object is unfound + timeout 5 rados -p $poolname get $testobj $dir/CHECK + test $? = "124" || return 1 + + ceph pg 2.0 mark_unfound_lost delete + + wait_for_clean || return 1 + + for i in $(seq 1 $lastobj) + do + if [ obj${i} = "$testobj" ]; then + # Doesn't exist anymore + ! rados -p $poolname get $testobj $dir/CHECK || return 1 + else + rados --pool $poolname get obj${i} $dir/CHECK || return 1 + diff -q $dir/ORIGINAL $dir/CHECK || return 1 + fi + done + + rm -f ${dir}/ORIGINAL ${dir}/CHECK + + delete_erasure_coded_pool $poolname +} + +# Test recovery with unfound object +function TEST_ec_recovery_unfound() { + local dir=$1 + local objname=myobject + local lastobj=100 + # Must be between 1 and $lastobj + local testobj=obj75 + + ORIG_ARGS=$CEPH_ARGS + CEPH_ARGS+=' --osd-recovery-max-single-start 3 --osd-recovery-max-active 3 ' + CEPH_ARGS+=' --osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10' + setup_osds 5 || return 1 + CEPH_ARGS=$ORIG_ARGS + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 3 2 || return 1 + + ceph pg dump pgs + + rados_put $dir $poolname $objname || return 1 + + local -a initial_osds=($(get_osds $poolname $objname)) + local last_osd=${initial_osds[-1]} + kill_daemons $dir TERM osd.${last_osd} 2>&2 < /dev/null || return 1 
+ ceph osd down ${last_osd} || return 1 + ceph osd out ${last_osd} || return 1 + + ceph pg dump pgs + + dd if=/dev/urandom of=${dir}/ORIGINAL bs=1024 count=4 + for i in $(seq 1 $lastobj) + do + rados --pool $poolname put obj${i} $dir/ORIGINAL || return 1 + done + + inject_eio ec data $poolname $testobj $dir 0 || return 1 + inject_eio ec data $poolname $testobj $dir 1 || return 1 + + activate_osd $dir ${last_osd} || return 1 + ceph osd in ${last_osd} || return 1 + + sleep 15 + + for tmp in $(seq 1 100); do + state=$(get_state 2.0) + echo $state | grep recovery_unfound + if [ "$?" = "0" ]; then + break + fi + echo "$state " + sleep 1 + done + + ceph pg dump pgs + ceph pg 2.0 list_unfound + ceph pg 2.0 query + + ceph pg 2.0 list_unfound | grep -q $testobj || return 1 + + check=$(ceph pg 2.0 list_unfound | jq ".available_might_have_unfound") + test "$check" == "true" || return 1 + + check=$(ceph pg 2.0 list_unfound | jq ".might_have_unfound | length") + test $check == 0 || return 1 + + # Command should hang because object is unfound + timeout 5 rados -p $poolname get $testobj $dir/CHECK + test $? = "124" || return 1 + + ceph pg 2.0 mark_unfound_lost delete + + wait_for_clean || return 1 + + for i in $(seq 1 $lastobj) + do + if [ obj${i} = "$testobj" ]; then + # Doesn't exist anymore + ! rados -p $poolname get $testobj $dir/CHECK || return 1 + else + rados --pool $poolname get obj${i} $dir/CHECK || return 1 + diff -q $dir/ORIGINAL $dir/CHECK || return 1 + fi + done + + rm -f ${dir}/ORIGINAL ${dir}/CHECK + + delete_erasure_coded_pool $poolname +} + +main test-erasure-eio "$@" + +# Local Variables: +# compile-command: "cd ../.. 
; make -j4 && test/erasure-code/test-erasure-eio.sh" +# End: diff --git a/qa/standalone/mgr/balancer.sh b/qa/standalone/mgr/balancer.sh new file mode 100755 index 000000000..2d7b2f35d --- /dev/null +++ b/qa/standalone/mgr/balancer.sh @@ -0,0 +1,223 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2019 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7102" # git grep '\<7102\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + $func $dir || return 1 + done +} + +TEST_POOL1=test1 +TEST_POOL2=test2 + +function TEST_balancer() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + create_pool $TEST_POOL1 8 + create_pool $TEST_POOL2 8 + + wait_for_clean || return 1 + + ceph pg dump pgs + ceph balancer status || return 1 + eval MODE=$(ceph balancer status | jq '.mode') + test $MODE = "upmap" || return 1 + ACTIVE=$(ceph balancer status | jq '.active') + test $ACTIVE = "true" || return 1 + + ceph balancer ls || return 1 + PLANS=$(ceph balancer ls) + test "$PLANS" = "[]" || return 1 + ceph balancer eval || return 1 + EVAL="$(ceph 
balancer eval)" + test "$EVAL" = "current cluster score 0.000000 (lower is better)" + ceph balancer eval-verbose || return 1 + + ceph balancer pool add $TEST_POOL1 || return 1 + ceph balancer pool add $TEST_POOL2 || return 1 + ceph balancer pool ls || return 1 + eval POOL=$(ceph balancer pool ls | jq 'sort | .[0]') + test "$POOL" = "$TEST_POOL1" || return 1 + eval POOL=$(ceph balancer pool ls | jq 'sort | .[1]') + test "$POOL" = "$TEST_POOL2" || return 1 + ceph balancer pool rm $TEST_POOL1 || return 1 + ceph balancer pool rm $TEST_POOL2 || return 1 + ceph balancer pool ls || return 1 + ceph balancer pool add $TEST_POOL1 || return 1 + + ceph balancer mode crush-compat || return 1 + ceph balancer status || return 1 + eval MODE=$(ceph balancer status | jq '.mode') + test $MODE = "crush-compat" || return 1 + ceph balancer off || return 1 + ! ceph balancer optimize plan_crush $TEST_POOL1 || return 1 + ceph balancer status || return 1 + eval RESULT=$(ceph balancer status | jq '.optimize_result') + test "$RESULT" = "Distribution is already perfect" || return 1 + + ceph balancer on || return 1 + ACTIVE=$(ceph balancer status | jq '.active') + test $ACTIVE = "true" || return 1 + sleep 2 + ceph balancer status || return 1 + ceph balancer off || return 1 + ACTIVE=$(ceph balancer status | jq '.active') + test $ACTIVE = "false" || return 1 + sleep 2 + + ceph balancer reset || return 1 + + ceph balancer mode upmap || return 1 + ceph balancer status || return 1 + eval MODE=$(ceph balancer status | jq '.mode') + test $MODE = "upmap" || return 1 + ! 
ceph balancer optimize plan_upmap $TEST_POOL || return 1 + ceph balancer status || return 1 + eval RESULT=$(ceph balancer status | jq '.optimize_result') + test "$RESULT" = "Unable to find further optimization, or pool(s) pg_num is decreasing, or distribution is already perfect" || return 1 + + ceph balancer on || return 1 + ACTIVE=$(ceph balancer status | jq '.active') + test $ACTIVE = "true" || return 1 + sleep 2 + ceph balancer status || return 1 + ceph balancer off || return 1 + ACTIVE=$(ceph balancer status | jq '.active') + test $ACTIVE = "false" || return 1 + + teardown $dir || return 1 +} + +function TEST_balancer2() { + local dir=$1 + TEST_PGS1=118 + TEST_PGS2=132 + TOTAL_PGS=$(expr $TEST_PGS1 + $TEST_PGS2) + OSDS=5 + DEFAULT_REPLICAS=3 + # Integer average of PGS per OSD (70.8), so each OSD >= this + FINAL_PER_OSD1=$(expr \( $TEST_PGS1 \* $DEFAULT_REPLICAS \) / $OSDS) + # Integer average of PGS per OSD (150) + FINAL_PER_OSD2=$(expr \( \( $TEST_PGS1 + $TEST_PGS2 \) \* $DEFAULT_REPLICAS \) / $OSDS) + + CEPH_ARGS+="--osd_pool_default_pg_autoscale_mode=off " + CEPH_ARGS+="--debug_osd=20 " + setup $dir || return 1 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for i in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $i || return 1 + done + + ceph osd set-require-min-compat-client luminous + ceph config set mgr mgr/balancer/upmap_max_deviation 1 + ceph balancer mode upmap || return 1 + ceph balancer on || return 1 + ceph config set mgr mgr/balancer/sleep_interval 5 + + create_pool $TEST_POOL1 $TEST_PGS1 + + wait_for_clean || return 1 + + # Wait up to 2 minutes + OK=no + for i in $(seq 1 25) + do + sleep 5 + if grep -q "Optimization plan is almost perfect" $dir/mgr.x.log + then + OK=yes + break + fi + done + test $OK = "yes" || return 1 + # Plan is found, but PGs still need to move + sleep 10 + wait_for_clean || return 1 + ceph osd df + + PGS=$(ceph osd df --format=json-pretty | jq '.nodes[0].pgs') + test $PGS -ge $FINAL_PER_OSD1 || return 1 + 
PGS=$(ceph osd df --format=json-pretty | jq '.nodes[1].pgs') + test $PGS -ge $FINAL_PER_OSD1 || return 1 + PGS=$(ceph osd df --format=json-pretty | jq '.nodes[2].pgs') + test $PGS -ge $FINAL_PER_OSD1 || return 1 + PGS=$(ceph osd df --format=json-pretty | jq '.nodes[3].pgs') + test $PGS -ge $FINAL_PER_OSD1 || return 1 + PGS=$(ceph osd df --format=json-pretty | jq '.nodes[4].pgs') + test $PGS -ge $FINAL_PER_OSD1 || return 1 + + create_pool $TEST_POOL2 $TEST_PGS2 + + # Wait up to 2 minutes + OK=no + for i in $(seq 1 25) + do + sleep 5 + COUNT=$(grep "Optimization plan is almost perfect" $dir/mgr.x.log | wc -l) + if test $COUNT = "2" + then + OK=yes + break + fi + done + test $OK = "yes" || return 1 + # Plan is found, but PGs still need to move + sleep 10 + wait_for_clean || return 1 + ceph osd df + + # We should be with plus or minus 2 of FINAL_PER_OSD2 + # This is because here each pool is balanced independently + MIN=$(expr $FINAL_PER_OSD2 - 2) + MAX=$(expr $FINAL_PER_OSD2 + 2) + PGS=$(ceph osd df --format=json-pretty | jq '.nodes[0].pgs') + test $PGS -ge $MIN -a $PGS -le $MAX || return 1 + PGS=$(ceph osd df --format=json-pretty | jq '.nodes[1].pgs') + test $PGS -ge $MIN -a $PGS -le $MAX || return 1 + PGS=$(ceph osd df --format=json-pretty | jq '.nodes[2].pgs') + test $PGS -ge $MIN -a $PGS -le $MAX || return 1 + PGS=$(ceph osd df --format=json-pretty | jq '.nodes[3].pgs') + test $PGS -ge $MIN -a $PGS -le $MAX || return 1 + PGS=$(ceph osd df --format=json-pretty | jq '.nodes[4].pgs') + test $PGS -ge $MIN -a $PGS -le $MAX || return 1 + + teardown $dir || return 1 +} + +main balancer "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh balancer.sh" +# End: diff --git a/qa/standalone/misc/mclock-config.sh b/qa/standalone/misc/mclock-config.sh new file mode 100755 index 000000000..59f002584 --- /dev/null +++ b/qa/standalone/misc/mclock-config.sh @@ -0,0 +1,467 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2022 Red Hat <contact@redhat.com> +# 
# Author: Sridhar Seshasayee <sseshasa@redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Library Public License for more details.
#

source "$CEPH_ROOT/qa/standalone/ceph-helpers.sh"

# Run the TEST_* functions named in the arguments after $1, or every TEST_*
# function defined in this shell when none are given. Each test is wrapped
# in setup/teardown of the test directory $1.
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7124" # git grep '\<7124\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "
    CEPH_ARGS+="--debug-mclock 20 "

    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    local func
    # $funcs is deliberately unquoted: it is a whitespace separated list
    for func in $funcs ; do
        setup "$dir" || return 1
        $func "$dir" || return 1
        teardown "$dir" || return 1
    done
}

#
# Helpers (names must not start with TEST_, see run() above)
#

# Succeed only when bc evaluates "$1 == $2" to 1. An empty or malformed
# operand makes bc print nothing on stdout, which is treated as "not equal"
# rather than being silently accepted.
function float_eq() {
    local verdict

    verdict=$(echo "$1 == $2" | bc -l)
    test "$verdict" = "1"
}

# Print the raw JSON reply of "config get $1" sent to the running osd.0
# through its admin socket, e.g. {"osd_recovery_max_active":"10"}.
function osd0_config_get_json() {
    local opt=$1

    CEPH_ARGS='' ceph --format=json daemon "$(get_asok_path osd.0)" \
        config get "$opt"
}

# Print the value of option $1 held by the running osd.0. jq -r emits the
# value without the JSON quotes.
function osd0_config_get() {
    local opt=$1

    osd0_config_get_json "$opt" | jq -r ".$opt"
}

# Verify that option $1 equals $2 both in the config monitor db and in the
# in-memory 'values' map of the running osd.0.
function verify_osd0_opt_value() {
    local opt=$1
    local expected=$2
    local res

    # Check value in config monitor db
    res=$(ceph config get osd.0 "$opt") || return 1
    float_eq "$res" "$expected" || return 1

    # Check value in the in-memory 'values' map
    res=$(osd0_config_get "$opt")
    float_eq "$res" "$expected" || return 1
}

# Verify that osd_max_backfills of the running osd.0 and the max_allowed of
# both its local and remote reservers all equal $1.
function verify_osd0_backfill_limit() {
    local expected=$1
    local max_backfills
    local which

    max_backfills=$(osd0_config_get osd_max_backfills)
    test "$max_backfills" = "$expected" || return 1

    # Verify local and async reserver settings
    for which in local remote ; do
        max_backfills=$(CEPH_ARGS='' ceph --format=json daemon \
            "$(get_asok_path osd.0)" dump_recovery_reservations | \
            jq -r ".${which}_reservations.max_allowed")
        test "$max_backfills" = "$expected" || return 1
    done
}

# Poll, at most 10 times and 2 seconds apart, until option $1 on osd.0 is
# seen to have kept its original value $2 and not taken the new value $3.
# Returns 0 on the first clean attempt and 1 when all attempts are used up.
function wait_osd0_opt_unchanged() {
    local opt=$1
    local opt_val_orig=$2
    local opt_val_new=$3
    local retries=10
    local count
    local errors
    local res

    for ((count = 0; count < retries; count++)) ; do
        errors=0
        sleep 2 # Allow time for changes to take effect

        echo "Check configuration values - Attempt#: $count"
        # Check configuration value on Mon store (or the default) for the osd
        res=$(ceph config get osd.0 "$opt") || return 1
        echo "Mon db (or default): osd.0 $opt = $res"
        if float_eq "$res" "$opt_val_new" ; then
            errors=$((errors + 1))
        fi

        # Check running configuration value using "config show" cmd.
        # Match the option name exactly instead of as a substring.
        res=$(ceph config show osd.0 | \
            awk -v opt="$opt" '$1 == opt { print $2 }') || return 1
        echo "Running config: osd.0 $opt = $res"
        if float_eq "$res" "$opt_val_new" || \
           ! float_eq "$res" "$opt_val_orig" ; then
            errors=$((errors + 1))
        fi

        # Check value in the in-memory 'values' map is unmodified
        res=$(osd0_config_get "$opt")
        echo "Values map: osd.0 $opt = $res"
        if float_eq "$res" "$opt_val_new" || \
           ! float_eq "$res" "$opt_val_orig" ; then
            errors=$((errors + 1))
        fi

        if [ "$errors" -eq 0 ] ; then
            return 0
        fi
    done

    # Retry count exhausted
    return 1
}

# Common body of the two TEST_profile_disallow_builtin_params_* tests.
# $1 is the test directory. The remaining arguments are the command prefix
# used to set an option; the option name and the new value are appended.
function check_builtin_params_not_modifiable() {
    local dir=$1
    shift

    setup "$dir" || return 1
    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1

    run_osd "$dir" 0 --osd_op_queue=mclock_scheduler || return 1

    # Verify that the default mclock profile is set on the OSD
    local def_mclock_profile
    def_mclock_profile=$(ceph config get osd.0 osd_mclock_profile)
    test "$def_mclock_profile" = "balanced" || return 1

    # Verify the running mClock profile
    local cur_mclock_profile
    cur_mclock_profile=$(osd0_config_get osd_mclock_profile)
    test "$cur_mclock_profile" = "high_recovery_ops" || return 1

    local opt
    local opt_val_orig
    local opt_val_new
    for opt in osd_mclock_scheduler_background_recovery_res \
               osd_mclock_scheduler_client_res
    do
        # Try to change a mclock config param and confirm that no change
        # occurred
        opt_val_orig=$(osd0_config_get "$opt")
        opt_val_new=$(echo "$opt_val_orig + 0.1" | bc -l)
        "$@" "$opt" "$opt_val_new" || return 1

        wait_osd0_opt_unchanged "$opt" "$opt_val_orig" "$opt_val_new" || \
            return 1
    done

    teardown "$dir" || return 1
}

#
# Tests
#

# Switch osd.0 from its built-in mclock profile to 'custom', then check that
# a QoS option can be changed and shows up in both the monitor db and the
# running daemon.
function TEST_profile_builtin_to_custom() {
    local dir=$1
    local opt=osd_mclock_scheduler_client_res

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    run_osd "$dir" 0 --osd_op_queue=mclock_scheduler || return 1

    # Verify the default mclock profile on the OSD
    local mclock_profile
    mclock_profile=$(ceph config get osd.0 osd_mclock_profile)
    test "$mclock_profile" = "balanced" || return 1

    # Verify the running mClock profile
    mclock_profile=$(osd0_config_get osd_mclock_profile)
    test "$mclock_profile" = "high_recovery_ops" || return 1

    # Change the mclock profile to 'custom'
    ceph tell osd.0 config set osd_mclock_profile custom || return 1

    # Verify that the mclock profile is set to 'custom' on the OSDs
    mclock_profile=$(osd0_config_get osd_mclock_profile)
    test "$mclock_profile" = "custom" || return 1

    # Change a mclock config param and confirm the change
    local client_res
    client_res=$(osd0_config_get $opt)
    echo "client_res = $client_res"
    local client_res_new
    client_res_new=$(echo "$client_res + 0.1" | bc -l)
    echo "client_res_new = $client_res_new"
    ceph config set osd.0 $opt "$client_res_new" || return 1

    verify_osd0_opt_value $opt "$client_res_new" || return 1

    teardown "$dir" || return 1
}

# Change a QoS option under the 'custom' profile, switch back to the
# original built-in profile and check that the changed value survives until
# it is removed from the monitor db, after which the original is restored.
function TEST_profile_custom_to_builtin() {
    local dir=$1
    local opt=osd_mclock_scheduler_client_res

    setup "$dir" || return 1
    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    run_osd "$dir" 0 --osd_op_queue=mclock_scheduler || return 1

    # Verify the default mclock profile on the OSD
    local def_mclock_profile
    def_mclock_profile=$(ceph config get osd.0 osd_mclock_profile)
    test "$def_mclock_profile" = "balanced" || return 1

    # Verify the running mClock profile
    local orig_mclock_profile
    orig_mclock_profile=$(osd0_config_get osd_mclock_profile)
    test "$orig_mclock_profile" = "high_recovery_ops" || return 1

    # Change the mclock profile to 'custom'
    ceph tell osd.0 config set osd_mclock_profile custom || return 1

    # Verify that the mclock profile is set to 'custom' on the OSDs
    local mclock_profile
    mclock_profile=$(osd0_config_get osd_mclock_profile)
    test "$mclock_profile" = "custom" || return 1

    # Save the original client reservations allocated to the OSDs
    local client_res
    client_res=$(osd0_config_get $opt)
    echo "Original client_res for osd.0 = $client_res"

    # Change a mclock config param and confirm the change
    local client_res_new
    client_res_new=$(echo "$client_res + 0.1" | bc -l)
    echo "client_res_new = $client_res_new"
    ceph config set osd $opt "$client_res_new" || return 1
    verify_osd0_opt_value $opt "$client_res_new" || return 1

    # Switch the mclock profile back to the original built-in profile.
    # The config subsystem prevents the overwrite of the changed QoS config
    # option above i.e. osd_mclock_scheduler_client_res. This fact is verified
    # before proceeding to remove the entry from the config monitor db. After
    # the config entry is removed, the original value for the config option is
    # restored and is verified.
    ceph tell osd.0 config set osd_mclock_profile "$orig_mclock_profile" || \
        return 1
    # Verify that the mclock profile is set back to the original on the OSD
    mclock_profile=$(osd0_config_get osd_mclock_profile)
    test "$mclock_profile" = "$orig_mclock_profile" || return 1

    # Verify that the new value is still in effect
    verify_osd0_opt_value $opt "$client_res_new" || return 1

    # Remove the changed QoS config option from monitor db
    ceph config rm osd $opt || return 1

    sleep 5 # Allow time for change to take effect

    # Verify that the original values are now restored
    # Check value in config monitor db
    local res
    res=$(ceph config get osd.0 $opt) || return 1
    float_eq "$res" "0.0" || return 1

    # Check value in the in-memory 'values' map
    res=$(osd0_config_get $opt)
    float_eq "$res" "$client_res" || return 1

    teardown "$dir" || return 1
}

# osd_recovery_max_active must keep its default unless
# osd_mclock_override_recovery_settings is enabled.
function TEST_recovery_limit_adjustment_mclock() {
    local dir=$1

    setup "$dir" || return 1
    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1

    run_osd "$dir" 0 --osd_op_queue=mclock_scheduler || return 1

    # Get default value
    local recoveries
    recoveries=$(osd0_config_get_json osd_recovery_max_active)
    echo "$recoveries" | grep --quiet 'osd_recovery_max_active' || return 1

    # Change the recovery limit without setting
    # osd_mclock_override_recovery_settings option. Verify that the recovery
    # limit is retained at its default value.
    ceph config set osd.0 osd_recovery_max_active 10 || return 1
    sleep 2 # Allow time for change to take effect
    local max_recoveries
    max_recoveries=$(osd0_config_get_json osd_recovery_max_active)
    test "$max_recoveries" = "$recoveries" || return 1

    # Change recovery limit after setting osd_mclock_override_recovery_settings.
    # Verify that the recovery limit is modified.
    ceph config set osd.0 osd_mclock_override_recovery_settings true || return 1
    ceph config set osd.0 osd_recovery_max_active 10 || return 1
    sleep 2 # Allow time for change to take effect
    max_recoveries=$(osd0_config_get_json osd_recovery_max_active)
    test "$max_recoveries" = '{"osd_recovery_max_active":"10"}' || return 1

    teardown "$dir" || return 1
}

# osd_max_backfills (and the reservers derived from it) must keep the default
# unless osd_mclock_override_recovery_settings is enabled, and an overridden
# value must survive an OSD restart.
function TEST_backfill_limit_adjustment_mclock() {
    local dir=$1

    setup "$dir" || return 1
    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1

    run_osd "$dir" 0 --osd_op_queue=mclock_scheduler || return 1

    # Get default value. An empty value would make every later comparison
    # against it meaningless, so fail early.
    local backfills
    backfills=$(osd0_config_get osd_max_backfills)
    echo "osd_max_backfills: $backfills"
    test -n "$backfills" || return 1

    # Change the backfill limit without setting
    # osd_mclock_override_recovery_settings option. Verify that the backfill
    # limit is retained at its default value and that the local and async
    # reserver settings are not changed.
    ceph config set osd.0 osd_max_backfills 20 || return 1
    sleep 2 # Allow time for change to take effect
    verify_osd0_backfill_limit "$backfills" || return 1

    # Change backfills limit after setting osd_mclock_override_recovery_settings.
    # Verify that the backfills limit and the reserver settings are modified.
    ceph config set osd.0 osd_mclock_override_recovery_settings true || return 1
    ceph config set osd.0 osd_max_backfills 20 || return 1
    sleep 2 # Allow time for change to take effect
    verify_osd0_backfill_limit 20 || return 1

    # Kill osd and bring it back up.
    # Confirm that the backfill settings are retained.
    kill_daemons "$dir" TERM osd || return 1
    ceph osd down 0 || return 1
    wait_for_osd down 0 || return 1
    activate_osd "$dir" 0 --osd-op-queue=mclock_scheduler || return 1

    verify_osd0_backfill_limit 20 || return 1

    teardown "$dir" || return 1
}

# With a built-in profile active, "ceph config set osd.0" on a QoS option
# must leave the running value untouched.
function TEST_profile_disallow_builtin_params_modify() {
    local dir=$1

    check_builtin_params_not_modifiable "$dir" ceph config set osd.0
}

# With a built-in profile active, "ceph tell osd.0 config set" on a QoS
# option must leave the running value untouched.
function TEST_profile_disallow_builtin_params_override() {
    local dir=$1

    check_builtin_params_not_modifiable "$dir" ceph tell osd.0 config set
}

main mclock-config "$@"

# Local Variables:
#
compile-command: "cd build ; make -j4 && \ +# ../qa/run-standalone.sh mclock-config.sh" +# End: diff --git a/qa/standalone/misc/network-ping.sh b/qa/standalone/misc/network-ping.sh new file mode 100755 index 000000000..4745108c5 --- /dev/null +++ b/qa/standalone/misc/network-ping.sh @@ -0,0 +1,169 @@ +#!/usr/bin/env bash + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--debug_disable_randomized_ping=true " + CEPH_ARGS+="--debug_heartbeat_testing_span=5 " + CEPH_ARGS+="--osd_heartbeat_interval=1 " + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_network_ping_test1() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + sleep 5 + + create_pool foo 16 + + # write some objects + timeout 20 rados bench -p foo 10 write -b 4096 --no-cleanup || return 1 + + # Get 1 cycle worth of ping data "1 minute" + sleep 10 + flush_pg_stats + + CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "0" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "1000" || return 1 + + CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "0" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "1000" || return 1 + + CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network 0 | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "4" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "0" 
|| return 1 + + CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network 0 | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "12" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "0" || return 1 + + # Wait another 4 cycles to get "5 minute interval" + sleep 20 + flush_pg_stats + CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "0" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "1000" || return 1 + + CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "0" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "1000" || return 1 + + CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network 0 | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "4" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "0" || return 1 + + CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network 0 | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "12" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "0" || return 1 + + + # Wait another 10 cycles to get "15 minute interval" + sleep 50 + flush_pg_stats + CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "0" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "1000" || return 1 + + CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "0" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "1000" || return 1 + + CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network 0 | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "4" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "0" || return 1 + + CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network 0 
| tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "12" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "0" || return 1 + + # Just check the threshold output matches the input + CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network 99 | tee $dir/json + test "$(cat $dir/json | jq '.threshold')" = "99" || return 1 + CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network 98 | tee $dir/json + test "$(cat $dir/json | jq '.threshold')" = "98" || return 1 + + rm -f $dir/json +} + +# Test setting of mon_warn_on_slow_ping_time very low to +# get health warning +function TEST_network_ping_test2() { + local dir=$1 + + export CEPH_ARGS + export EXTRA_OPTS=" --mon_warn_on_slow_ping_time=0.001" + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + sleep 5 + ceph osd crush add-bucket dc1 datacenter + ceph osd crush add-bucket dc2 datacenter + ceph osd crush add-bucket dc3 datacenter + ceph osd crush add-bucket rack1 rack + ceph osd crush add-bucket rack2 rack + ceph osd crush add-bucket rack3 rack + ceph osd crush add-bucket host1 host + ceph osd crush add-bucket host2 host + ceph osd crush add-bucket host3 host + ceph osd crush move dc1 root=default + ceph osd crush move dc2 root=default + ceph osd crush move dc3 root=default + ceph osd crush move rack1 datacenter=dc1 + ceph osd crush move rack2 datacenter=dc2 + ceph osd crush move rack3 datacenter=dc3 + ceph osd crush move host1 rack=rack1 + ceph osd crush move host2 rack=rack2 + ceph osd crush move host3 rack=rack3 + ceph osd crush set osd.0 1.0 host=host1 + ceph osd crush set osd.1 1.0 host=host2 + ceph osd crush set osd.2 1.0 host=host3 + ceph osd crush rule create-simple myrule default host firstn + + create_pool foo 16 16 replicated myrule + + # write some objects + timeout 20 rados bench -p foo 10 write -b 4096 --no-cleanup || return 1 + + # Get at least 1 cycle of ping data (this 
test runs with 5 second cycles of 1 second pings) + sleep 10 + flush_pg_stats + + ceph health | tee $dir/health + grep -q "Slow OSD heartbeats" $dir/health || return 1 + + ceph health detail | tee $dir/health + grep -q "OSD_SLOW_PING_TIME_BACK" $dir/health || return 1 + grep -q "OSD_SLOW_PING_TIME_FRONT" $dir/health || return 1 + grep -q "Slow OSD heartbeats on front from osd[.][0-2] [[]dc[1-3],rack[1-3][]] \ +to osd[.][0-2] [[]dc[1-3],rack[1-3][]]" $dir/health || return 1 + rm -f $dir/health +} + +main network-ping "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && ../qa/run-standalone.sh network-ping.sh" +# End: diff --git a/qa/standalone/misc/ok-to-stop.sh b/qa/standalone/misc/ok-to-stop.sh new file mode 100755 index 000000000..dc9e7422f --- /dev/null +++ b/qa/standalone/misc/ok-to-stop.sh @@ -0,0 +1,296 @@ +#!/usr/bin/env bash + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON_A="127.0.0.1:7150" # git grep '\<7150\>' : there must be only one + export CEPH_MON_B="127.0.0.1:7151" # git grep '\<7151\>' : there must be only one + export CEPH_MON_C="127.0.0.1:7152" # git grep '\<7152\>' : there must be only one + export CEPH_MON_D="127.0.0.1:7153" # git grep '\<7153\>' : there must be only one + export CEPH_MON_E="127.0.0.1:7154" # git grep '\<7154\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + export ORIG_CEPH_ARGS="$CEPH_ARGS" + + local funcs=${@:-$(set | ${SED} -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + kill_daemons $dir KILL || return 1 + teardown $dir || return 1 + done +} + +function TEST_1_mon_checks() { + local dir=$1 + + CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A " + + run_mon $dir a --public-addr=$CEPH_MON_A || return 1 + + ceph mon ok-to-stop dne || return 1 + ! ceph mon ok-to-stop a || return 1 + + ! 
ceph mon ok-to-add-offline || return 1 + + ! ceph mon ok-to-rm a || return 1 + ceph mon ok-to-rm dne || return 1 +} + +function TEST_2_mons_checks() { + local dir=$1 + + CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B " + + run_mon $dir a --public-addr=$CEPH_MON_A || return 1 + run_mon $dir b --public-addr=$CEPH_MON_B || return 1 + + ceph mon ok-to-stop dne || return 1 + ! ceph mon ok-to-stop a || return 1 + ! ceph mon ok-to-stop b || return 1 + ! ceph mon ok-to-stop a b || return 1 + + ceph mon ok-to-add-offline || return 1 + + ceph mon ok-to-rm a || return 1 + ceph mon ok-to-rm b || return 1 + ceph mon ok-to-rm dne || return 1 +} + +function TEST_3_mons_checks() { + local dir=$1 + + CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C " + + run_mon $dir a --public-addr=$CEPH_MON_A || return 1 + run_mon $dir b --public-addr=$CEPH_MON_B || return 1 + run_mon $dir c --public-addr=$CEPH_MON_C || return 1 + wait_for_quorum 60 3 + + ceph mon ok-to-stop dne || return 1 + ceph mon ok-to-stop a || return 1 + ceph mon ok-to-stop b || return 1 + ceph mon ok-to-stop c || return 1 + ! ceph mon ok-to-stop a b || return 1 + ! ceph mon ok-to-stop b c || return 1 + ! ceph mon ok-to-stop a b c || return 1 + + ceph mon ok-to-add-offline || return 1 + + ceph mon ok-to-rm a || return 1 + ceph mon ok-to-rm b || return 1 + ceph mon ok-to-rm c || return 1 + + kill_daemons $dir KILL mon.b + wait_for_quorum 60 2 + + ! ceph mon ok-to-stop a || return 1 + ceph mon ok-to-stop b || return 1 + ! ceph mon ok-to-stop c || return 1 + + ! ceph mon ok-to-add-offline || return 1 + + ! ceph mon ok-to-rm a || return 1 + ceph mon ok-to-rm b || return 1 + ! 
ceph mon ok-to-rm c || return 1 +} + +function TEST_4_mons_checks() { + local dir=$1 + + CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C,$CEPH_MON_D " + + run_mon $dir a --public-addr=$CEPH_MON_A || return 1 + run_mon $dir b --public-addr=$CEPH_MON_B || return 1 + run_mon $dir c --public-addr=$CEPH_MON_C || return 1 + run_mon $dir d --public-addr=$CEPH_MON_D || return 1 + wait_for_quorum 60 4 + + ceph mon ok-to-stop dne || return 1 + ceph mon ok-to-stop a || return 1 + ceph mon ok-to-stop b || return 1 + ceph mon ok-to-stop c || return 1 + ceph mon ok-to-stop d || return 1 + ! ceph mon ok-to-stop a b || return 1 + ! ceph mon ok-to-stop c d || return 1 + + ceph mon ok-to-add-offline || return 1 + + ceph mon ok-to-rm a || return 1 + ceph mon ok-to-rm b || return 1 + ceph mon ok-to-rm c || return 1 + + kill_daemons $dir KILL mon.a + wait_for_quorum 60 3 + + ceph mon ok-to-stop a || return 1 + ! ceph mon ok-to-stop b || return 1 + ! ceph mon ok-to-stop c || return 1 + ! ceph mon ok-to-stop d || return 1 + + ceph mon ok-to-add-offline || return 1 + + ceph mon ok-to-rm a || return 1 + ceph mon ok-to-rm b || return 1 + ceph mon ok-to-rm c || return 1 + ceph mon ok-to-rm d || return 1 +} + +function TEST_5_mons_checks() { + local dir=$1 + + CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C,$CEPH_MON_D,$CEPH_MON_E " + + run_mon $dir a --public-addr=$CEPH_MON_A || return 1 + run_mon $dir b --public-addr=$CEPH_MON_B || return 1 + run_mon $dir c --public-addr=$CEPH_MON_C || return 1 + run_mon $dir d --public-addr=$CEPH_MON_D || return 1 + run_mon $dir e --public-addr=$CEPH_MON_E || return 1 + wait_for_quorum 60 5 + + ceph mon ok-to-stop dne || return 1 + ceph mon ok-to-stop a || return 1 + ceph mon ok-to-stop b || return 1 + ceph mon ok-to-stop c || return 1 + ceph mon ok-to-stop d || return 1 + ceph mon ok-to-stop e || return 1 + ceph mon ok-to-stop a b || return 1 + ceph mon ok-to-stop c d || return 1 + ! 
ceph mon ok-to-stop a b c || return 1 + + ceph mon ok-to-add-offline || return 1 + + ceph mon ok-to-rm a || return 1 + ceph mon ok-to-rm b || return 1 + ceph mon ok-to-rm c || return 1 + ceph mon ok-to-rm d || return 1 + ceph mon ok-to-rm e || return 1 + + kill_daemons $dir KILL mon.a + wait_for_quorum 60 4 + + ceph mon ok-to-stop a || return 1 + ceph mon ok-to-stop b || return 1 + ceph mon ok-to-stop c || return 1 + ceph mon ok-to-stop d || return 1 + ceph mon ok-to-stop e || return 1 + + ceph mon ok-to-add-offline || return 1 + + ceph mon ok-to-rm a || return 1 + ceph mon ok-to-rm b || return 1 + ceph mon ok-to-rm c || return 1 + ceph mon ok-to-rm d || return 1 + ceph mon ok-to-rm e || return 1 + + kill_daemons $dir KILL mon.e + wait_for_quorum 60 3 + + ceph mon ok-to-stop a || return 1 + ! ceph mon ok-to-stop b || return 1 + ! ceph mon ok-to-stop c || return 1 + ! ceph mon ok-to-stop d || return 1 + ceph mon ok-to-stop e || return 1 + + ! ceph mon ok-to-add-offline || return 1 + + ceph mon ok-to-rm a || return 1 + ! ceph mon ok-to-rm b || return 1 + ! ceph mon ok-to-rm c || return 1 + ! ceph mon ok-to-rm d || return 1 + ceph mon ok-to-rm e || return 1 +} + +function TEST_0_mds() { + local dir=$1 + + CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A " + + run_mon $dir a --public-addr=$CEPH_MON_A || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_mds $dir a || return 1 + + ceph osd pool create meta 1 || return 1 + ceph osd pool create data 1 || return 1 + ceph fs new myfs meta data || return 1 + sleep 5 + + ! ceph mds ok-to-stop a || return 1 + ! ceph mds ok-to-stop a dne || return 1 + ceph mds ok-to-stop dne || return 1 + + run_mds $dir b || return 1 + sleep 5 + + ceph mds ok-to-stop a || return 1 + ceph mds ok-to-stop b || return 1 + ! ceph mds ok-to-stop a b || return 1 + ceph mds ok-to-stop a dne1 dne2 || return 1 + ceph mds ok-to-stop b dne || return 1 + ! 
ceph mds ok-to-stop a b dne || return 1 + ceph mds ok-to-stop dne1 dne2 || return 1 + + kill_daemons $dir KILL mds.a +} + +function TEST_0_osd() { + local dir=$1 + + CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A " + + run_mon $dir a --public-addr=$CEPH_MON_A || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + + ceph osd erasure-code-profile set ec-profile m=2 k=2 crush-failure-domain=osd || return 1 + ceph osd pool create ec erasure ec-profile || return 1 + + wait_for_clean || return 1 + + # with min_size 3, we can stop only 1 osd + ceph osd pool set ec min_size 3 || return 1 + wait_for_clean || return 1 + + ceph osd ok-to-stop 0 || return 1 + ceph osd ok-to-stop 1 || return 1 + ceph osd ok-to-stop 2 || return 1 + ceph osd ok-to-stop 3 || return 1 + ! ceph osd ok-to-stop 0 1 || return 1 + ! ceph osd ok-to-stop 2 3 || return 1 + ceph osd ok-to-stop 0 --max 2 | grep '[0]' || return 1 + ceph osd ok-to-stop 1 --max 2 | grep '[1]' || return 1 + + # with min_size 2, we can stop up to 2 osds + ceph osd pool set ec min_size 2 || return 1 + wait_for_clean || return 1 + + ceph osd ok-to-stop 0 1 || return 1 + ceph osd ok-to-stop 2 3 || return 1 + ! ceph osd ok-to-stop 0 1 2 || return 1 + ! ceph osd ok-to-stop 1 2 3 || return 1 + + ceph osd ok-to-stop 0 --max 2 | grep '[0,1]' || return 1 + ceph osd ok-to-stop 0 --max 20 | grep '[0,1]' || return 1 + ceph osd ok-to-stop 2 --max 2 | grep '[2,3]' || return 1 + ceph osd ok-to-stop 2 --max 20 | grep '[2,3]' || return 1 + + # we should get the same result with one of the osds already down + kill_daemons $dir TERM osd.0 || return 1 + ceph osd down 0 || return 1 + wait_for_peered || return 1 + + ceph osd ok-to-stop 0 || return 1 + ceph osd ok-to-stop 0 1 || return 1 + ! ceph osd ok-to-stop 0 1 2 || return 1 + ! 
ceph osd ok-to-stop 1 2 3 || return 1 +} + + +main ok-to-stop "$@" diff --git a/qa/standalone/misc/rados-striper.sh b/qa/standalone/misc/rados-striper.sh new file mode 100755 index 000000000..be6349b81 --- /dev/null +++ b/qa/standalone/misc/rados-striper.sh @@ -0,0 +1,101 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2014 Red Hat <contact@redhat.com> +# +# Author: Sebastien Ponce <sebastien.ponce@cern.ch> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7116" # git grep '\<7116\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + # setup + setup $dir || return 1 + + # create a cluster with one monitor and three osds + run_mon $dir a || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + create_rbd_pool || return 1 + + # create toyfile + dd if=/dev/urandom of=$dir/toyfile bs=1234 count=1 + + # put a striped object + rados --pool rbd --striper put toyfile $dir/toyfile || return 1 + + # stat it, with and without striping + rados --pool rbd --striper stat toyfile | cut -d ',' -f 2 > $dir/stripedStat || return 1 + rados --pool rbd stat toyfile.0000000000000000 | cut -d ',' -f 2 > $dir/stat || return 1 + echo ' size 1234' > $dir/refstat + diff -w $dir/stripedStat $dir/refstat || return 1 + diff -w $dir/stat $dir/refstat || return 1 + rados --pool rbd stat toyfile >& $dir/staterror + 
grep -q 'No such file or directory' $dir/staterror || return 1 + + # get the file back with and without striping + rados --pool rbd --striper get toyfile $dir/stripedGroup || return 1 + diff -w $dir/toyfile $dir/stripedGroup || return 1 + rados --pool rbd get toyfile.0000000000000000 $dir/nonSTripedGroup || return 1 + diff -w $dir/toyfile $dir/nonSTripedGroup || return 1 + + # test truncate + rados --pool rbd --striper truncate toyfile 12 + rados --pool rbd --striper stat toyfile | cut -d ',' -f 2 > $dir/stripedStat || return 1 + rados --pool rbd stat toyfile.0000000000000000 | cut -d ',' -f 2 > $dir/stat || return 1 + echo ' size 12' > $dir/reftrunc + diff -w $dir/stripedStat $dir/reftrunc || return 1 + diff -w $dir/stat $dir/reftrunc || return 1 + + # test xattrs + + rados --pool rbd --striper setxattr toyfile somexattr somevalue || return 1 + rados --pool rbd --striper getxattr toyfile somexattr > $dir/xattrvalue || return 1 + rados --pool rbd getxattr toyfile.0000000000000000 somexattr > $dir/xattrvalue2 || return 1 + echo 'somevalue' > $dir/refvalue + diff -w $dir/xattrvalue $dir/refvalue || return 1 + diff -w $dir/xattrvalue2 $dir/refvalue || return 1 + rados --pool rbd --striper listxattr toyfile > $dir/xattrlist || return 1 + echo 'somexattr' > $dir/reflist + diff -w $dir/xattrlist $dir/reflist || return 1 + rados --pool rbd listxattr toyfile.0000000000000000 | grep -v striper > $dir/xattrlist2 || return 1 + diff -w $dir/xattrlist2 $dir/reflist || return 1 + rados --pool rbd --striper rmxattr toyfile somexattr || return 1 + + local attr_not_found_str="No data available" + [ `uname` = FreeBSD ] && \ + attr_not_found_str="Attribute not found" + expect_failure $dir "$attr_not_found_str" \ + rados --pool rbd --striper getxattr toyfile somexattr || return 1 + expect_failure $dir "$attr_not_found_str" \ + rados --pool rbd getxattr toyfile.0000000000000000 somexattr || return 1 + + # test rm + rados --pool rbd --striper rm toyfile || return 1 + expect_failure $dir 
'No such file or directory' \ + rados --pool rbd --striper stat toyfile || return 1 + expect_failure $dir 'No such file or directory' \ + rados --pool rbd stat toyfile.0000000000000000 || return 1 + + # cleanup + teardown $dir || return 1 +} + +main rados-striper "$@" diff --git a/qa/standalone/misc/test-ceph-helpers.sh b/qa/standalone/misc/test-ceph-helpers.sh new file mode 100755 index 000000000..e7805858a --- /dev/null +++ b/qa/standalone/misc/test-ceph-helpers.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2013,2014 Cloudwatt <libre.licensing@cloudwatt.com> +# Copyright (C) 2014 Red Hat <contact@redhat.com> +# Copyright (C) 2014 Federico Gimenez <fgimenez@coit.es> +# +# Author: Loic Dachary <loic@dachary.org> +# Author: Federico Gimenez <fgimenez@coit.es> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +$CEPH_ROOT/qa/standalone/ceph-helpers.sh TESTS "$@" diff --git a/qa/standalone/misc/test-snaptrim-stats.sh b/qa/standalone/misc/test-snaptrim-stats.sh new file mode 100755 index 000000000..98b3e4fdd --- /dev/null +++ b/qa/standalone/misc/test-snaptrim-stats.sh @@ -0,0 +1,188 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2022 Red Hat <contact@redhat.com> +# +# Author: Sridhar Seshasayee <sseshasa@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. 
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library Public License for more details.
#

source $CEPH_ROOT/qa/standalone/ceph-helpers.sh

# Entry point invoked by main: exports the mon address and common
# CEPH_ARGS, then runs each requested TEST_* function (all of them when
# none is named) between a fresh setup and teardown.
#   $1   - test directory
#   $2.. - optional names of the TEST_* functions to run
# Returns 1 as soon as setup, a test, or teardown fails.
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7124" # git grep '\<7124\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "
    CEPH_ARGS+="--debug-bluestore 20 "

    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    for func in $funcs ; do
        setup $dir || return 1
        $func $dir || return 1
        teardown $dir || return 1
    done
}

# Succeeds only when the decimal number $1 is strictly greater than zero.
#
# The tests used to run `echo "$total > 0.0" | bc || return 1`.  bc exits
# 0 whether the comparison holds or not (it only prints 1 or 0), so that
# check could never fail.  The comparison is done here with awk, whose
# exit status carries the result; an empty or non-numeric value is
# rejected as well.
function snaptrim_duration_is_positive() {
    local duration=$1
    awk -v v="$duration" 'BEGIN { exit !(v > 0) }'
}

# Writes objects, snapshots the pool, overwrites the objects to create
# clones, removes the snapshot, and then verifies that the per-PG
# 'objects_trimmed' counters add up to the number of objects and that the
# summed 'snaptrim_duration' is greater than zero.
#   $1 - test directory
# Returns 1 on the first failed step or check.
function TEST_snaptrim_stats() {
    local dir=$1
    local poolname=test
    local OSDS=3
    local PGNUM=8
    local PGPNUM=8
    local objects=10
    local WAIT_FOR_UPDATE=10
    local poolid TESTDATA SNAP
    local osd obj pg

    setup $dir || return 1
    run_mon $dir a --osd_pool_default_size=$OSDS || return 1
    run_mgr $dir x || return 1
    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd --osd_pool_default_pg_autoscale_mode=off || return 1
    done

    # disable scrubs
    ceph osd set noscrub || return 1
    ceph osd set nodeep-scrub || return 1

    # Create a pool
    create_pool $poolname $PGNUM $PGPNUM
    wait_for_clean || return 1
    poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }')

    # write a few objects
    TESTDATA="testdata.1"
    dd if=/dev/urandom of=$TESTDATA bs=4096 count=1
    for obj in $(seq 1 $objects)
    do
        rados -p $poolname put obj${obj} $TESTDATA
    done
    rm -f $TESTDATA

    # create a snapshot, clones
    SNAP=1
    rados -p $poolname mksnap snap${SNAP}
    TESTDATA="testdata.2"
    dd if=/dev/urandom of=$TESTDATA bs=4096 count=1
    for obj in $(seq 1 $objects)
    do
        rados -p $poolname put obj${obj} $TESTDATA
    done
    rm -f $TESTDATA

    # remove the snapshot, should trigger snaptrim
    rados -p $poolname rmsnap snap${SNAP}

    # check for snaptrim stats
    wait_for_clean || return 1
    sleep $WAIT_FOR_UPDATE
    local objects_trimmed=0
    local snaptrim_duration_total=0.0
    for pg in $(seq 0 $(expr $PGNUM - 1))
    do
        local pgid="${poolid}.${pg}"
        objects_trimmed=$(expr $objects_trimmed + $(ceph pg $pgid query | \
            jq '.info.stats.objects_trimmed'))
        snaptrim_duration_total=$(echo $snaptrim_duration_total + $(ceph pg \
            $pgid query | jq '.info.stats.snaptrim_duration') | bc)
    done
    test $objects_trimmed -eq $objects || return 1
    snaptrim_duration_is_positive "$snaptrim_duration_total" || return 1

    teardown $dir || return 1
}

# Same scenario with NUMSNAPS snapshots: after each snapshot removal the
# per-PG counters must add up to $objects with a positive total duration,
# and over all removals to objects * NUMSNAPS.
#   $1 - test directory
# Returns 1 on the first failed step or check.
function TEST_snaptrim_stats_multiple_snaps() {
    local dir=$1
    local poolname=test
    local OSDS=3
    local PGNUM=8
    local PGPNUM=8
    local objects=10
    local WAIT_FOR_UPDATE=10
    local poolid NUMSNAPS
    local osd obj pg snap

    setup $dir || return 1
    run_mon $dir a --osd_pool_default_size=$OSDS || return 1
    run_mgr $dir x || return 1
    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd --osd_pool_default_pg_autoscale_mode=off || return 1
    done

    # disable scrubs
    ceph osd set noscrub || return 1
    ceph osd set nodeep-scrub || return 1

    # Create a pool
    create_pool $poolname $PGNUM $PGPNUM
    wait_for_clean || return 1
    poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }')

    # write a few objects
    local TESTDATA="testdata.0"
    dd if=/dev/urandom of=$TESTDATA bs=4096 count=1
    for obj in $(seq 1 $objects)
    do
        rados -p $poolname put obj${obj} $TESTDATA
    done
    rm -f $TESTDATA

    # create snapshots, clones
    # (the snapshot, object and PG loops each get their own index variable;
    # they used to share a single "i")
    NUMSNAPS=2
    for snap in $(seq 1 $NUMSNAPS)
    do
        rados -p $poolname mksnap snap${snap}
        TESTDATA="testdata".${snap}
        dd if=/dev/urandom of=$TESTDATA bs=4096 count=1
        for obj in $(seq 1 $objects)
        do
            rados -p $poolname put obj${obj} $TESTDATA
        done
        rm -f $TESTDATA
    done

    # remove the snapshots, should trigger snaptrim
    local total_objects_trimmed=0
    for snap in $(seq 1 $NUMSNAPS)
    do
        rados -p $poolname rmsnap snap${snap}

        # check for snaptrim stats
        wait_for_clean || return 1
        sleep $WAIT_FOR_UPDATE
        local objects_trimmed=0
        local snaptrim_duration_total=0.0
        for pg in $(seq 0 $(expr $PGNUM - 1))
        do
            local pgid="${poolid}.${pg}"
            objects_trimmed=$(expr $objects_trimmed + $(ceph pg $pgid query | \
                jq '.info.stats.objects_trimmed'))
            snaptrim_duration_total=$(echo $snaptrim_duration_total + $(ceph pg \
                $pgid query | jq '.info.stats.snaptrim_duration') | bc)
        done
        test $objects_trimmed -eq $objects || return 1
        snaptrim_duration_is_positive "$snaptrim_duration_total" || return 1
        total_objects_trimmed=$(expr $total_objects_trimmed + $objects_trimmed)
    done

    test $total_objects_trimmed -eq $((objects * NUMSNAPS)) || return 1

    teardown $dir || return 1
}
main test-snaptrim-stats "$@"

# Local Variables:
# compile-command: "cd build ; make -j4 && \
#   ../qa/run-standalone.sh test-snaptrim-stats.sh"
# End:

# diff --git a/qa/standalone/misc/ver-health.sh b/qa/standalone/misc/ver-health.sh
# new file mode 100755
# index 000000000..e03f8f4f5
# --- /dev/null
# +++ b/qa/standalone/misc/ver-health.sh
# @@ -0,0 +1,231 @@
#!/usr/bin/env bash
#
# Copyright (C) 2020 Red Hat <contact@redhat.com>
#
# Author: David Zafman <dzafman@redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library Public License for more details.
#
source $CEPH_ROOT/qa/standalone/ceph-helpers.sh

# Entry point invoked by main: exports the two mon addresses and the
# common CEPH_ARGS (saved as ORIG_CEPH_ARGS so every test can rebuild its
# own argument list), then runs each requested TEST_* function (all of
# them when none is named) between a fresh setup and teardown.
#   $1   - test directory
#   $2.. - optional names of the TEST_* functions to run
# Returns 1 as soon as setup, a test, or teardown fails.
function run() {
    local dir=$1
    shift

    export CEPH_MON_A="127.0.0.1:7165" # git grep '\<7165\>' : there must be only one
    export CEPH_MON_B="127.0.0.1:7166" # git grep '\<7166\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    # This used to expand $CEPH_MON, which this script never defines, and so
    # produced an empty "--mon-host=".  Every test appends
    # "--mon-host=$CEPH_MON_A" itself, so mon.a is the address meant here.
    CEPH_ARGS+="--mon-host=$CEPH_MON_A "
    CEPH_ARGS+="--mon_health_to_clog_tick_interval=1.0 "
    export ORIG_CEPH_ARGS="$CEPH_ARGS"

    local funcs=${@:-$(set | ${SED} -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    for func in $funcs ; do
        setup $dir || return 1
        $func $dir || return 1
        teardown $dir || return 1
    done
}

# Polls 'ceph health' once per second until its output matches a pattern.
#   $1 - grep pattern to look for
#   $2 - maximum number of attempts, one second apart (default 20)
# Returns 0 once the pattern is seen, 1 if it never shows up.
#
# pipefail is switched on while polling so that a failing 'ceph health'
# fails the pipeline.  The caller's pipefail setting is put back
# afterwards (it used to be switched off unconditionally), and the
# bookkeeping variables are now local instead of leaking as globals.
function wait_for_health_string() {
    local grep_string=$1
    local seconds=${2:-20}
    local passed="false"
    local pipefail_was_on="false"
    local i

    if shopt -qo pipefail; then
        pipefail_was_on="true"
    fi

    # Allow mon to notice version difference
    set -o pipefail
    for ((i=0; i < seconds; i++)); do
        if ceph health | grep -q "$grep_string"
        then
            passed="true"
            break
        fi
        sleep 1
    done
    if [ "$pipefail_was_on" = "false" ]; then
        set +o pipefail
    fi

    # Make sure health changed
    if [ "$passed" = "false" ];
    then
        return 1
    fi
    return 0
}



# Test a single OSD with an old version and multiple OSDs with 2 different old versions
#   $1 - test directory
# Expects HEALTH_WARN DAEMON_OLD_VERSION once osd.1 reports an older
# version, then HEALTH_ERR once two different older versions are present.
function TEST_check_version_health_1() {
    local dir=$1

    # Assume MON_A is leader?
    CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A "
    # setup
    setup $dir || return 1

    # create a cluster with two monitors and three osds
    run_mon $dir a --public-addr=$CEPH_MON_A --mon_warn_older_version_delay=0 || return 1
    run_mon $dir b --public-addr=$CEPH_MON_B --mon_warn_older_version_delay=0 || return 1
    run_osd $dir 0 || return 1
    run_osd $dir 1 || return 1
    run_osd $dir 2 || return 1

    sleep 5
    ceph health detail
    # should not see this yet
    ceph health detail | grep DAEMON_OLD_VERSION && return 1

    # restart osd.1 reporting a fake, older version string
    kill_daemons $dir KILL osd.1
    ceph_debug_version_for_testing=01.00.00-gversion-test activate_osd $dir 1

    wait_for_health_string "HEALTH_WARN .*There is a daemon running an older version of ceph" || return 1

    ceph health detail
    # Should notice that osd.1 is a different version
    ceph health detail | grep -q "HEALTH_WARN .*There is a daemon running an older version of ceph" || return 1
    ceph health detail | grep -q "^[[]WRN[]] DAEMON_OLD_VERSION: There is a daemon running an older version of ceph" || return 1
    ceph health detail | grep -q "osd.1 is running an older version of ceph: 01.00.00-gversion-test" || return 1

    # osd.2 joins osd.1 on 01.00.00 and osd.0 reports a second old version
    kill_daemons $dir KILL osd.2
    ceph_debug_version_for_testing=01.00.00-gversion-test activate_osd $dir 2
    kill_daemons $dir KILL osd.0
    ceph_debug_version_for_testing=02.00.00-gversion-test activate_osd $dir 0

    wait_for_health_string "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1

    ceph health detail
    ceph health detail | grep -q "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1
    ceph health detail | grep -q "^[[]ERR[]] DAEMON_OLD_VERSION: There are daemons running multiple old versions of ceph" || return 1
    ceph health detail | grep -q "osd.1 osd.2 are running an older version of ceph: 01.00.00-gversion-test" || return 1
    ceph health detail | grep -q "osd.0 is running an older version of ceph: 02.00.00-gversion-test" || return 1
}

# Test with 1 MON and 1 MDS with an older version, and add 2 OSDs with different versions
#   $1 - test directory
function TEST_check_version_health_2() {
    local dir=$1

    # Assume MON_A is leader?
    CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A "
    # setup
    setup $dir || return 1

    # create a cluster with all daemon types
    run_mon $dir a --public-addr=$CEPH_MON_A --mon_warn_older_version_delay=0 || return 1
    run_mon $dir b --public-addr=$CEPH_MON_B --mon_warn_older_version_delay=0 || return 1
    run_osd $dir 0 || return 1
    run_osd $dir 1 || return 1
    run_osd $dir 2 || return 1
    run_mgr $dir x || return 1
    run_mgr $dir y || return 1
    run_mds $dir m || return 1
    run_mds $dir n || return 1

    sleep 5
    ceph health detail
    # should not see this yet
    ceph health detail | grep DAEMON_OLD_VERSION && return 1

    kill_daemons $dir KILL mon.b
    ceph_debug_version_for_testing=01.00.00-gversion-test run_mon $dir b --mon_warn_older_version_delay=0
    # XXX: Manager doesn't seem to use the test specific config for version
    #kill_daemons $dir KILL mgr.x
    #ceph_debug_version_for_testing=02.00.00-gversion-test run_mgr $dir x
    kill_daemons $dir KILL mds.m
    ceph_debug_version_for_testing=01.00.00-gversion-test run_mds $dir m

    wait_for_health_string "HEALTH_WARN .*There are daemons running an older version of ceph" || return 1

    ceph health detail
    # Should notice that mon.b and mds.m is a different version
    ceph health detail | grep -q "HEALTH_WARN .*There are daemons running an older version of ceph" || return 1
    ceph health detail | grep -q "^[[]WRN[]] DAEMON_OLD_VERSION: There are daemons running an older version of ceph" || return 1
    ceph health detail | grep -q "mon.b mds.m are running an older version of ceph: 01.00.00-gversion-test" || return 1

    kill_daemons $dir KILL osd.2
    ceph_debug_version_for_testing=01.00.00-gversion-test activate_osd $dir 2
    kill_daemons $dir KILL osd.0
    ceph_debug_version_for_testing=02.00.00-gversion-test activate_osd $dir 0

    wait_for_health_string "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1

    ceph health detail
    ceph health | grep -q "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1
    ceph health detail | grep -q "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1
    ceph health detail | grep -q "^[[]ERR[]] DAEMON_OLD_VERSION: There are daemons running multiple old versions of ceph" || return 1
    ceph health detail | grep -q "mon.b osd.2 mds.m are running an older version of ceph: 01.00.00-gversion-test" || return 1
    ceph health detail | grep -q "osd.0 is running an older version of ceph: 02.00.00-gversion-test" || return 1
}

# Verify delay handling with same setup as test 1
#   $1 - test directory
function TEST_check_version_health_3() {
    local dir=$1

    # Assume MON_A is leader?
    CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A "
    # setup
    setup $dir || return 1

    # create a cluster with two monitors and three osds
    run_mon $dir a --public-addr=$CEPH_MON_A || return 1
    run_mon $dir b --public-addr=$CEPH_MON_B || return 1

    local start_osd_time=$SECONDS
    # use memstore for faster bootup
    EXTRA_OPTS=" --osd-objectstore=memstore" run_osd $dir 0 || return 1
    EXTRA_OPTS=" --osd-objectstore=memstore" run_osd $dir 1 || return 1
    EXTRA_OPTS=" --osd-objectstore=memstore" run_osd $dir 2 || return 1
    # take the time used for boot osds into consideration
    local warn_older_version_delay=$(($SECONDS - $start_osd_time + 20))

    sleep 5
    ceph health detail
    # should not see this yet
    ceph health detail | grep DAEMON_OLD_VERSION && return 1
    ceph tell 'mon.*' injectargs "--mon_warn_older_version_delay $warn_older_version_delay"
    kill_daemons $dir KILL osd.1
    EXTRA_OPTS=" --osd-objectstore=memstore" \
      ceph_debug_version_for_testing=01.00.00-gversion-test \
      activate_osd $dir 1

    # Wait 50% of 20 second delay config
    sleep 10
    # should not see this yet
    ceph health detail | grep DAEMON_OLD_VERSION && return 1

    # Now make sure that at least 20 seconds have passed
    wait_for_health_string "HEALTH_WARN .*There is a daemon running an older version of ceph" 20 || return 1

    ceph health detail
    # Should notice that osd.1 is a different version
    ceph health detail | grep -q "HEALTH_WARN .*There is a daemon running an older version of ceph" || return 1
    ceph health detail | grep -q "^[[]WRN[]] DAEMON_OLD_VERSION: There is a daemon running an older version of ceph" || return 1
    ceph health detail | grep -q "osd.1 is running an older version of ceph: 01.00.00-gversion-test" || return 1

    kill_daemons $dir KILL osd.2
    ceph_debug_version_for_testing=01.00.00-gversion-test activate_osd $dir 2
    kill_daemons $dir KILL osd.0
    ceph_debug_version_for_testing=02.00.00-gversion-test activate_osd $dir 0

    wait_for_health_string "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1

    ceph health detail
    ceph health detail | grep -q "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1
    ceph health detail | grep -q "^[[]ERR[]] DAEMON_OLD_VERSION: There are daemons running multiple old versions of ceph" || return 1
    ceph health detail | grep -q "osd.1 osd.2 are running an older version of ceph: 01.00.00-gversion-test" || return 1
    ceph health detail | grep -q "osd.0 is running an older version of ceph: 02.00.00-gversion-test" || return 1
}

main ver-health "$@"

# Local Variables:
# compile-command: "cd ../..
#   ; make -j4 && ../qa/run-standalone.sh ver-health.sh"
# End:

# diff --git a/qa/standalone/mon-stretch/mon-stretch-fail-recovery.sh b/qa/standalone/mon-stretch/mon-stretch-fail-recovery.sh
# new file mode 100755
# index 000000000..276d26aab
# --- /dev/null
# +++ b/qa/standalone/mon-stretch/mon-stretch-fail-recovery.sh
# @@ -0,0 +1,148 @@
#!/usr/bin/env bash

source $CEPH_ROOT/qa/standalone/ceph-helpers.sh

# Entry point invoked by main: exports the five mon addresses and the
# common CEPH_ARGS (saved without any --mon-host as BASE_CEPH_ARGS so the
# test can rebuild the mon list), then runs each requested TEST_* function
# (all of them when none is named) between a fresh setup and teardown.
#   $1   - test directory
#   $2.. - optional names of the TEST_* functions to run
# Returns 1 as soon as setup, a test, or teardown fails.
#
# NOTE(review): mon-stretch-uneven-crush-weights.sh declares the same five
# ports and mon/health-mute.sh uses 7143, so the "there must be only one"
# rule below is not met - confirm these tests never run concurrently.
function run() {
    local dir=$1
    shift

    export CEPH_MON_A="127.0.0.1:7139" # git grep '\<7139\>' : there must be only one
    export CEPH_MON_B="127.0.0.1:7141" # git grep '\<7141\>' : there must be only one
    export CEPH_MON_C="127.0.0.1:7142" # git grep '\<7142\>' : there must be only one
    export CEPH_MON_D="127.0.0.1:7143" # git grep '\<7143\>' : there must be only one
    export CEPH_MON_E="127.0.0.1:7144" # git grep '\<7144\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "

    export BASE_CEPH_ARGS=$CEPH_ARGS
    CEPH_ARGS+="--mon-host=$CEPH_MON_A"

    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    for func in $funcs ; do
        setup $dir || return 1
        $func $dir || return 1
        teardown $dir || return 1
    done
}

# Builds a two-zone (iris / pze) stretch cluster with five mons (mon.e
# placed in zone "arbiter"), enables stretch mode, kills the mons and osds
# located in zone pze, and then starts three additional osds.
#   $1 - test directory
# Returns 1 on the first checked step that fails.
#
# Placing mon.e and enabling stretch mode are checked here (as
# mon-stretch-uneven-crush-weights.sh already does): if either fails, the
# rest of the scenario would run against a cluster that is not in stretch
# mode and the test would pass without exercising anything.
TEST_stretched_cluster_failover_add_three_osds(){
    local dir=$1
    local OSDS=8
    setup $dir || return 1

    # start the mons one at a time, extending --mon-host as each one joins
    run_mon $dir a --public-addr $CEPH_MON_A || return 1
    wait_for_quorum 300 1 || return 1

    run_mon $dir b --public-addr $CEPH_MON_B || return 1
    CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B"
    wait_for_quorum 300 2 || return 1

    run_mon $dir c --public-addr $CEPH_MON_C || return 1
    CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C"
    wait_for_quorum 300 3 || return 1

    run_mon $dir d --public-addr $CEPH_MON_D || return 1
    CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C,$CEPH_MON_D"
    wait_for_quorum 300 4 || return 1

    run_mon $dir e --public-addr $CEPH_MON_E || return 1
    CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C,$CEPH_MON_D,$CEPH_MON_E"
    wait_for_quorum 300 5 || return 1

    ceph mon set election_strategy connectivity
    ceph mon add disallowed_leader e

    run_mgr $dir x || return 1
    run_mgr $dir y || return 1
    run_mgr $dir z || return 1

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    for zone in iris pze
    do
        ceph osd crush add-bucket $zone zone
        ceph osd crush move $zone root=default
    done


    # two hosts per zone, two osds per host
    ceph osd crush add-bucket node-2 host
    ceph osd crush add-bucket node-3 host
    ceph osd crush add-bucket node-4 host
    ceph osd crush add-bucket node-5 host

    ceph osd crush move node-2 zone=iris
    ceph osd crush move node-3 zone=iris
    ceph osd crush move node-4 zone=pze
    ceph osd crush move node-5 zone=pze

    ceph osd crush move osd.0 host=node-2
    ceph osd crush move osd.1 host=node-2
    ceph osd crush move osd.2 host=node-3
    ceph osd crush move osd.3 host=node-3
    ceph osd crush move osd.4 host=node-4
    ceph osd crush move osd.5 host=node-4
    ceph osd crush move osd.6 host=node-5
    ceph osd crush move osd.7 host=node-5

    ceph mon set_location a zone=iris host=node-2
    ceph mon set_location b zone=iris host=node-3
    ceph mon set_location c zone=pze host=node-4
    ceph mon set_location d zone=pze host=node-5

    # drop the bucket named after this machine, then append stretch_rule to
    # the decompiled crush map and load the result back
    hostname=$(hostname -s)
    ceph osd crush remove $hostname || return 1
    ceph osd getcrushmap > crushmap || return 1
    crushtool --decompile crushmap > crushmap.txt || return 1
    sed 's/^# end crush map$//' crushmap.txt > crushmap_modified.txt || return 1
    cat >> crushmap_modified.txt << EOF
rule stretch_rule {
        id 1
        type replicated
        min_size 1
        max_size 10
        step take iris
        step chooseleaf firstn 2 type host
        step emit
        step take pze
        step chooseleaf firstn 2 type host
        step emit
}

# end crush map
EOF

    crushtool --compile crushmap_modified.txt -o crushmap.bin || return 1
    ceph osd setcrushmap -i crushmap.bin  || return 1
    local stretched_poolname=stretched_rbdpool
    ceph osd pool create $stretched_poolname 32 32 stretch_rule || return 1
    ceph osd pool set $stretched_poolname size 4 || return 1

    sleep 3

    ceph mon set_location e zone=arbiter host=node-1 || return 1
    ceph mon enable_stretch_mode e stretch_rule zone || return 1

    # take down the mons and osds that were placed in zone pze
    kill_daemons $dir KILL mon.c || return 1
    kill_daemons $dir KILL mon.d || return 1

    kill_daemons $dir KILL osd.4 || return 1
    kill_daemons $dir KILL osd.5 || return 1
    kill_daemons $dir KILL osd.6 || return 1
    kill_daemons $dir KILL osd.7 || return 1

    ceph -s

    sleep 3

    # add three new osds while the cluster is in this state
    run_osd $dir 8 || return 1
    run_osd $dir 9 || return 1
    run_osd $dir 10 || return 1

    ceph -s

    sleep 3

    teardown $dir || return 1
}
main mon-stretch-fail-recovery "$@"
# \ No newline at end of file
# diff --git a/qa/standalone/mon-stretch/mon-stretch-uneven-crush-weights.sh b/qa/standalone/mon-stretch/mon-stretch-uneven-crush-weights.sh
# new file mode 100755
# index 000000000..7e13f4076
# --- /dev/null
# +++ b/qa/standalone/mon-stretch/mon-stretch-uneven-crush-weights.sh
# @@ -0,0 +1,145 @@
#!/usr/bin/env bash

source $CEPH_ROOT/qa/standalone/ceph-helpers.sh

# Entry point invoked by main.  Publishes the five monitor addresses and
# the shared CEPH_ARGS (a copy without --mon-host is kept in
# BASE_CEPH_ARGS), then runs the selected TEST_* functions - every one
# defined when none is named - each between setup and teardown.
#   $1   - test directory
#   $2.. - optional TEST_* function names
# Returns 1 when setup, a test, or teardown fails.
function run() {
    local dir=$1
    shift

    export CEPH_MON_A="127.0.0.1:7139" # git grep '\<7139\>' : there must be only one
    export CEPH_MON_B="127.0.0.1:7141" # git grep '\<7141\>' : there must be only one
    export CEPH_MON_C="127.0.0.1:7142" # git grep '\<7142\>' : there must be only one
    export CEPH_MON_D="127.0.0.1:7143" # git grep '\<7143\>' : there must be only one
    export CEPH_MON_E="127.0.0.1:7144" # git grep '\<7144\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "

    export BASE_CEPH_ARGS=$CEPH_ARGS
    CEPH_ARGS+="--mon-host=$CEPH_MON_A"

    local selected=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    for func in $selected ; do
        setup $dir && $func $dir && teardown $dir || return 1
    done
}

# Puts a four-osd, two-zone cluster into stretch mode and checks that the
# INCORRECT_NUM_BUCKETS_STRETCH_MODE and UNEVEN_WEIGHTS_STRETCH_MODE
# health warnings each appear when provoked and clear again afterwards.
#   $1 - test directory
# Returns 1 on the first checked step that fails.
TEST_stretched_cluster_uneven_weight() {
    local dir=$1
    local OSDS=4
    local weight=0.09000
    local mon_ids=(a b c d e)
    local mon_addrs=($CEPH_MON_A $CEPH_MON_B $CEPH_MON_C $CEPH_MON_D $CEPH_MON_E)
    local mon_hosts=${mon_addrs[0]}
    local idx mgr host osd_id
    setup $dir || return 1

    # Start the monitors one by one.  From the second monitor on,
    # --mon-host is rebuilt to list every monitor started so far before
    # waiting for the larger quorum.
    for idx in 0 1 2 3 4; do
        run_mon $dir ${mon_ids[$idx]} --public-addr ${mon_addrs[$idx]} || return 1
        if [ $idx -gt 0 ]; then
            mon_hosts+=",${mon_addrs[$idx]}"
            CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$mon_hosts"
        fi
        wait_for_quorum 300 $((idx + 1)) || return 1
    done

    ceph mon set election_strategy connectivity
    ceph mon add disallowed_leader e

    for mgr in x y z; do
        run_mgr $dir $mgr || return 1
    done

    for osd in $(seq 0 $((OSDS - 1))); do
        run_osd $dir $osd || return 1
    done

    for zone in iris pze; do
        ceph osd crush add-bucket $zone zone
        ceph osd crush move $zone root=default
    done

    # four hosts: node-2/node-3 in zone iris, node-4/node-5 in zone pze
    for host in node-2 node-3 node-4 node-5; do
        ceph osd crush add-bucket $host host
    done
    for host in node-2 node-3; do
        ceph osd crush move $host zone=iris
    done
    for host in node-4 node-5; do
        ceph osd crush move $host zone=pze
    done

    # one osd per host: osd.N lives on node-(N+2)
    for osd_id in 0 1 2 3; do
        ceph osd crush move osd.$osd_id host=node-$((osd_id + 2))
    done

    ceph mon set_location a zone=iris host=node-2
    ceph mon set_location b zone=iris host=node-3
    ceph mon set_location c zone=pze host=node-4
    ceph mon set_location d zone=pze host=node-5

    # remove the bucket named after this machine, then append stretch_rule
    # to the decompiled crush map and load the result back
    hostname=$(hostname -s)
    ceph osd crush remove $hostname || return 1
    ceph osd getcrushmap > crushmap || return 1
    crushtool --decompile crushmap > crushmap.txt || return 1
    sed 's/^# end crush map$//' crushmap.txt > crushmap_modified.txt || return 1
    cat >> crushmap_modified.txt << EOF
rule stretch_rule {
        id 1
        type replicated
        min_size 1
        max_size 10
        step take iris
        step chooseleaf firstn 2 type host
        step emit
        step take pze
        step chooseleaf firstn 2 type host
        step emit
}
# end crush map
EOF

    crushtool --compile crushmap_modified.txt -o crushmap.bin || return 1
    ceph osd setcrushmap -i crushmap.bin  || return 1
    local pool=stretched_rbdpool
    ceph osd pool create $pool 32 32 stretch_rule || return 1
    ceph osd pool set $pool size 4 || return 1

    ceph mon set_location e zone=arbiter host=node-1 || return 1
    # enter stretch mode
    ceph mon enable_stretch_mode e stretch_rule zone || return 1

    # give every osd the same, rounder crush weight
    for osd_id in 0 1 2 3; do
        ceph osd crush reweight osd.$osd_id $weight
    done

    # Part 1: a third zone bucket under the root must raise the
    # bucket-count warning; removing it must clear the warning.
    local bucket_warning="INCORRECT_NUM_BUCKETS_STRETCH_MODE"
    ceph osd crush add-bucket sham zone || return 1
    ceph osd crush move sham root=default || return 1
    wait_for_health "$bucket_warning" || return 1

    ceph osd crush rm sham
    wait_for_health_gone "$bucket_warning" || return 1

    # Part 2: a lighter osd.0 must raise the uneven-weights warning;
    # restoring its weight must clear the warning.
    local weight_warning="UNEVEN_WEIGHTS_STRETCH_MODE"
    ceph osd crush reweight osd.0 0.07000
    wait_for_health "$weight_warning" || return 1

    ceph osd crush reweight osd.0 $weight
    wait_for_health_gone "$weight_warning" || return 1

    teardown $dir || return 1
}
main mon-stretched-cluster-uneven-weight "$@"
# \ No newline at end of file
# diff --git a/qa/standalone/mon/health-mute.sh b/qa/standalone/mon/health-mute.sh
# new file mode 100755
# index 000000000..d8e07ca06
# --- /dev/null
# +++ b/qa/standalone/mon/health-mute.sh
# @@ -0,0 +1,124 @@
#!/bin/bash

source $CEPH_ROOT/qa/standalone/ceph-helpers.sh

# Entry point invoked by main: exports the mon address and common
# CEPH_ARGS, then runs each requested TEST_* function (all of them when
# none is named) between a fresh setup and teardown.
#   $1   - test directory
#   $2.. - optional names of the TEST_* functions to run
# Returns 1 as soon as setup, a test, or teardown fails.
#
# NOTE(review): port 7143 is also declared as CEPH_MON_D by both
# mon-stretch scripts in this patch, so the "only one" rule is not met -
# confirm these tests never run concurrently.
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7143" # git grep '\<7143\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none --mon-pg-warn-min-per-osd 0 --mon-max-pg-per-osd 1000 "
    CEPH_ARGS+="--mon-host=$CEPH_MON "

    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    for func in $funcs ; do
        setup $dir || return 1
        $func $dir || return 1
        teardown $dir || return 1
    done
}

# Walks 'ceph health mute' / 'unmute' through plain, TTL and --sticky
# mutes, and through the OSD_DOWN count changing while it is muted, on a
# cluster of one mon, one mgr and three osds.
#   $1 - test directory
# Returns 1 on the first expectation that is not met.
function TEST_mute() {
    local dir=$1
    setup $dir || return 1

    # every check below is a 'ceph ... | grep ...' pipeline; with pipefail a
    # failing ceph command fails the check as well.  The option is not
    # switched off again before the function returns.
    set -o pipefail

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    run_osd $dir 0 || return 1
    run_osd $dir 1 || return 1
    run_osd $dir 2 || return 1

    ceph osd pool create foo 8
    ceph osd pool application enable foo rbd --yes-i-really-mean-it
    wait_for_clean || return 1

    ceph -s
    ceph health | grep HEALTH_OK || return 1
    # test warning on setting pool size=1
    ceph osd pool set foo size 1 --yes-i-really-mean-it
    ceph -s
    ceph health | grep HEALTH_WARN || return 1
    ceph health detail | grep POOL_NO_REDUNDANCY || return 1
    ceph health mute POOL_NO_REDUNDANCY
    ceph -s
    # once muted, the health line must read HEALTH_OK and still name the code
    ceph health | grep HEALTH_OK | grep POOL_NO_REDUNDANCY || return 1
    ceph health unmute POOL_NO_REDUNDANCY
    ceph -s
    ceph health | grep HEALTH_WARN || return 1
    # restore pool size to default
    ceph osd pool set foo size 3
    ceph -s
    ceph health | grep HEALTH_OK || return 1
    ceph osd set noup
    ceph -s
    ceph health detail | grep OSDMAP_FLAGS || return 1
    ceph osd down 0
    ceph -s
    ceph health detail | grep OSD_DOWN || return 1
    ceph health detail | grep HEALTH_WARN || return 1

    # two codes muted at once: HEALTH_OK, with both codes still listed
    ceph health mute OSD_DOWN
    ceph health mute OSDMAP_FLAGS
    ceph -s
    ceph health | grep HEALTH_OK | grep OSD_DOWN | grep OSDMAP_FLAGS || return 1
    ceph health unmute OSD_DOWN
    ceph -s
    ceph health | grep HEALTH_WARN || return 1

    # ttl
    # a 10s mute must be in effect now and gone after sleeping 15s
    ceph health mute OSD_DOWN 10s
    ceph -s
    ceph health | grep HEALTH_OK || return 1
    sleep 15
    ceph -s
    ceph health | grep HEALTH_WARN || return 1

    # sticky
    # OSDMAP_FLAGS must still be listed after the flag is unset, and the
    # cluster must read HEALTH_OK once the flag is set again
    ceph health mute OSDMAP_FLAGS --sticky
    ceph osd unset noup
    sleep 5
    ceph -s
    ceph health | grep OSDMAP_FLAGS || return 1
    ceph osd set noup
    ceph -s
    ceph health | grep HEALTH_OK || return 1

    # ratchet down on OSD_DOWN count
    ceph osd down 0 1
    ceph -s
    ceph health detail | grep OSD_DOWN || return 1

    ceph health mute OSD_DOWN
    kill_daemons $dir TERM osd.0
    ceph osd unset noup
    sleep 10
    ceph -s
    # expected now: a single osd reported down, with the mute still holding
    ceph health detail | grep OSD_DOWN || return 1
    ceph health detail | grep '1 osds down' || return 1
    ceph health | grep HEALTH_OK || return 1

    sleep 10 # give time for mon tick to ratchet the mute
    ceph osd set noup
    ceph health mute OSDMAP_FLAGS
    ceph -s
    ceph health detail
    ceph health | grep HEALTH_OK || return 1

    ceph osd down 1
    ceph -s
    ceph health detail
    ceph health detail | grep '2 osds down' || return 1

    sleep 10 # give time for mute to clear
    ceph -s
    ceph health detail
    # with two osds down the OSD_DOWN mute must no longer hold
    ceph health | grep HEALTH_WARN || return 1
    ceph health detail | grep '2 osds down' || return 1

    teardown $dir || return 1
}

main health-mute "$@"

# diff --git a/qa/standalone/mon/misc.sh b/qa/standalone/mon/misc.sh
# new file mode 100755
# index 000000000..c7fc6d441
# --- /dev/null
# +++ b/qa/standalone/mon/misc.sh
# @@ -0,0 +1,284 @@
#!/usr/bin/env bash
#
# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com>
#
# Author: Loic Dachary <loic@dachary.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2,
# Exercise `ceph osd pool set` / `ceph osd pool get` on a one-monitor cluster.
#
# Arguments: $1 - scratch directory handed to setup / run_mon / teardown
# Globals:   TEST_POOL (read) - name of the replicated pool under test
# Returns:   0 when every expectation holds, 1 on the first one that fails
#
# Covered: boolean pool flags, the default min_size, the three scrub interval
# options, size/min_size bounds of the replicated pool and size/min_size
# bounds of an erasure-coded pool.
function TEST_osd_pool_get_set() {
    local dir=$1

    setup $dir || return 1
    run_mon $dir a || return 1
    create_pool $TEST_POOL 8 || return 1

    local flag
    for flag in nodelete nopgchange nosizechange write_fadvise_dontneed noscrub nodeep-scrub; do
        ceph osd pool set $TEST_POOL $flag 0 || return 1
        ! ceph osd dump | grep 'pool ' | grep $flag || return 1
        ceph osd pool set $TEST_POOL $flag 1 || return 1
        ceph osd dump | grep 'pool ' | grep $flag || return 1
        ceph osd pool set $TEST_POOL $flag false || return 1
        ! ceph osd dump | grep 'pool ' | grep $flag || return 1
        ceph osd pool set $TEST_POOL $flag false || return 1
        # check that setting false twice does not toggle to true (bug)
        ! ceph osd dump | grep 'pool ' | grep $flag || return 1
        ceph osd pool set $TEST_POOL $flag true || return 1
        ceph osd dump | grep 'pool ' | grep $flag || return 1
        # cleanup
        ceph osd pool set $TEST_POOL $flag 0 || return 1
    done

    # Declare first, assign afterwards: `local v=$(cmd)` hides cmd's status.
    local size min_size expected_min_size
    size=$(ceph osd pool get $TEST_POOL size | awk '{print $2}')
    min_size=$(ceph osd pool get $TEST_POOL min_size | awk '{print $2}')
    expected_min_size=$(( size - size / 2 ))
    # `! [ a -eq b ]` also trips when min_size is empty or not a number,
    # instead of silently skipping the check.
    if ! [ "$min_size" -eq "$expected_min_size" ]; then
        echo "default min_size is wrong: expected $expected_min_size, got $min_size"
        return 1
    fi

    local opt
    for opt in scrub_min_interval scrub_max_interval deep_scrub_interval; do
        ceph osd pool set $TEST_POOL $opt 123456 || return 1
        ceph osd dump | grep 'pool ' | grep "$opt 123456" || return 1
        ceph osd pool set $TEST_POOL $opt 0 || return 1
        # once reset to 0 the option must no longer show up in the dump
        ! ceph osd dump | grep 'pool ' | grep "$opt" || return 1
    done

    # replicated pool size is restricted to the range 1..10; the option name
    # must be given, otherwise the command fails on argument parsing alone
    # and the limit itself is never exercised
    ! ceph osd pool set $TEST_POOL size 11 || return 1
    # replicated pool min_size must be between 1 and size
    ! ceph osd pool set $TEST_POOL min_size $(( size + 1 )) || return 1
    ! ceph osd pool set $TEST_POOL min_size 0 || return 1

    local ecpool=erasepool
    create_pool $ecpool 12 12 erasure default || return 1
    # erasure pool: size=k+m; k is derived from the default min_size=k+1
    size=$(ceph osd pool get $ecpool size | awk '{print $2}')
    min_size=$(ceph osd pool get $ecpool min_size | awk '{print $2}')
    local k=$(( min_size - 1 ))
    # erasure pool size can't change
    ! ceph osd pool set $ecpool size $(( size + 1 )) || return 1
    # erasure pool min_size must be between k and size
    ceph osd pool set $ecpool min_size $(( k + 1 )) || return 1
    ! ceph osd pool set $ecpool min_size $(( k - 1 )) || return 1
    ! ceph osd pool set $ecpool min_size $(( size + 1 )) || return 1

    teardown $dir || return 1
}
# Make sure the ceph CLI does not segfault when it is given a keyring whose
# client.admin key does not match the one the monitor was created with.
#
# Arguments: $1 - scratch directory (keyrings are written below it)
# Globals:   CEPH_ARGS - replaced while the test runs, restored on every path
#            that is reached after the replacement
# Returns:   1 if setup/run_mon/teardown fail or `ceph osd dump` exits with
#            139, 0 otherwise
function TEST_no_segfault_for_bad_keyring() {
    local dir=$1
    setup $dir || return 1

    # create a client.admin key and add it to ceph.mon.keyring
    ceph-authtool --create-keyring $dir/ceph.mon.keyring --gen-key -n mon. --cap mon 'allow *'
    ceph-authtool --create-keyring $dir/ceph.client.admin.keyring --gen-key -n client.admin --cap mon 'allow *'
    ceph-authtool $dir/ceph.mon.keyring --import-keyring $dir/ceph.client.admin.keyring

    local base_args="--fsid=$(uuidgen) --mon-host=127.0.0.1:7102 --auth-supported=cephx "
    local saved_args=$CEPH_ARGS
    local status=0

    CEPH_ARGS="$base_args --keyring=$dir/ceph.mon.keyring "
    if ! run_mon $dir a ; then
        CEPH_ARGS=$saved_args
        return 1
    fi

    # create a bad keyring and make sure no segfault occurs when using the bad keyring
    printf '%s\n' '[client.admin]' 'key = BQAUlgtWoFePIxAAQ9YLzJSVgJX5V1lh5gyctg==' > $dir/bad.keyring
    CEPH_ARGS="$base_args --keyring=$dir/bad.keyring"
    # any failure other than a crash is acceptable here; only record the status
    ceph osd dump 2> /dev/null || status=$?

    # restore before deciding the outcome so a failure does not leave the
    # bad keyring in CEPH_ARGS
    CEPH_ARGS=$saved_args

    # 139(11|128) means segfault and core dumped
    if [ "$status" -eq 139 ]; then
        return 1
    fi
    teardown $dir || return 1
}
return 1 + # quorum's monitor features contain kraken, luminous, mimic, nautilus, + # octopus, pacific, quincy + jqfilter='.features.quorum_mon[]|select(. == "kraken")' + jq_success "$jqinput" "$jqfilter" "kraken" || return 1 + jqfilter='.features.quorum_mon[]|select(. == "luminous")' + jq_success "$jqinput" "$jqfilter" "luminous" || return 1 + jqfilter='.features.quorum_mon[]|select(. == "mimic")' + jq_success "$jqinput" "$jqfilter" "mimic" || return 1 + jqfilter='.features.quorum_mon[]|select(. == "nautilus")' + jq_success "$jqinput" "$jqfilter" "nautilus" || return 1 + jqfilter='.features.quorum_mon[]|select(. == "octopus")' + jq_success "$jqinput" "$jqfilter" "octopus" || return 1 + jqfilter='.features.quorum_mon[]|select(. == "pacific")' + jq_success "$jqinput" "$jqfilter" "pacific" || return 1 + jqfilter='.features.quorum_mon[]|select(. == "quincy")' + jq_success "$jqinput" "$jqfilter" "quincy" || return 1 + jqfilter='.features.quorum_mon[]|select(. == "reef")' + jq_success "$jqinput" "$jqfilter" "reef" || return 1 + + # monmap must have no persistent features set, because we + # don't currently have a quorum made out of all the monitors + # in the monmap. + jqfilter='.monmap.features.persistent | length == 0' + jq_success "$jqinput" "$jqfilter" || return 1 + + # nor do we have any optional features, for that matter. + jqfilter='.monmap.features.optional | length == 0' + jq_success "$jqinput" "$jqfilter" || return 1 + + # validate 'mon feature ls' + + jqinput="$(ceph mon feature ls --format=json 2>/dev/null)" + # k l m n o p q are supported + jqfilter='.all.supported[] | select(. == "kraken")' + jq_success "$jqinput" "$jqfilter" "kraken" || return 1 + jqfilter='.all.supported[] | select(. == "luminous")' + jq_success "$jqinput" "$jqfilter" "luminous" || return 1 + jqfilter='.all.supported[] | select(. == "mimic")' + jq_success "$jqinput" "$jqfilter" "mimic" || return 1 + jqfilter='.all.supported[] | select(. 
== "nautilus")' + jq_success "$jqinput" "$jqfilter" "nautilus" || return 1 + jqfilter='.all.supported[] | select(. == "octopus")' + jq_success "$jqinput" "$jqfilter" "octopus" || return 1 + jqfilter='.all.supported[] | select(. == "pacific")' + jq_success "$jqinput" "$jqfilter" "pacific" || return 1 + jqfilter='.all.supported[] | select(. == "quincy")' + jq_success "$jqinput" "$jqfilter" "quincy" || return 1 + jqfilter='.all.supported[] | select(. == "reef")' + jq_success "$jqinput" "$jqfilter" "reef" || return 1 + + # start third monitor + run_mon $dir c --public-addr $MONC || return 1 + + wait_for_quorum 300 3 || return 1 + + timeout 300 ceph -s > /dev/null || return 1 + + jqinput="$(ceph quorum_status --format=json 2>/dev/null)" + # expect quorum to have all three monitors + jqfilter='.quorum | length == 3' + jq_success "$jqinput" "$jqfilter" || return 1 + + # quorum's monitor features should have p now too + jqfilter='.features.quorum_mon[]|select(. == "pacific")' + jq_success "$jqinput" "$jqfilter" "pacific" || return 1 + + # persistent too + jqfilter='.monmap.features.persistent[]|select(. == "kraken")' + jq_success "$jqinput" "$jqfilter" "kraken" || return 1 + jqfilter='.monmap.features.persistent[]|select(. == "luminous")' + jq_success "$jqinput" "$jqfilter" "luminous" || return 1 + jqfilter='.monmap.features.persistent[]|select(. == "mimic")' + jq_success "$jqinput" "$jqfilter" "mimic" || return 1 + jqfilter='.monmap.features.persistent[]|select(. == "osdmap-prune")' + jq_success "$jqinput" "$jqfilter" "osdmap-prune" || return 1 + jqfilter='.monmap.features.persistent[]|select(. == "nautilus")' + jq_success "$jqinput" "$jqfilter" "nautilus" || return 1 + jqfilter='.monmap.features.persistent[]|select(. == "octopus")' + jq_success "$jqinput" "$jqfilter" "octopus" || return 1 + jqfilter='.monmap.features.persistent[]|select(. == "pacific")' + jq_success "$jqinput" "$jqfilter" "pacific" || return 1 + jqfilter='.monmap.features.persistent[]|select(. 
== "elector-pinging")' + jq_success "$jqinput" "$jqfilter" "elector-pinging" || return 1 + jqfilter='.monmap.features.persistent | length == 10' + jq_success "$jqinput" "$jqfilter" || return 1 + jqfilter='.monmap.features.persistent[]|select(. == "quincy")' + jq_success "$jqinput" "$jqfilter" "quincy" || return 1 + jqfilter='.monmap.features.persistent[]|select(. == "reef")' + jq_success "$jqinput" "$jqfilter" "reef" || return 1 + + CEPH_ARGS=$CEPH_ARGS_orig + # that's all folks. thank you for tuning in. + teardown $dir || return 1 +} + +main misc "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/mon/misc.sh" +# End: diff --git a/qa/standalone/mon/mkfs.sh b/qa/standalone/mon/mkfs.sh new file mode 100755 index 000000000..6650bdb49 --- /dev/null +++ b/qa/standalone/mon/mkfs.sh @@ -0,0 +1,193 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2013 Cloudwatt <libre.licensing@cloudwatt.com> +# Copyright (C) 2014 Red Hat <contact@redhat.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
+# +set -xe +PS4='${BASH_SOURCE[0]}:$LINENO: ${FUNCNAME[0]}: ' + + +DIR=mkfs +export CEPH_CONF=/dev/null +unset CEPH_ARGS +MON_ID=a +MON_DIR=$DIR/$MON_ID +CEPH_MON=127.0.0.1:7110 # git grep '\<7110\>' : there must be only one +TIMEOUT=360 + +EXTRAOPTS="" + +function setup() { + teardown + mkdir $DIR +} + +function teardown() { + kill_daemons + rm -fr $DIR +} + +function mon_mkfs() { + local fsid=$(uuidgen) + + ceph-mon \ + --id $MON_ID \ + --fsid $fsid \ + $EXTRAOPTS \ + --mkfs \ + --mon-data=$MON_DIR \ + --mon-initial-members=$MON_ID \ + --mon-host=$CEPH_MON \ + "$@" +} + +function mon_run() { + ceph-mon \ + --id $MON_ID \ + --chdir= \ + --mon-osd-full-ratio=.99 \ + --mon-data-avail-crit=1 \ + $EXTRAOPTS \ + --mon-data=$MON_DIR \ + --log-file=$MON_DIR/log \ + --mon-cluster-log-file=$MON_DIR/log \ + --run-dir=$MON_DIR \ + --pid-file=$MON_DIR/pidfile \ + --public-addr $CEPH_MON \ + "$@" +} + +function kill_daemons() { + for pidfile in $(find $DIR -name pidfile) ; do + pid=$(cat $pidfile) + for try in 0 1 1 1 2 3 ; do + kill $pid || break + sleep $try + done + done +} + +function auth_none() { + mon_mkfs --auth-supported=none + + ceph-mon \ + --id $MON_ID \ + --mon-osd-full-ratio=.99 \ + --mon-data-avail-crit=1 \ + $EXTRAOPTS \ + --mon-data=$MON_DIR \ + --extract-monmap $MON_DIR/monmap + + [ -f $MON_DIR/monmap ] || return 1 + + [ ! -f $MON_DIR/keyring ] || return 1 + + mon_run --auth-supported=none + + timeout $TIMEOUT ceph --mon-host $CEPH_MON mon stat || return 1 +} + +function auth_cephx_keyring() { + cat > $DIR/keyring <<EOF +[mon.] + key = AQDUS79S0AF9FRAA2cgRLFscVce0gROn/s9WMg== + caps mon = "allow *" +EOF + + mon_mkfs --keyring=$DIR/keyring + + [ -f $MON_DIR/keyring ] || return 1 + + mon_run + + timeout $TIMEOUT ceph \ + --name mon. \ + --keyring $MON_DIR/keyring \ + --mon-host $CEPH_MON mon stat || return 1 +} + +function auth_cephx_key() { + if [ -f /etc/ceph/keyring ] ; then + echo "Please move /etc/ceph/keyring away for testing!" 
# Check how `ceph-mon --mkfs` treats the monitor data directory:
#   1. it must refuse a data path whose parent directory does not exist;
#   2. an existing but empty data directory must not be reported as an
#      already existing monitor.
#
# Globals: MON_ID, MON_DIR, DIR, EXTRAOPTS (read)
# Returns: 0 when both expectations hold, 1 otherwise
function makedir() {
    local toodeep=$MON_DIR/toodeep
    local log=$DIR/makedir.log

    # fail if recursive directory creation is needed
    ceph-mon \
        --id $MON_ID \
        --mon-osd-full-ratio=.99 \
        --mon-data-avail-crit=1 \
        $EXTRAOPTS \
        --mkfs \
        --mon-data=$toodeep 2>&1 | tee $log
    # checked explicitly instead of relying on `set -e` alone
    grep -q 'toodeep.*No such file' $log || return 1
    rm $log

    # an empty directory does not mean the mon exists
    mkdir $MON_DIR
    mon_mkfs --auth-supported=none 2>&1 | tee $log
    # same pattern as idempotent(): the path is wrapped in single quotes in
    # the message, so a pattern without them could never match
    ! grep "'$MON_DIR' already exists" $log || return 1
}
# Send the default `kill` signal to every forwarder recorded by
# port_forward() and forget them.
#
# Globals: SOCAT_PIDS (read, then reset to an empty array)
# Returns: 0
function cleanup() {
    # keep the loop variable out of the global namespace
    local pid
    for pid in "${SOCAT_PIDS[@]}"; do
        kill "$pid"
    done
    SOCAT_PIDS=()
}
# Start three monitors whose bind address differs from their public address,
# forward each public port to the bind port and check that all three show up
# in the monmap and, once wait_for_quorum succeeds, in the quorum.
#
# Arguments: $1 - scratch directory passed to run_mon
# Globals:   MON_IP, MON{A,B,C}_PUBLIC, MON{A,B,C}_BIND (read),
#            CEPH_ARGS (appended to)
# Returns:   0 on success, 1 on the first failed step
function TEST_mon_quorum() {
    local dir=$1
    local jqinput jqfilter

    # start the mon with a public-bind-addr that is different
    # from the public-addr.
    CEPH_ARGS+="--mon-host=${MON_IP}:${MONA_PUBLIC},${MON_IP}:${MONB_PUBLIC},${MON_IP}:${MONC_PUBLIC} "
    run_mon $dir a --public-addr=${MON_IP}:${MONA_PUBLIC} --public-bind-addr=${MON_IP}:${MONA_BIND} || return 1
    run_mon $dir b --public-addr=${MON_IP}:${MONB_PUBLIC} --public-bind-addr=${MON_IP}:${MONB_BIND} || return 1
    run_mon $dir c --public-addr=${MON_IP}:${MONC_PUBLIC} --public-bind-addr=${MON_IP}:${MONC_BIND} || return 1

    # now forward the public port to the bind port.
    port_forward ${MONA_PUBLIC} ${MONA_BIND}
    port_forward ${MONB_PUBLIC} ${MONB_BIND}
    port_forward ${MONC_PUBLIC} ${MONC_BIND}

    # expect monmap to contain 3 monitors (a, b, and c)
    jqinput="$(ceph quorum_status --format=json 2>/dev/null)"
    jq_success "$jqinput" '.monmap.mons | length == 3' || return 1

    # quorum should form
    wait_for_quorum 300 3 || return 1

    # expect quorum to have all three monitors; the status fetched above
    # predates wait_for_quorum, so query again before looking at .quorum
    jqinput="$(ceph quorum_status --format=json 2>/dev/null)"
    jqfilter='.quorum | length == 3'
    jq_success "$jqinput" "$jqfilter" || return 1
}
+ CEPH_ARGS+="--mon-host=${MON_IP}:${MONA_PUBLIC},${MON_IP}:${MONB_PUBLIC},${MON_IP}:${MONC_PUBLIC} " + run_mon $dir a --public-addr=${MON_IP}:${MONA_PUBLIC} --public-bind-addr=${MON_IP}:${MONA_BIND} || return 1 + run_mon $dir b --public-addr=${MON_IP}:${MONB_PUBLIC} --public-bind-addr=${MON_IP}:${MONB_BIND} || return 1 + run_mon $dir c --public-addr=${MON_IP}:${MONC_PUBLIC} --public-bind-addr=${MON_IP}:${MONC_BIND} || return 1 + + # now forward the public port to the bind port. + port_forward ${MONA_PUBLIC} ${MONA_BIND} + port_forward ${MONB_PUBLIC} ${MONB_BIND} + port_forward ${MONC_PUBLIC} ${MONC_BIND} + + # quorum should form + wait_for_quorum 300 3 || return 1 + + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + create_pool hello 8 || return 1 + + echo "hello world" > $dir/hello + rados --pool hello put foo $dir/hello || return 1 + rados --pool hello get foo $dir/hello2 || return 1 + diff $dir/hello $dir/hello2 || return 1 +} + +main mon-bind "$@" diff --git a/qa/standalone/mon/mon-created-time.sh b/qa/standalone/mon/mon-created-time.sh new file mode 100755 index 000000000..4b8446059 --- /dev/null +++ b/qa/standalone/mon/mon-created-time.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2015 SUSE LINUX GmbH +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
# Start one monitor and verify that `ceph mon dump` reports a creation time
# that is neither missing nor the zero timestamp "0.000000".
#
# Arguments: $1 - scratch directory passed to run_mon
# Returns:   0 when the created stamp looks sane, 1 otherwise
function TEST_mon_created_time() {
    local dir=$1
    local stamp

    run_mon $dir a || return 1
    ceph mon dump || return 1

    # last field of the "created" line; must not be empty ...
    stamp=$(ceph mon dump 2>/dev/null | sed -n '/created/p' | awk '{print $NF}')
    [ -n "$stamp" ] || return 1

    # ... and, queried once more, must not be the zero timestamp
    stamp=$(ceph mon dump 2>/dev/null | sed -n '/created/p' | awk '{print $NF}')
    [ "$stamp" != "0.000000" ] || return 1
}
# Start two monitors and verify, from their log files, that a command sent to
# the leader is handled locally while a command sent to the peon is forwarded
# to the leader together with the features of the original connection.
#
# Arguments: $1 - scratch directory (mon.a.log / mon.b.log are read from it)
# Globals:   MONA, MONB (written) - addresses of the two monitors
# Returns:   0 on success, 1 on the first failed step
function run() {
    local dir=$1
    local features

    setup $dir || return 1

    MONA=127.0.0.1:7300
    MONB=127.0.0.1:7301
    # The subshell keeps the exported CEPH_ARGS away from the ceph commands
    # below. A `return` in here only ends the subshell, so its status has to
    # be checked by the caller side of the parentheses.
    (
        FSID=$(uuidgen)
        export CEPH_ARGS
        CEPH_ARGS+="--fsid=$FSID --auth-supported=none "
        CEPH_ARGS+="--mon-host=$MONA,$MONB "
        run_mon $dir a --public-addr $MONA || return 1
        run_mon $dir b --public-addr $MONB || return 1
    ) || return 1

    timeout 360 ceph --mon-host-override $MONA mon stat || return 1
    # check that MONB is indeed a peon
    ceph --admin-daemon $(get_asok_path mon.b) mon_status |
        grep '"peon"' || return 1
    # when the leader ( MONA ) is used, there is no message forwarding
    ceph --mon-host-override $MONA osd pool create POOL1 12
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1
    grep 'mon_command(.*"POOL1"' $dir/mon.a.log || return 1
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.b) log flush || return 1
    grep 'mon_command(.*"POOL1"' $dir/mon.b.log && return 1
    # when the peon ( MONB ) is used, the message is forwarded to the leader
    ceph --mon-host-override $MONB osd pool create POOL2 12
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.b) log flush || return 1
    grep 'forward_request.*mon_command(.*"POOL2"' $dir/mon.b.log || return 1
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1
    grep ' forward(mon_command(.*"POOL2"' $dir/mon.a.log || return 1
    # forwarded messages must retain features from the original connection
    features=$(sed -n -e 's|.*127.0.0.1:0.*accept features \([0-9][0-9]*\)|\1|p' < \
        $dir/mon.b.log)
    grep ' forward(mon_command(.*"POOL2".*con_features '$features $dir/mon.a.log || return 1

    teardown $dir || return 1
}
# ---------------------------------------------------------------------------
# Helpers that inspect the JSON printed by `ceph report`.
#
# Two rules apply to all of them:
#  * failure paths use `{ ...; return 1; }`. With `( ... )` the `return`
#    only ends the subshell and the function carries on to `return 0`.
#  * every jq filter is passed as ONE word; a filter split over two quoted
#    strings reaches jq as a filter plus an input file name.
# ---------------------------------------------------------------------------

# With no argument: succeed when exactly two pools have a floor equal to
# min_last_epoch_clean. With a pool id in $1: succeed when that pool's floor
# equals min_last_epoch_clean. Prints the report and returns 1 otherwise.
function check_lec_equals_pools() {
    local pool_id=$1
    local per_pool='.osdmap_clean_epochs.last_epoch_clean.per_pool[]'
    local report lec
    local -a pools floor

    report=$(ceph report)
    lec=$(jq '.osdmap_clean_epochs.min_last_epoch_clean' <<< "$report")

    if [[ -z "$pool_id" ]]; then
        pools=($(jq "$per_pool | select(.floor == $lec) | .poolid" <<< "$report"))
        [[ ${#pools[*]} -eq 2 ]] || { echo "$report" ; return 1 ; }
    else
        floor=($(jq "$per_pool | select(.poolid == $pool_id) | .floor" <<< "$report"))
        [[ $lec -eq ${floor[0]} ]] || { echo "$report" ; return 1 ; }
    fi
    return 0
}

# Succeed when min_last_epoch_clean is strictly lower than the floor of the
# pool whose id is given in $1 (required).
function check_lec_lower_than_pool() {
    local pool_id=$1
    local per_pool='.osdmap_clean_epochs.last_epoch_clean.per_pool[]'
    local report lec
    local -a floor

    if [[ -z "$pool_id" ]]; then
        echo "expected pool_id as parameter" >&2
        return 1
    fi

    report=$(ceph report)
    lec=$(jq '.osdmap_clean_epochs.min_last_epoch_clean' <<< "$report")
    floor=($(jq "$per_pool | select(.poolid == $pool_id) | .floor" <<< "$report"))

    [[ $lec -lt ${floor[0]} ]] || { echo "$report" ; return 1 ; }
    return 0
}

# Succeed when the floor of pool $1 is strictly greater than the floor of
# pool $2 (both required).
function check_floor_pool_greater_than_pool() {
    local pool_a=$1
    local pool_b=$2
    local per_pool='.osdmap_clean_epochs.last_epoch_clean.per_pool[]'
    local report
    local -a floor_a floor_b

    if [[ -z "$pool_a" ]]; then
        echo "expected id as first parameter" >&2
        return 1
    fi
    if [[ -z "$pool_b" ]]; then
        echo "expected id as second parameter" >&2
        return 1
    fi

    report=$(ceph report)
    floor_a=($(jq "$per_pool | select(.poolid == $pool_a) | .floor" <<< "$report"))
    floor_b=($(jq "$per_pool | select(.poolid == $pool_b) | .floor" <<< "$report"))

    [[ ${floor_a[0]} -gt ${floor_b[0]} ]] || { echo "$report" ; return 1 ; }
    return 0
}

# With no argument: succeed when exactly three OSDs report an epoch that is
# >= min_last_epoch_clean. With an OSD id in $1: succeed when that OSD has
# exactly one epoch entry and it is >= min_last_epoch_clean.
function check_lec_honours_osd() {
    local osd=$1
    local osd_epochs='.osdmap_clean_epochs.osd_epochs[]'
    local report lec
    local -a osds epoch

    report=$(ceph report)
    lec=$(jq '.osdmap_clean_epochs.min_last_epoch_clean' <<< "$report")

    if [[ -z "$osd" ]]; then
        osds=($(jq "$osd_epochs | select(.epoch >= $lec) | .id" <<< "$report"))
        [[ ${#osds[*]} -eq 3 ]] || { echo "$report" ; return 1 ; }
    else
        epoch=($(jq "$osd_epochs | select(.id == $osd) | .epoch" <<< "$report"))
        [[ ${#epoch[*]} -eq 1 ]] || { echo "$report" ; return 1 ; }
        [[ ${epoch[0]} -ge $lec ]] || { echo "$report" ; return 1 ; }
    fi

    return 0
}

# Succeed when min_last_epoch_clean equals osdmap_first_committed.
function validate_fc() {
    local report lec osdm_fc

    report=$(ceph report)
    lec=$(jq '.osdmap_clean_epochs.min_last_epoch_clean' <<< "$report")
    osdm_fc=$(jq '.osdmap_first_committed' <<< "$report")

    [[ $lec -eq $osdm_fc ]] || { echo "$report" ; return 1 ; }
    return 0
}

# Print osdmap_last_committed - osdmap_first_committed on stdout.
function get_fc_lc_diff() {
    local report osdm_fc osdm_lc

    report=$(ceph report)
    osdm_fc=$(jq '.osdmap_first_committed' <<< "$report")
    osdm_lc=$(jq '.osdmap_last_committed' <<< "$report")

    echo $((osdm_lc - osdm_fc))
}

# Print the numeric id of the pool named $1 on stdout.
# Returns 1, with a message on stderr and nothing on stdout, when the name
# is missing or no non-negative id is found. Callers capture stdout, so
# diagnostics must not go there.
function get_pool_id() {
    local pn=$1
    local report pool_id

    if [[ -z "$pn" ]]; then
        echo "expected pool name as argument" >&2
        return 1
    fi

    report=$(ceph report)
    pool_id=$(jq ".osdmap.pools[] | select(.pool_name == \"$pn\") | .pool" <<< "$report")

    if [[ -z "$pool_id" ]] || ! [[ $pool_id -ge 0 ]]; then
        echo "unexpected pool id for pool '$pn': $pool_id" >&2
        return 1
    fi

    echo $pool_id
    return 0
}
easier than deduplicating the code + local -a delays=($(get_timeout_delays $TIMEOUT .1)) + local -i loop=0 + local -i v_diff=$1 + + while [[ $(get_fc_lc_diff) -gt $v_diff ]]; do + if (( $loop >= ${#delays[*]} )) ; then + echo "maps were not trimmed" + return 1 + fi + sleep ${delays[$loop]} + loop+=1 + done +} + +function TEST_mon_last_clean_epoch() { + + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x --mon-warn-on-pool-no-app=false || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + osd_pid=$(cat $dir/osd.2.pid) + + sleep 5 + + ceph tell 'osd.*' injectargs '--osd-beacon-report-interval 10' || exit 1 + ceph tell 'mon.*' injectargs \ + '--mon-min-osdmap-epochs 2 --paxos-service-trim-min 1' || exit 1 + + create_pool foo 32 + create_pool bar 32 + + foo_id=$(get_pool_id "foo") + bar_id=$(get_pool_id "bar") + + [[ $foo_id -lt 0 ]] && ( echo "couldn't find pool 'foo' id" ; exit 1 ) + [[ $bar_id -lt 0 ]] && ( echo "couldn't find pool 'bar' id" ; exit 1 ) + + # no real clue why we are getting these warnings, but let's make them go + # away so we can be happy. + + ceph osd set-full-ratio 0.97 + ceph osd set-backfillfull-ratio 0.97 + + wait_for_health_ok || exit 1 + + pre_map_diff=$(get_fc_lc_diff) + wait_for_total_num_maps 2 + post_map_diff=$(get_fc_lc_diff) + + [[ $post_map_diff -le $pre_map_diff ]] || exit 1 + + pre_map_diff=$post_map_diff + + ceph osd pool set foo size 3 + ceph osd pool set bar size 3 + + wait_for_health_ok || exit 1 + + check_lec_equals_pools || exit 1 + check_lec_honours_osd || exit 1 + validate_fc || exit 1 + + # down osd.2; expected result (because all pools' size equals 3): + # - number of committed maps increase over 2 + # - lec equals fc + # - lec equals osd.2's epoch + # - all pools have floor equal to lec + + while kill $osd_pid ; do sleep 1 ; done + ceph osd out 2 + sleep 5 # seriously, just to make sure things settle; we may not need this. 
+ + # generate some maps + for ((i=0; i <= 10; ++i)); do + ceph osd set noup + sleep 1 + ceph osd unset noup + sleep 1 + done + + post_map_diff=$(get_fc_lc_diff) + [[ $post_map_diff -gt 2 ]] || exit 1 + + validate_fc || exit 1 + check_lec_equals_pools || exit 1 + check_lec_honours_osd 2 || exit 1 + + # adjust pool 'bar' size to 2; expect: + # - number of committed maps still over 2 + # - lec equals fc + # - lec equals pool 'foo' floor + # - pool 'bar' floor greater than pool 'foo' + + ceph osd pool set bar size 2 + + diff_ver=$(get_fc_lc_diff) + [[ $diff_ver -gt 2 ]] || exit 1 + + validate_fc || exit 1 + + check_lec_equals_pools $foo_id || exit 1 + check_lec_lower_than_pool $bar_id || exit 1 + + check_floor_pool_greater_than_pool $bar_id $foo_id || exit 1 + + # set pool 'foo' size to 2; expect: + # - health_ok + # - lec equals pools + # - number of committed maps decreases + # - lec equals fc + + pre_map_diff=$(get_fc_lc_diff) + + ceph osd pool set foo size 2 || exit 1 + wait_for_clean || exit 1 + + check_lec_equals_pools || exit 1 + validate_fc || exit 1 + + if ! wait_for_total_num_maps 2 ; then + post_map_diff=$(get_fc_lc_diff) + # number of maps is decreasing though, right? + [[ $post_map_diff -lt $pre_map_diff ]] || exit 1 + fi + + # bring back osd.2; expect: + # - health_ok + # - lec equals fc + # - number of committed maps equals 2 + # - all pools have floor equal to lec + + pre_map_diff=$(get_fc_lc_diff) + + activate_osd $dir 2 || exit 1 + wait_for_health_ok || exit 1 + validate_fc || exit 1 + check_lec_equals_pools || exit 1 + + if ! wait_for_total_num_maps 2 ; then + post_map_diff=$(get_fc_lc_diff) + # number of maps is decreasing though, right? 
+ [[ $post_map_diff -lt $pre_map_diff ]] || exit 1 + fi + + return 0 +} + +main mon-last-clean-epoch "$@" diff --git a/qa/standalone/mon/mon-osdmap-prune.sh b/qa/standalone/mon/mon-osdmap-prune.sh new file mode 100755 index 000000000..f8f7876bb --- /dev/null +++ b/qa/standalone/mon/mon-osdmap-prune.sh @@ -0,0 +1,57 @@ +#!/bin/bash + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +base_test=$CEPH_ROOT/qa/workunits/mon/test_mon_osdmap_prune.sh + +function run() { + + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7115" + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none --mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_osdmap_prune() { + + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + sleep 5 + + # we are getting OSD_OUT_OF_ORDER_FULL health errors, and it's not clear + # why. so, to make the health checks happy, mask those errors. 
+ ceph osd set-full-ratio 0.97 + ceph osd set-backfillfull-ratio 0.97 + + ceph config set osd osd_beacon_report_interval 10 || return 1 + ceph config set mon mon_debug_extra_checks true || return 1 + + ceph config set mon mon_min_osdmap_epochs 100 || return 1 + ceph config set mon mon_osdmap_full_prune_enabled true || return 1 + ceph config set mon mon_osdmap_full_prune_min 200 || return 1 + ceph config set mon mon_osdmap_full_prune_interval 10 || return 1 + ceph config set mon mon_osdmap_full_prune_txsize 100 || return 1 + + + bash -x $base_test || return 1 + + return 0 +} + +main mon-osdmap-prune "$@" + diff --git a/qa/standalone/mon/mon-ping.sh b/qa/standalone/mon/mon-ping.sh new file mode 100755 index 000000000..1f5096be1 --- /dev/null +++ b/qa/standalone/mon/mon-ping.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2015 SUSE LINUX GmbH +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
#
source "$CEPH_ROOT/qa/standalone/ceph-helpers.sh"

# Run the tests named in "$@" (default: every TEST_* function) against the
# test directory $1, wrapping each one in setup/teardown.
# Returns 1 as soon as setup, a test or teardown fails.
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7119" # git grep '\<7119\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "

    # word splitting of $funcs is intentional: it is a list of function names
    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    local func
    for func in $funcs ; do
        setup "$dir" || return 1
        $func "$dir" || return 1
        teardown "$dir" || return 1
    done
}

# Start a single monitor in $1 and check that it answers "ceph ping".
function TEST_mon_ping() {
    local dir=$1

    run_mon "$dir" a || return 1

    ceph ping mon.a || return 1
}

main mon-ping "$@"

# Local Variables:
# compile-command: "cd ../.. ; make -j4 && test/mon/mon-ping.sh"
# End:
# diff --git a/qa/standalone/mon/mon-scrub.sh b/qa/standalone/mon/mon-scrub.sh
# new file mode 100755
# index 000000000..158bd434c
# --- /dev/null
# +++ b/qa/standalone/mon/mon-scrub.sh
# @@ -0,0 +1,49 @@
#!/usr/bin/env bash
#
# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com>
#
# Author: Loic Dachary <loic@dachary.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Library Public License for more details.
#
source "$CEPH_ROOT/qa/standalone/ceph-helpers.sh"

# Run the tests named in "$@" (default: every TEST_* function) against the
# test directory $1, wrapping each one in setup/teardown.
# Returns 1 as soon as setup, a test or teardown fails.
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7120" # git grep '\<7120\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "

    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    local func
    for func in $funcs ; do
        setup "$dir" || return 1
        $func "$dir" || return 1
        teardown "$dir" || return 1
    done
}

# Start a single monitor in $1 and check that "ceph mon scrub" succeeds.
function TEST_mon_scrub() {
    local dir=$1

    run_mon "$dir" a || return 1

    ceph mon scrub || return 1
}

main mon-scrub "$@"

# Local Variables:
# compile-command: "cd ../.. ; make -j4 && test/mon/mon-scrub.sh"
# End:
# diff --git a/qa/standalone/mon/mon-seesaw.sh b/qa/standalone/mon/mon-seesaw.sh
# new file mode 100755
# index 000000000..1c97847b9
# --- /dev/null
# +++ b/qa/standalone/mon/mon-seesaw.sh
# @@ -0,0 +1,72 @@
#!/usr/bin/env bash

source "$CEPH_ROOT/qa/standalone/ceph-helpers.sh"

# Same driver as above for mon-seesaw.sh.  Additionally exports
# BASE_CEPH_ARGS (CEPH_ARGS without any --mon-host) so the test can rebuild
# CEPH_ARGS as the set of monitors changes.
function run() {
    local dir=$1
    shift

    export CEPH_MON_A="127.0.0.1:7139" # git grep '\<7139\>' : there must be only one
    export CEPH_MON_B="127.0.0.1:7141" # git grep '\<7141\>' : there must be only one
    export CEPH_MON_C="127.0.0.1:7142" # git grep '\<7142\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "

    export BASE_CEPH_ARGS=$CEPH_ARGS
    CEPH_ARGS+="--mon-host=$CEPH_MON_A "

    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    local func
    for func in $funcs ; do
        setup "$dir" || return 1
        $func "$dir" || return 1
        teardown "$dir" || return 1
    done
}

# Grow the quorum from mon aa to aa+bb, remove aa, exercise the osds, then
# re-add aa on a different address.
#   $1 - test directory
function TEST_mon_seesaw() {
    local dir=$1

    setup "$dir" || return

    # start with 1 mon
    run_mon "$dir" aa --public-addr $CEPH_MON_A || return 1
    run_mgr "$dir" x || return 1
    run_osd "$dir" 0 || return 1
    run_osd "$dir" 1 || return 1
    run_osd "$dir" 2 || return 1

    wait_for_quorum 300 1 || return 1

    # add in a second
    run_mon "$dir" bb --public-addr $CEPH_MON_B || return 1
    CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B"
    wait_for_quorum 300 2 || return 1

    # remove the first one
    ceph mon rm aa || return 1
    CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_B"
    sleep 5
    wait_for_quorum 300 1 || return 1

    # do some stuff that requires the osds be able to communicate with the
    # mons.  (see http://tracker.ceph.com/issues/17558)
    ceph osd pool create foo 8
    rados -p foo bench 1 write
    wait_for_clean || return 1

    # nuke monstore so that it will rejoin (otherwise we get
    # "not in monmap and have been in a quorum before; must have been removed"
    # ${dir:?} aborts instead of expanding to "/aa" should $dir ever be empty
    rm -rf -- "${dir:?}/aa"

    # add a back in
    # (use a different addr to avoid bind issues)
    run_mon "$dir" aa --public-addr $CEPH_MON_C || return 1
    CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_C,$CEPH_MON_B"
    wait_for_quorum 300 2 || return 1
}

main mon-seesaw "$@"

# Local Variables:
# compile-command: "cd ../.. ; make -j4 && test/mon/mon-seesaw.sh"
# End:
# diff --git a/qa/standalone/mon/osd-crush.sh b/qa/standalone/mon/osd-crush.sh
# new file mode 100755
# index 000000000..aa7cac694
# --- /dev/null
# +++ b/qa/standalone/mon/osd-crush.sh
# @@ -0,0 +1,196 @@
#!/usr/bin/env bash
#
# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com>
#
# Author: Loic Dachary <loic@dachary.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Library Public License for more details.
#
source "$CEPH_ROOT/qa/standalone/ceph-helpers.sh"

# Run the tests named in "$@" (default: every TEST_* function) against the
# test directory $1, wrapping each one in setup/teardown.
# Returns 1 as soon as setup, a test or teardown fails.
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7104" # git grep '\<7104\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "

    local funcs=${@:-$(set | ${SED} -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    local func
    for func in $funcs ; do
        setup "$dir" || return 1
        $func "$dir" || return 1
        teardown "$dir" || return 1
    done
}

# "crush rule create-simple" creates the expected take/choose_firstn steps
# and reports an already existing rule.
function TEST_crush_rule_create_simple() {
    local dir=$1

    run_mon "$dir" a || return 1

    ceph --format xml osd crush rule dump replicated_rule | \
        grep -E '<op>take</op><item>[^<]+</item><item_name>default</item_name>' | \
        grep '<op>choose_firstn</op><num>0</num><type>osd</type>' || return 1
    local rule=rule0
    local root=host1
    ceph osd crush add-bucket $root host
    local failure_domain=osd
    ceph osd crush rule create-simple $rule $root $failure_domain || return 1
    ceph osd crush rule create-simple $rule $root $failure_domain 2>&1 | \
        grep "$rule already exists" || return 1
    ceph --format xml osd crush rule dump $rule | \
        grep -E '<op>take</op><item>[^<]+</item><item_name>'$root'</item_name>' | \
        grep '<op>choose_firstn</op><num>0</num><type>'$failure_domain'</type>' || return 1
    ceph osd crush rule rm $rule || return 1
}

# "crush rule dump" works for one rule and for all rules, and fails for an
# unknown rule.
function TEST_crush_rule_dump() {
    local dir=$1

    run_mon "$dir" a || return 1

    local rule=rule1
    ceph osd crush rule create-erasure $rule || return 1
    # quoted so that empty jq output fails the comparison instead of breaking it
    [[ "$(ceph --format json osd crush rule dump $rule | \
           jq ".rule_name == \"$rule\"")" == true ]] || return 1
    [[ "$(ceph --format json osd crush rule dump | \
           jq "map(select(.rule_name == \"$rule\")) | length == 1")" == true ]] || return 1
    ! ceph osd crush rule dump non_existent_rule || return 1
    ceph osd crush rule rm $rule || return 1
}

# A rule can be created, listed and removed again.
function TEST_crush_rule_rm() {
    # previously undeclared: $dir only resolved through the caller's scope
    local dir=$1
    local rule=erasure2

    run_mon "$dir" a || return 1

    ceph osd crush rule create-erasure $rule default || return 1
    ceph osd crush rule ls | grep $rule || return 1
    ceph osd crush rule rm $rule || return 1
    ! ceph osd crush rule ls | grep $rule || return 1
}

# "crush rule create-erasure" with the default profile given implicitly,
# explicitly, and after the default profile was removed.
function TEST_crush_rule_create_erasure() {
    local dir=$1

    run_mon "$dir" a || return 1
    # should have at least one OSD
    run_osd "$dir" 0 || return 1

    local rule=rule3
    #
    # create a new rule with the default profile, implicitly
    #
    ceph osd crush rule create-erasure $rule || return 1
    ceph osd crush rule create-erasure $rule 2>&1 | \
        grep "$rule already exists" || return 1
    ceph --format xml osd crush rule dump $rule | \
        grep -E '<op>take</op><item>[^<]+</item><item_name>default</item_name>' | \
        grep '<op>chooseleaf_indep</op><num>0</num><type>host</type>' || return 1
    ceph osd crush rule rm $rule || return 1
    ! ceph osd crush rule ls | grep $rule || return 1
    #
    # create a new rule with the default profile, explicitly
    #
    ceph osd crush rule create-erasure $rule default || return 1
    ceph osd crush rule ls | grep $rule || return 1
    ceph osd crush rule rm $rule || return 1
    ! ceph osd crush rule ls | grep $rule || return 1
    #
    # create a new rule and the default profile, implicitly
    #
    ceph osd erasure-code-profile rm default || return 1
    ! ceph osd erasure-code-profile ls | grep default || return 1
    ceph osd crush rule create-erasure $rule || return 1
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1
    grep 'profile set default' "$dir/mon.a.log" || return 1
    ceph osd erasure-code-profile ls | grep default || return 1
    ceph osd crush rule rm $rule || return 1
    ! ceph osd crush rule ls | grep $rule || return 1
}

# Fill the crush map with rules 3..255, then expect ENOSPC when creating
# one more.
function TEST_add_rule_failed() {
    local dir=$1
    local i

    run_mon "$dir" a || return 1

    local root=host1

    ceph osd crush add-bucket $root host
    ceph osd crush rule create-simple test_rule1 $root osd firstn || return 1
    ceph osd crush rule create-simple test_rule2 $root osd firstn || return 1
    ceph osd getcrushmap > "$dir/crushmap" || return 1
    crushtool --decompile "$dir/crushmap" > "$dir/crushmap.txt" || return 1
    for ((i = 3; i <= 255; i++)); do
        cat <<EOF
rule test_rule$i {
	id $i
	type replicated
	step take $root
	step choose firstn 0 type osd
	step emit
}
EOF
    done >> "$dir/crushmap.txt"
    crushtool --compile "$dir/crushmap.txt" -o "$dir/crushmap" || return 1
    ceph osd setcrushmap -i "$dir/crushmap" || return 1
    ceph osd crush rule create-simple test_rule_nospace $root osd firstn 2>&1 | grep "Error ENOSPC" || return 1

}

# "crush rename-bucket" renames, is idempotent, and reports ENOENT for an
# unknown bucket.
function TEST_crush_rename_bucket() {
    local dir=$1

    run_mon "$dir" a || return 1

    ceph osd crush add-bucket host1 host
    ceph osd tree
    ! ceph osd tree | grep host2 || return 1
    ceph osd crush rename-bucket host1 host2 || return 1
    ceph osd tree
    ceph osd tree | grep host2 || return 1
    ceph osd crush rename-bucket host1 host2 || return 1 # idempotency
    ceph osd crush rename-bucket nonexistent something 2>&1 | grep "Error ENOENT" || return 1
}

# "crush ls" lists the children of a node and reports ENOENT for an unknown
# node.
function TEST_crush_ls_node() {
    local dir=$1
    run_mon "$dir" a || return 1
    ceph osd crush add-bucket default1 root
    ceph osd crush add-bucket host1 host
    ceph osd crush move host1 root=default1
    ceph osd crush ls default1 | grep host1 || return 1
    ceph osd crush ls default2 2>&1 | grep "Error ENOENT" || return 1
}

# Injecting an empty crush map is rejected with EINVAL.
function TEST_crush_reject_empty() {
    local dir=$1
    run_mon "$dir" a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
    # should have at least one OSD
    run_osd "$dir" 0 || return 1
    create_rbd_pool || return 1

    local empty_map=$dir/empty_map
    :> "$empty_map.txt"
    crushtool -c "$empty_map.txt" -o "$empty_map.map" || return 1
    expect_failure "$dir" "Error EINVAL" \
        ceph osd setcrushmap -i "$empty_map.map" || return 1
}

main osd-crush "$@"

# Local Variables:
# compile-command: "cd ../..
# ; make -j4 && test/mon/osd-crush.sh"
# End:
# diff --git a/qa/standalone/mon/osd-df.sh b/qa/standalone/mon/osd-df.sh
# new file mode 100755
# index 000000000..962909fdb
# --- /dev/null
# +++ b/qa/standalone/mon/osd-df.sh
# @@ -0,0 +1,97 @@
#!/bin/bash

source "$CEPH_ROOT/qa/standalone/ceph-helpers.sh"

# Run the tests named in "$@" (default: every TEST_* function) against the
# test directory $1, wrapping each one in setup/teardown.
# Returns 1 as soon as setup, a test or teardown fails.
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7113" # git grep '\<7113\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "

    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    local func
    for func in $funcs ; do
        setup "$dir" || return 1
        $func "$dir" || return 1
        teardown "$dir" || return 1
    done
}

# Exercise the "ceph osd df" filters (device class, crush node, pool) on a
# six-osd cluster.  osd names are matched with grep -F so that the "." in
# "osd.N" is taken literally.
#   $1 - test directory
function TEST_osd_df() {
    local dir=$1
    local osd_class
    setup "$dir" || return 1

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    run_osd "$dir" 0 || return 1
    run_osd "$dir" 1 || return 1
    run_osd "$dir" 2 || return 1
    run_osd "$dir" 3 || return 1
    run_osd "$dir" 4 || return 1
    run_osd "$dir" 5 || return 1

    # normal case
    ceph osd df --f json-pretty | grep -F osd.0 || return 1
    ceph osd df --f json-pretty | grep -F osd.1 || return 1
    ceph osd df --f json-pretty | grep -F osd.2 || return 1
    ceph osd df --f json-pretty | grep -F osd.3 || return 1
    ceph osd df --f json-pretty | grep -F osd.4 || return 1
    ceph osd df --f json-pretty | grep -F osd.5 || return 1

    # filter by device class
    osd_class=$(ceph osd crush get-device-class 0)
    ceph osd df class $osd_class --f json-pretty | grep -F 'osd.0' || return 1
    # post-nautilus we require filter-type no more
    ceph osd df $osd_class --f json-pretty | grep -F 'osd.0' || return 1
    ceph osd crush rm-device-class 0 || return 1
    ceph osd crush set-device-class aaa 0 || return 1
    ceph osd df aaa --f json-pretty | grep -F 'osd.0' || return 1
    ceph osd df aaa --f json-pretty | grep -F 'osd.1' && return 1
    # reset osd.0's device class
    ceph osd crush rm-device-class 0 || return 1
    ceph osd crush set-device-class $osd_class 0 || return 1

    # filter by crush node
    ceph osd df osd.0 --f json-pretty | grep -F osd.0 || return 1
    ceph osd df osd.0 --f json-pretty | grep -F osd.1 && return 1
    ceph osd crush move osd.0 root=default host=foo || return 1
    ceph osd crush move osd.1 root=default host=foo || return 1
    ceph osd crush move osd.2 root=default host=foo || return 1
    ceph osd crush move osd.3 root=default host=bar || return 1
    ceph osd crush move osd.4 root=default host=bar || return 1
    ceph osd crush move osd.5 root=default host=bar || return 1
    ceph osd df tree foo --f json-pretty | grep foo || return 1
    ceph osd df tree foo --f json-pretty | grep bar && return 1
    ceph osd df foo --f json-pretty | grep -F osd.0 || return 1
    ceph osd df foo --f json-pretty | grep -F osd.1 || return 1
    ceph osd df foo --f json-pretty | grep -F osd.2 || return 1
    ceph osd df foo --f json-pretty | grep -F osd.3 && return 1
    ceph osd df foo --f json-pretty | grep -F osd.4 && return 1
    ceph osd df foo --f json-pretty | grep -F osd.5 && return 1
    ceph osd df tree bar --f json-pretty | grep bar || return 1
    ceph osd df tree bar --f json-pretty | grep foo && return 1
    ceph osd df bar --f json-pretty | grep -F osd.0 && return 1
    ceph osd df bar --f json-pretty | grep -F osd.1 && return 1
    ceph osd df bar --f json-pretty | grep -F osd.2 && return 1
    ceph osd df bar --f json-pretty | grep -F osd.3 || return 1
    ceph osd df bar --f json-pretty | grep -F osd.4 || return 1
    ceph osd df bar --f json-pretty | grep -F osd.5 || return 1

    # filter by pool
    ceph osd crush rm-device-class all || return 1
    ceph osd crush set-device-class nvme 0 1 3 4 || return 1
    ceph osd crush rule create-replicated nvme-rule default host nvme || return 1
    ceph osd pool create nvme-pool 12 12 nvme-rule || return 1
    ceph osd df nvme-pool --f json-pretty | grep -F osd.0 || return 1
    ceph osd df nvme-pool --f json-pretty | grep -F osd.1 || return 1
    ceph osd df nvme-pool --f json-pretty | grep -F osd.2 && return 1
    ceph osd df nvme-pool --f json-pretty | grep -F osd.3 || return 1
    ceph osd df nvme-pool --f json-pretty | grep -F osd.4 || return 1
    ceph osd df nvme-pool --f json-pretty | grep -F osd.5 && return 1

    teardown "$dir" || return 1
}

main osd-df "$@"
# diff --git a/qa/standalone/mon/osd-erasure-code-profile.sh b/qa/standalone/mon/osd-erasure-code-profile.sh
# new file mode 100755
# index 000000000..0afc5fc0b
# --- /dev/null
# +++ b/qa/standalone/mon/osd-erasure-code-profile.sh
# @@ -0,0 +1,240 @@
#!/usr/bin/env bash
#
# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com>
#
# Author: Loic Dachary <loic@dachary.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Library Public License for more details.
#
source "$CEPH_ROOT/qa/standalone/ceph-helpers.sh"

# Run the tests named in "$@" (default: every TEST_* function) against the
# test directory $1, wrapping each one in setup/teardown.
# Returns 1 as soon as setup, a test or teardown fails.
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7220" # git grep '\<7220\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "

    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    local func
    for func in $funcs ; do
        setup "$dir" || return 1
        $func "$dir" || return 1
        teardown "$dir" || return 1
    done
}

# "erasure-code-profile set": defaults, key=value overrides, and --force.
function TEST_set() {
    local dir=$1

    run_mon "$dir" a || return 1

    local profile=myprofile
    #
    # no key=value pairs : use the default configuration
    #
    ceph osd erasure-code-profile set $profile 2>&1 || return 1
    ceph osd erasure-code-profile get $profile | \
        grep plugin=jerasure || return 1
    ceph osd erasure-code-profile rm $profile
    #
    # key=value pairs override the default
    #
    ceph osd erasure-code-profile set $profile \
        key=value plugin=isa || return 1
    ceph osd erasure-code-profile get $profile | \
        grep -e key=value -e plugin=isa || return 1
    #
    # --force is required to override an existing profile
    #
    ! ceph osd erasure-code-profile set $profile > "$dir/out" 2>&1 || return 1
    grep 'will not override' "$dir/out" || return 1
    ceph osd erasure-code-profile set $profile key=other --force || return 1
    ceph osd erasure-code-profile get $profile | \
        grep key=other || return 1

    ceph osd erasure-code-profile rm $profile # cleanup
}

# "erasure-code-profile ls" lists a new profile in plain and xml output.
function TEST_ls() {
    local dir=$1

    run_mon "$dir" a || return 1

    local profile=myprofile
    ! ceph osd erasure-code-profile ls | grep $profile || return 1
    ceph osd erasure-code-profile set $profile 2>&1 || return 1
    ceph osd erasure-code-profile ls | grep $profile || return 1
    ceph --format xml osd erasure-code-profile ls | \
        grep "<profile>$profile</profile>" || return 1

    ceph osd erasure-code-profile rm $profile # cleanup
}

# "erasure-code-profile rm": removes a profile, reports unknown ones, and
# refuses while a pool still uses the profile.
function TEST_rm() {
    local dir=$1

    run_mon "$dir" a || return 1

    local profile=myprofile
    ceph osd erasure-code-profile set $profile 2>&1 || return 1
    ceph osd erasure-code-profile ls | grep $profile || return 1
    ceph osd erasure-code-profile rm $profile || return 1
    ! ceph osd erasure-code-profile ls | grep $profile || return 1
    ceph osd erasure-code-profile rm WRONG 2>&1 | \
        grep "WRONG does not exist" || return 1

    ceph osd erasure-code-profile set $profile || return 1
    create_pool poolname 12 12 erasure $profile || return 1
    ! ceph osd erasure-code-profile rm $profile > "$dir/out" 2>&1 || return 1
    grep "poolname.*using.*$profile" "$dir/out" || return 1
    ceph osd pool delete poolname poolname --yes-i-really-really-mean-it || return 1
    ceph osd erasure-code-profile rm $profile || return 1

    ceph osd erasure-code-profile rm $profile # cleanup
}

# "erasure-code-profile get": default profile in plain and xml output, error
# for an unknown profile.
function TEST_get() {
    local dir=$1

    run_mon "$dir" a || return 1

    local default_profile=default
    ceph osd erasure-code-profile get $default_profile | \
        grep plugin=jerasure || return 1
    ceph --format xml osd erasure-code-profile get $default_profile | \
        grep '<plugin>jerasure</plugin>' || return 1
    ! ceph osd erasure-code-profile get WRONG > "$dir/out" 2>&1 || return 1
    grep -q "unknown erasure code profile 'WRONG'" "$dir/out" || return 1
}

# Setting an identical profile twice succeeds, for the default profile, the
# default plugin and the lrc plugin.
function TEST_set_idempotent() {
    local dir=$1

    run_mon "$dir" a || return 1
    #
    # The default profile is set using a code path different from
    # ceph osd erasure-code-profile set: verify that it is idempotent,
    # as if it was using the same code path.
    #
    ceph osd erasure-code-profile set default k=2 m=2 2>&1 || return 1
    local profile
    #
    # Because plugin=jerasure is the default, it uses a slightly
    # different code path where defaults (m=1 for instance) are added
    # implicitly.
    #
    profile=profileidempotent1
    ! ceph osd erasure-code-profile ls | grep $profile || return 1
    ceph osd erasure-code-profile set $profile k=2 crush-failure-domain=osd 2>&1 || return 1
    ceph osd erasure-code-profile ls | grep $profile || return 1
    ceph osd erasure-code-profile set $profile k=2 crush-failure-domain=osd 2>&1 || return 1
    ceph osd erasure-code-profile rm $profile # cleanup

    #
    # In the general case the profile is exactly what is on
    #
    profile=profileidempotent2
    ! ceph osd erasure-code-profile ls | grep $profile || return 1
    ceph osd erasure-code-profile set $profile plugin=lrc k=4 m=2 l=3 crush-failure-domain=osd 2>&1 || return 1
    ceph osd erasure-code-profile ls | grep $profile || return 1
    ceph osd erasure-code-profile set $profile plugin=lrc k=4 m=2 l=3 crush-failure-domain=osd 2>&1 || return 1
    ceph osd erasure-code-profile rm $profile # cleanup
}

# A default profile that is valid JSON but not an object is rejected.
function TEST_format_invalid() {
    local dir=$1

    local profile=profile
    # osd_pool_default_erasure-code-profile is
    # valid JSON but not of the expected type
    run_mon "$dir" a \
        --osd_pool_default_erasure-code-profile 1 || return 1
    ! ceph osd erasure-code-profile set $profile > "$dir/out" 2>&1 || return 1
    cat "$dir/out"
    grep 'must be a JSON object' "$dir/out" || return 1
}

# A default profile given as JSON shows up in the default profile.
function TEST_format_json() {
    local dir=$1

    # osd_pool_default_erasure-code-profile is JSON
    local expected='"plugin":"isa"'
    run_mon "$dir" a \
        --osd_pool_default_erasure-code-profile "{$expected}" || return 1
    ceph --format json osd erasure-code-profile get default | \
        grep "$expected" || return 1
}

# A default profile given as plain key=value text shows up in the default
# profile.
function TEST_format_plain() {
    local dir=$1

    # osd_pool_default_erasure-code-profile is plain text
    local expected='"plugin":"isa"'
    run_mon "$dir" a \
        --osd_pool_default_erasure-code-profile "plugin=isa" || return 1
    ceph --format json osd erasure-code-profile get default | \
        grep "$expected" || return 1
}

# Invalid k values are rejected by the lrc, isa (when built) and jerasure
# plugins.
function TEST_profile_k_sanity() {
    local dir=$1
    local profile=profile-sanity

    run_mon "$dir" a || return 1

    expect_failure "$dir" 'k must be a multiple of (k + m) / l' \
        ceph osd erasure-code-profile set $profile \
        plugin=lrc \
        l=1 \
        k=1 \
        m=1 || return 1

    if erasure_code_plugin_exists isa ; then
        expect_failure "$dir" 'k=1 must be >= 2' \
            ceph osd erasure-code-profile set $profile \
            plugin=isa \
            k=1 \
            m=1 || return 1
    else
        echo "SKIP because plugin isa has not been built"
    fi

    expect_failure "$dir" 'k=1 must be >= 2' \
        ceph osd erasure-code-profile set $profile \
        plugin=jerasure \
        k=1 \
        m=1 || return 1
}

# A profile naming an unknown crush failure domain is rejected.
function TEST_invalid_crush_failure_domain() {
    local dir=$1

    run_mon "$dir" a || return 1

    local profile=ec_profile
    local crush_failure_domain=invalid_failure_domain

    ! ceph osd erasure-code-profile set $profile k=4 m=2 crush-failure-domain=$crush_failure_domain 2>&1 || return 1
}

main osd-erasure-code-profile "$@"

# Local Variables:
# compile-command: "cd ../..
# ; make -j4 && test/mon/osd-erasure-code-profile.sh"
# End:
# diff --git a/qa/standalone/mon/osd-pool-create.sh b/qa/standalone/mon/osd-pool-create.sh
# new file mode 100755
# index 000000000..6d2c5ad3e
# --- /dev/null
# +++ b/qa/standalone/mon/osd-pool-create.sh
# @@ -0,0 +1,307 @@
#!/usr/bin/env bash
#
# Copyright (C) 2013, 2014 Cloudwatt <libre.licensing@cloudwatt.com>
# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com>
#
# Author: Loic Dachary <loic@dachary.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Library Public License for more details.
#
source "$CEPH_ROOT/qa/standalone/ceph-helpers.sh"

# Run the tests named in "$@" (default: every TEST_* function) against the
# test directory $1, wrapping each one in setup/teardown.
# Returns 1 as soon as setup, a test or teardown fails.
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7105" # git grep '\<7105\>' : there must be only one
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "
    export CEPH_ARGS

    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    local func
    for func in $funcs ; do
        setup "$dir" || return 1
        $func "$dir" || return 1
        teardown "$dir" || return 1
    done
}

# Before http://tracker.ceph.com/issues/8307 the invalid profile was created
function TEST_erasure_invalid_profile() {
    local dir=$1
    run_mon "$dir" a || return 1
    local poolname=pool_erasure
    local notaprofile=not-a-valid-erasure-code-profile
    ! ceph osd pool create $poolname 12 12 erasure $notaprofile || return 1
    ! ceph osd erasure-code-profile ls | grep $notaprofile || return 1
}

# Crush rule selection for erasure coded pools: explicit rule, implicit rule
# named after the pool, and an error for an unknown rule.
function TEST_erasure_crush_rule() {
    local dir=$1
    run_mon "$dir" a || return 1
    #
    # choose the crush rule used with an erasure coded pool
    #
    local crush_rule=myrule
    ! ceph osd crush rule ls | grep $crush_rule || return 1
    ceph osd crush rule create-erasure $crush_rule
    ceph osd crush rule ls | grep $crush_rule
    local poolname
    poolname=pool_erasure1
    ! ceph --format json osd dump | grep '"crush_rule":1' || return 1
    ceph osd pool create $poolname 12 12 erasure default $crush_rule
    ceph --format json osd dump | grep '"crush_rule":1' || return 1
    #
    # a crush rule by the name of the pool is implicitly created
    #
    poolname=pool_erasure2
    ceph osd erasure-code-profile set myprofile
    ceph osd pool create $poolname 12 12 erasure myprofile
    ceph osd crush rule ls | grep $poolname || return 1
    #
    # a non existent crush rule given in argument is an error
    # http://tracker.ceph.com/issues/9304
    #
    poolname=pool_erasure3
    ! ceph osd pool create $poolname 12 12 erasure myprofile INVALIDRULE || return 1
}

# Creating an erasure pool with the "default" profile re-creates that
# profile after it was removed.
function TEST_erasure_code_profile_default() {
    local dir=$1
    # was undefined before, which shifted every "pool create" argument by one
    local poolname=pool_erasure
    run_mon "$dir" a || return 1
    ceph osd erasure-code-profile rm default || return 1
    ! ceph osd erasure-code-profile ls | grep default || return 1
    ceph osd pool create $poolname 12 12 erasure default
    ceph osd erasure-code-profile ls | grep default || return 1
}

# The stripe_width of a pool created with the default profile is
# osd_pool_erasure_code_stripe_unit * k.
function TEST_erasure_crush_stripe_unit() {
    local dir=$1
    local stripe_unit k stripe_width
    # the default stripe unit is used to initialize the pool
    run_mon "$dir" a --public-addr $CEPH_MON || return 1
    stripe_unit=$(ceph-conf --show-config-value osd_pool_erasure_code_stripe_unit)
    # the pool below uses the default profile, so take k from it; no profile
    # named "myprofile" is ever created by this test
    k=$(ceph osd erasure-code-profile get default | sed -n -e 's/^k=//p')
    stripe_width=$((stripe_unit * k))
    ceph osd pool create pool_erasure 12 12 erasure
    ceph --format json osd dump | tee "$dir/osd.json"
    grep '"stripe_width":'$stripe_width "$dir/osd.json" > /dev/null || return 1
}

# A stripe unit that is not aligned gets padded as required by the plugin.
function TEST_erasure_crush_stripe_unit_padded() {
    local dir=$1
    # setting osd_pool_erasure_code_stripe_unit modifies the stripe_width
    # and it is padded as required by the default plugin
    local k=4
    # local, so a stale global $profile cannot leak into the mon option
    local profile=" plugin=jerasure"
    profile+=" technique=reed_sol_van"
    profile+=" k=$k"
    profile+=" m=2"
    local actual_stripe_unit=2048
    local desired_stripe_unit=$((actual_stripe_unit - 1))
    local actual_stripe_width=$((actual_stripe_unit * k))
    run_mon "$dir" a \
        --osd_pool_erasure_code_stripe_unit $desired_stripe_unit \
        --osd_pool_default_erasure_code_profile "$profile" || return 1
    ceph osd pool create pool_erasure 12 12 erasure
    ceph osd dump | tee "$dir/osd.json"
    grep "stripe_width $actual_stripe_width" "$dir/osd.json" > /dev/null || return 1
}

# An erasure pool uses the default profile, cannot be created twice and
# cannot change to replicated.
function TEST_erasure_code_pool() {
    local dir=$1
    run_mon "$dir" a || return 1
    ceph --format json osd dump > "$dir/osd.json"
    local expected='"erasure_code_profile":"default"'
    ! grep "$expected" "$dir/osd.json" || return 1
    ceph osd pool create erasurecodes 12 12 erasure
    ceph --format json osd dump | tee "$dir/osd.json"
    grep "$expected" "$dir/osd.json" > /dev/null || return 1

    ceph osd pool create erasurecodes 12 12 erasure 2>&1 | \
        grep 'already exists' || return 1
    ceph osd pool create erasurecodes 12 12 2>&1 | \
        grep 'cannot change to type replicated' || return 1
}

# A replicated pool can be created with an explicit crush rule; an unknown
# rule is an error.
function TEST_replicated_pool_with_rule() {
    local dir=$1
    local rule_id
    run_mon "$dir" a || return 1
    local rule=rule0
    local root=host1
    ceph osd crush add-bucket $root host
    local failure_domain=osd
    local poolname=mypool
    ceph osd crush rule create-simple $rule $root $failure_domain || return 1
    ceph osd crush rule ls | grep $rule
    ceph osd pool create $poolname 12 12 replicated $rule || return 1
    # NOTE(review): field 4 depends on the indentation of the dump output;
    # confirm this really yields the rule id rather than an empty string.
    rule_id=$(ceph osd crush rule dump $rule | grep "rule_id" | awk -F'[ :,]' '{print $4}')
    ceph osd pool get $poolname crush_rule 2>&1 | \
        grep "crush_rule: $rule_id" || return 1
    #non-existent crush rule
    ceph osd pool create newpool 12 12 replicated non-existent 2>&1 | \
        grep "doesn't exist" || return 1
}

# An erasure pool created with an lrc profile records that profile and gets
# a crush rule named after the pool.
function TEST_erasure_code_pool_lrc() {
    local dir=$1
    run_mon "$dir" a || return 1

    ceph osd erasure-code-profile set LRCprofile \
             plugin=lrc \
             mapping=DD_ \
             layers='[ [ "DDc", "" ] ]' || return 1

    ceph --format json osd dump > "$dir/osd.json"
    local expected='"erasure_code_profile":"LRCprofile"'
    local poolname=erasurecodes
    ! grep "$expected" "$dir/osd.json" || return 1
    ceph osd pool create $poolname 12 12 erasure LRCprofile
    ceph --format json osd dump | tee "$dir/osd.json"
    grep "$expected" "$dir/osd.json" > /dev/null || return 1
    ceph osd crush rule ls | grep $poolname || return 1
}

# Replicated pool creation: explicit type, defaults, duplicate detection and
# no change to erasure.
function TEST_replicated_pool() {
    local dir=$1
    run_mon "$dir" a || return 1
    ceph osd pool create replicated 12 12 replicated replicated_rule || return 1
    ceph osd pool create replicated 12 12 replicated replicated_rule 2>&1 | \
        grep 'already exists' || return 1
    # default is replicated
    ceph osd pool create replicated1 12 12 || return 1
    # default is replicated, pgp_num = pg_num
    ceph osd pool create replicated2 12 || return 1
    ceph osd pool create replicated 12 12 erasure 2>&1 | \
        grep 'cannot change to type erasure' || return 1
}

# Pool deletion is refused while mon-allow-pool-delete is off.
function TEST_no_pool_delete() {
    local dir=$1
    run_mon "$dir" a || return 1
    ceph osd pool create foo 1 || return 1
    ceph tell mon.a injectargs -- --no-mon-allow-pool-delete || return 1
    ! ceph osd pool delete foo foo --yes-i-really-really-mean-it || return 1
    ceph tell mon.a injectargs -- --mon-allow-pool-delete || return 1
    ceph osd pool delete foo foo --yes-i-really-really-mean-it || return 1
}

# Pool names containing non-ASCII UTF-8 characters work on the CLI and in
# JSON output.
function TEST_utf8_cli() {
    local dir=$1
    local OLDLANG
    run_mon "$dir" a || return 1
    # Hopefully it's safe to include literal UTF-8 characters to test
    # the fix for http://tracker.ceph.com/issues/7387.  If it turns out
    # to not be OK (when is the default encoding *not* UTF-8?), maybe
    # the character '黄' can be replaced with the escape $'\xe9\xbb\x84'
    OLDLANG="$LANG"
    export LANG=en_US.UTF-8
    ceph osd pool create 黄 16 || return 1
    ceph osd lspools 2>&1 | \
        grep "黄" || return 1
    ceph -f json-pretty osd dump | \
        python3 -c "import json; import sys; json.load(sys.stdin)" || return 1
    ceph osd pool delete 黄 黄 --yes-i-really-really-mean-it
    export LANG="$OLDLANG"
}

# Create pools test1..test<$2 + 1>, give pool i the recovery_priority
# i * $3, restart the cluster and compare the priorities listed by
# "ceph osd dump" with the expected values.
#   $1 - test directory
#   $2 - highest pool index (pools 0..$2 are created)
#   $3 - priority step between consecutive pools
#   $4 - space separated expected priorities, one per pool, in dump order
# Returns 1 on the first mismatch.
function check_pool_priority() {
    local dir=$1
    shift
    local pools=$1
    shift
    local spread="$1"
    shift
    local results="$1"
    local i num pos result
    local -a expected

    setup "$dir" || return 1

    EXTRA_OPTS="--debug_allow_any_pool_priority=true"
    export EXTRA_OPTS
    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    run_osd "$dir" 0 || return 1
    run_osd "$dir" 1 || return 1
    run_osd "$dir" 2 || return 1

    # Add pool 0 too
    for ((i = 0; i <= pools; i++)); do
        num=$((i + 1))
        ceph osd pool create test${num} 1 1
    done

    wait_for_clean || return 1
    for ((i = 0; i <= pools; i++)); do
        num=$((i + 1))
        ceph osd pool set test${num} recovery_priority $((i * spread))
    done

    #grep "recovery_priority.*pool set" out/mon.a.log

    # plain "ceph" like everywhere else; bin/ceph only exists in a build dir
    ceph osd dump

    # Restart everything so mon converts the priorities
    kill_daemons
    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    activate_osd "$dir" 0 || return 1
    activate_osd "$dir" 1 || return 1
    activate_osd "$dir" 2 || return 1
    sleep 5

    grep convert "$dir/mon.a.log"
    ceph osd dump

    # intentional word splitting: one array element per expected priority
    expected=($results)
    pos=0
    for i in $(ceph osd dump | grep ^pool | sed 's/.*recovery_priority //' | awk '{ print $1 }')
    do
      result=${expected[pos]}
      # A value of 0 is an unset value so sed/awk gets "pool"
      if test "$result" = "0"
      then
        result="pool"
      fi
      test "$result" = "$i" || return 1
      pos=$((pos + 1))
    done
}

# Only positive priorities: 21 pools with a step of 5.
function TEST_pool_pos_only_prio() {
   local dir=$1
   check_pool_priority "$dir" 20 5 "0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10" || return 1
}

# Each wrapper below hands check_pool_priority a pool count, a "spread"
# (the multiplier, optionally with an offset, applied to the pool index to
# produce the recovery_priority that is set) and the space separated list of
# priorities expected to be reported once the daemons have been restarted.

# Only negative priorities are set (index * -5).
function TEST_pool_neg_only_prio() {
    local dir=$1
    check_pool_priority "$dir" 20 -5 "0 0 -1 -1 -2 -2 -3 -3 -4 -4 -5 -5 -6 -6 -7 -7 -8 -8 -9 -9 -10" || return 1
}

# Priorities on both sides of zero (index * 5 - 50).
function TEST_pool_both_prio() {
    local dir=$1
    check_pool_priority "$dir" 20 "5 - 50" "-10 -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 10" || return 1
}

# Mostly positive priorities (index * 2 - 4).
function TEST_pool_both_prio_no_neg() {
    local dir=$1
    check_pool_priority "$dir" 20 "2 - 4" "-4 -2 0 0 1 1 2 2 3 3 4 5 5 6 6 7 7 8 8 9 10" || return 1
}

# Mostly negative priorities (index * 2 - 36).
function TEST_pool_both_prio_no_pos() {
    local dir=$1
    check_pool_priority "$dir" 20 "2 - 36" "-10 -9 -8 -8 -7 -7 -6 -6 -5 -5 -4 -3 -3 -2 -2 -1 -1 0 0 2 4" || return 1
}


main osd-pool-create "$@"

# Local Variables:
# compile-command: "cd ../.. ; make -j4 && test/mon/osd-pool-create.sh"
# End:

# diff --git a/qa/standalone/mon/osd-pool-df.sh b/qa/standalone/mon/osd-pool-df.sh
# new file mode 100755
# index 000000000..d2b80ec72
# --- /dev/null
# +++ b/qa/standalone/mon/osd-pool-df.sh
# @@ -0,0 +1,76 @@
#!/usr/bin/env bash
#
# Copyright (C) 2017 Tencent <contact@tencent.com>
#
# Author: Chang Liu <liuchang0812@gmail.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library Public License for more details.
#
source "$CEPH_ROOT/qa/standalone/ceph-helpers.sh"

# Entry point invoked by main: export the monitor address and common ceph
# arguments, then run the requested TEST_* functions (all of them when none
# are named), each wrapped in setup/teardown.
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7113" # git grep '\<7113\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "

    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    local func
    for func in $funcs ; do
        setup "$dir" || return 1
        "$func" "$dir" || return 1
        teardown "$dir" || return 1
    done
}

# Check that the cluster wide available space reported by "ceph df" is at
# least the raw space implied by each pool's max_avail: 3x for the replicated
# pool and 1.5x for the k=4 m=2 erasure coded pool.
function TEST_ceph_df() {
    local dir=$1
    setup "$dir" || return 1

    run_mon "$dir" a || return 1
    local osd
    for osd in 0 1 2 3 4 5; do
        run_osd "$dir" "$osd" || return 1
    done
    run_mgr "$dir" x || return 1

    # Kept local so that a "profile" variable inherited from the caller can
    # not leak into the erasure code profile.
    local -a profile=(
        plugin=jerasure
        technique=reed_sol_van
        k=4
        m=2
        crush-failure-domain=osd
    )
    ceph osd erasure-code-profile set ec42profile "${profile[@]}"

    local rep_poolname=testcephdf_replicate
    local ec_poolname=testcephdf_erasurecode
    create_pool $rep_poolname 6 6 replicated
    create_pool $ec_poolname 6 6 erasure ec42profile
    flush_pg_stats

    local global_avail rep_avail ec_avail
    global_avail=$(ceph df -f json | jq '.stats.total_avail_bytes')
    rep_avail=$(ceph df -f json | jq '.pools | map(select(.name == "'$rep_poolname'"))[0].stats.max_avail')
    ec_avail=$(ceph df -f json | jq '.pools | map(select(.name == "'$ec_poolname'"))[0].stats.max_avail')

    # Anything that is not a plain byte count (empty, "null", ...) means the
    # statistic could not be read, which is a failure in itself.
    local value
    for value in "$global_avail" "$rep_avail" "$ec_avail"; do
        [[ "$value" =~ ^[0-9]+$ ]] || return 1
    done

    # Piping the comparison through bc only printed 0 or 1 and always exited
    # successfully, so it could never fail the test.  Compare in the shell;
    # the 1.5 factor is expressed as 3/2 to stay within integer arithmetic.
    (( global_avail >= rep_avail * 3 )) || return 1
    (( global_avail * 2 >= ec_avail * 3 )) || return 1

    ceph osd pool delete $rep_poolname $rep_poolname --yes-i-really-really-mean-it
    ceph osd pool delete $ec_poolname $ec_poolname --yes-i-really-really-mean-it
    ceph osd erasure-code-profile rm ec42profile
    teardown "$dir" || return 1
}

main osd-pool-df "$@"

# diff --git a/qa/standalone/mon/test_pool_quota.sh b/qa/standalone/mon/test_pool_quota.sh
# new file mode 100755
# index 000000000..b87ec2232
# --- /dev/null
# +++ b/qa/standalone/mon/test_pool_quota.sh
# @@ -0,0 +1,63 @@
#!/usr/bin/env bash

#
# Generic pool quota test
#

# Includes


source "$CEPH_ROOT/qa/standalone/ceph-helpers.sh"

# Entry point invoked by main.  Unlike the other scripts, setup and teardown
# are left to the individual test function.
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:17108" # git grep '\<17108\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "

    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    local func
    for func in $funcs ; do
        "$func" "$dir" || return 1
    done
}

# A new pool must report N/A for both quotas in "ceph df detail"; after
# setting max_objects=1000 and max_bytes=1024 it must report 1000 and 1K.
function TEST_pool_quota() {
    local dir=$1
    setup "$dir" || return 1

    run_mon "$dir" a || return 1
    run_osd "$dir" 0 || return 1
    run_osd "$dir" 1 || return 1
    run_osd "$dir" 2 || return 1

    local poolname=testquota
    create_pool $poolname 20

    local objects bytes
    objects=$(ceph df detail | grep -w $poolname | awk '{print $3}')
    bytes=$(ceph df detail | grep -w $poolname | awk '{print $4}')

    echo "$objects"
    echo "$bytes"
    # Quoted on purpose: with an empty value the unquoted form is a syntax
    # error for test, which the "if" silently treats as "no mismatch".
    if [[ "$objects" != 'N/A' || "$bytes" != 'N/A' ]]; then
        return 1
    fi

    ceph osd pool set-quota $poolname max_objects 1000
    ceph osd pool set-quota $poolname max_bytes 1024

    objects=$(ceph df detail | grep -w $poolname | awk '{print $3}')
    bytes=$(ceph df detail | grep -w $poolname | awk '{print $4}')

    if [[ "$objects" != '1000' || "$bytes" != '1K' ]]; then
        return 1
    fi

    ceph osd pool delete $poolname $poolname --yes-i-really-really-mean-it
    teardown "$dir" || return 1
}

# Forward the command line so individual tests can be selected, as in the
# other standalone scripts.
main testpoolquota "$@"

# diff --git a/qa/standalone/osd-backfill/osd-backfill-prio.sh b/qa/standalone/osd-backfill/osd-backfill-prio.sh
# new file mode 100755
# index 000000000..9749ca34c
# --- /dev/null
# +++ b/qa/standalone/osd-backfill/osd-backfill-prio.sh
# @@ -0,0 +1,522 @@
#!/usr/bin/env bash
#
# Copyright (C) 2019 Red Hat <contact@redhat.com>
#
# Author: David Zafman <dzafman@redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU
# Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library Public License for more details.
#

source "$CEPH_ROOT/qa/standalone/ceph-helpers.sh"

# Entry point invoked by main: export the cluster arguments and the priority
# constants used by the tests, then run the requested TEST_* functions (all
# of them when none are named), each wrapped in setup/teardown.
function run() {
    local dir=$1
    shift

    # Fix port????
    export CEPH_MON="127.0.0.1:7114" # git grep '\<7114\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON --osd_max_backfills=1 --debug_reserver=20 "
    CEPH_ARGS+="--osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10 "
    # Set osd op queue = wpq for the tests. Backfill priority is not
    # considered by mclock_scheduler leading to unexpected results.
    CEPH_ARGS+="--osd-op-queue=wpq "
    export objects=50
    export poolprefix=test
    export FORCE_PRIO="254"    # See OSD_BACKFILL_PRIORITY_FORCED
    export DEGRADED_PRIO="150" # See OSD_BACKFILL_DEGRADED_PRIORITY_BASE + 10
    export NORMAL_PRIO="110"   # See OSD_BACKFILL_PRIORITY_BASE + 10

    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    local func
    for func in $funcs ; do
        setup "$dir" || return 1
        "$func" "$dir" || return 1
        teardown "$dir" || return 1
    done
}

# Print the dump_recovery_reservations output of osd.$1 (admin socket query,
# run with CEPH_ARGS cleared).
function _dump_reservations() {
    local osd=$1
    CEPH_ARGS='' ceph --admin-daemon "$(get_asok_path "osd.${osd}")" dump_recovery_reservations
}

# Retry "ceph pg force-backfill $1" up to $2 times, two seconds apart, until
# the reply no longer says the pg doesn't require backfilling.
function _force_backfill_retry() {
    local pgid=$1
    local max_tries=$2
    local try
    for (( try = 1; try <= max_tries; try++ )); do
        if ! ceph pg force-backfill "$pgid" 2>&1 | grep -q "doesn't require backfilling"; then
            return 0
        fi
        sleep 2
    done
    echo "ERROR: Didn't appear to be able to force-backfill"
    return 1
}

# Print the priority with which pg $2 is queued in the local reservations of
# the dump stored in file $1.
function _queued_prio() {
    local dump=$1
    local pgid=$2
    jq "(.local_reservations.queues[].items[] | select(.item == \"${pgid}\")).prio" "$dump"
}

# Check the first in progress reservation of the dump in file $1.
# $2 is "local" or "remote", $3 the expected pg, $4 the expected priority and
# $5 an optional suffix for the failure messages.
function _check_primary_in_progress() {
    local dump=$1
    local side=$2
    local pgid=$3
    local prio=$4
    local suffix=$5
    local item actual

    # -r prints the item without the JSON double-quotes
    item=$(jq -r ".${side}_reservations.in_progress[0].item" "$dump")
    if [[ "$item" != "$pgid" ]]; then
        echo "The primary PG ${pgid} didn't become the in progress item${suffix}"
        return 1
    fi
    actual=$(jq ".${side}_reservations.in_progress[0].prio" "$dump")
    if [[ "$actual" != "$prio" ]]; then
        echo "The primary PG ${pgid} doesn't have prio ${prio}${suffix}"
        return 1
    fi
    return 0
}


# Exercise the four re-prioritisation cases of the backfill reservation
# queue (numbered in the comments below) with force-backfill and
# cancel-force-backfill on three PGs that share a primary OSD.
function TEST_backfill_priority() {
    local dir=$1
    local pools=10
    local OSDS=5
    # size 2 -> 1 means degraded by 1, so add 1 to base prio
    local degraded_prio=$(( DEGRADED_PRIO + 1 ))
    local max_tries=10
    local osd p pname i

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    export CEPH_ARGS

    for (( osd = 0; osd < OSDS; osd++ )); do
        run_osd "$dir" "$osd" || return 1
    done

    for (( p = 1; p <= pools; p++ )); do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 2
    done
    sleep 5

    wait_for_clean || return 1

    ceph pg dump pgs

    # Find 3 pools with a pg with the same primaries but second
    # replica on another osd.
    local PG1
    local POOLNUM1
    local pool1
    local chk_osd1_1
    local chk_osd1_2

    local PG2
    local POOLNUM2
    local pool2
    local chk_osd2

    local PG3
    local POOLNUM3
    local pool3

    local test_osd1 test_osd2
    for (( p = 1; p <= pools; p++ )); do
        ceph pg map "${p}.0" --format=json | jq '.acting[]' > "$dir/acting"
        test_osd1=$(head -1 "$dir/acting")
        test_osd2=$(tail -1 "$dir/acting")
        if [[ -z "$PG1" ]]; then
            PG1="${p}.0"
            POOLNUM1=$p
            pool1="${poolprefix}$p"
            chk_osd1_1=$test_osd1
            chk_osd1_2=$test_osd2
        elif [[ -z "$PG2" && "$chk_osd1_1" == "$test_osd1" && "$chk_osd1_2" != "$test_osd2" ]]; then
            PG2="${p}.0"
            POOLNUM2=$p
            pool2="${poolprefix}$p"
            chk_osd2=$test_osd2
        elif [[ -n "$PG2" && "$chk_osd1_1" == "$test_osd1" && "$chk_osd1_2" != "$test_osd2" && "$chk_osd2" != "$test_osd2" ]]; then
            PG3="${p}.0"
            POOLNUM3=$p
            pool3="${poolprefix}$p"
            break
        fi
    done
    rm -f "$dir/acting"

    # Both pools are required.  (The check used to compare the literal
    # string "pool3", so a missing third pool went unnoticed.)
    if [[ -z "$pool2" || -z "$pool3" ]]; then
        echo "Failure to find appropirate PGs"
        return 1
    fi

    for (( p = 1; p <= pools; p++ )); do
        if [[ "$p" != "$POOLNUM1" && "$p" != "$POOLNUM2" && "$p" != "$POOLNUM3" ]]; then
            delete_pool "${poolprefix}$p"
        fi
    done

    ceph osd pool set "$pool2" size 1 --yes-i-really-mean-it
    ceph osd pool set "$pool3" size 1 --yes-i-really-mean-it
    wait_for_clean || return 1

    dd if=/dev/urandom of="$dir/data" bs=1M count=10
    p=1
    for pname in "$pool1" "$pool2" "$pool3"; do
        for (( i = 1; i <= objects; i++ )); do
            rados -p "${pname}" put "obj${i}-p${p}" "$dir/data"
        done
        p=$(( p + 1 ))
    done

    # NOTE(review): the result is not used below; the call is kept as is.
    local otherosd
    otherosd=$(get_not_primary "$pool1" obj1-p1)

    ceph pg dump pgs
    local ERRORS=0
    local PRIO ITEM

    ceph osd set nobackfill
    ceph osd set noout

    # Get a pg to want to backfill and quickly force it
    # to be preempted.
    ceph osd pool set "$pool3" size 2
    sleep 2

    _dump_reservations "$chk_osd1_1" || return 1

    # 3. Item is in progress, adjust priority with no higher priority waiting
    _force_backfill_retry "$PG3" "$max_tries" || ERRORS=$(( ERRORS + 1 ))
    flush_pg_stats || return 1
    _dump_reservations "$chk_osd1_1" || return 1

    ceph osd out "osd.$chk_osd1_2"
    sleep 2
    flush_pg_stats || return 1
    _dump_reservations "$chk_osd1_1" || return 1
    ceph pg dump pgs

    ceph osd pool set "$pool2" size 2
    sleep 2
    flush_pg_stats || return 1
    _dump_reservations "$chk_osd1_1" > "$dir/out" || return 1
    cat "$dir/out"
    ceph pg dump pgs

    PRIO=$(_queued_prio "$dir/out" "$PG1")
    if [[ "$PRIO" != "$NORMAL_PRIO" ]]; then
        echo "The normal PG ${PG1} doesn't have prio $NORMAL_PRIO queued waiting"
        ERRORS=$(( ERRORS + 1 ))
    fi

    # -r prints the item without the JSON double-quotes
    ITEM=$(jq -r '.local_reservations.in_progress[0].item' "$dir/out")
    if [[ "$ITEM" != "$PG3" ]]; then
        echo "The force-backfill PG $PG3 didn't become the in progress item"
        ERRORS=$(( ERRORS + 1 ))
    else
        PRIO=$(jq '.local_reservations.in_progress[0].prio' "$dir/out")
        if [[ "$PRIO" != "$FORCE_PRIO" ]]; then
            echo "The force-backfill PG ${PG3} doesn't have prio $FORCE_PRIO"
            ERRORS=$(( ERRORS + 1 ))
        fi
    fi

    # 1. Item is queued, re-queue with new priority
    _force_backfill_retry "$PG2" "$max_tries" || ERRORS=$(( ERRORS + 1 ))
    sleep 2
    _dump_reservations "$chk_osd1_1" > "$dir/out" || return 1
    cat "$dir/out"
    PRIO=$(_queued_prio "$dir/out" "$PG2")
    if [[ "$PRIO" != "$FORCE_PRIO" ]]; then
        echo "The second force-backfill PG ${PG2} doesn't have prio $FORCE_PRIO"
        ERRORS=$(( ERRORS + 1 ))
    fi
    flush_pg_stats || return 1

    # 4. Item is in progress, if higher priority items waiting preempt item
    ceph pg cancel-force-backfill "$PG3" || return 1
    sleep 2
    _dump_reservations "$chk_osd1_1" > "$dir/out" || return 1
    cat "$dir/out"
    PRIO=$(_queued_prio "$dir/out" "$PG3")
    if [[ "$PRIO" != "$degraded_prio" ]]; then
        echo "After cancel-force-backfill PG ${PG3} doesn't have prio $degraded_prio"
        ERRORS=$(( ERRORS + 1 ))
    fi

    ITEM=$(jq -r '.local_reservations.in_progress[0].item' "$dir/out")
    if [[ "$ITEM" != "$PG2" ]]; then
        echo "The force-recovery PG $PG2 didn't become the in progress item"
        ERRORS=$(( ERRORS + 1 ))
    else
        PRIO=$(jq '.local_reservations.in_progress[0].prio' "$dir/out")
        if [[ "$PRIO" != "$FORCE_PRIO" ]]; then
            echo "The first force-recovery PG ${PG2} doesn't have prio $FORCE_PRIO"
            ERRORS=$(( ERRORS + 1 ))
        fi
    fi

    ceph pg cancel-force-backfill "$PG2" || return 1
    sleep 5
    _dump_reservations "$chk_osd1_1" || return 1

    # 2. Item is queued, re-queue and preempt because new priority higher than an in progress item
    flush_pg_stats || return 1
    ceph pg force-backfill "$PG3" || return 1
    sleep 2

    _dump_reservations "$chk_osd1_1" > "$dir/out" || return 1
    cat "$dir/out"
    PRIO=$(_queued_prio "$dir/out" "$PG2")
    if [[ "$PRIO" != "$degraded_prio" ]]; then
        echo "After cancel-force-backfill PG ${PG2} doesn't have prio $degraded_prio"
        ERRORS=$(( ERRORS + 1 ))
    fi

    ITEM=$(jq -r '.local_reservations.in_progress[0].item' "$dir/out")
    if [[ "$ITEM" != "$PG3" ]]; then
        echo "The force-backfill PG $PG3 didn't get promoted to an in progress item"
        ERRORS=$(( ERRORS + 1 ))
    else
        PRIO=$(jq '.local_reservations.in_progress[0].prio' "$dir/out")
        if [[ "$PRIO" != "$FORCE_PRIO" ]]; then
            # It is PG3 that is being checked here
            echo "The force-backfill PG ${PG3} doesn't have prio $FORCE_PRIO"
            ERRORS=$(( ERRORS + 1 ))
        fi
    fi

    ceph osd unset noout
    ceph osd unset nobackfill

    wait_for_clean "CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations" || return 1

    ceph pg dump pgs

    CEPH_ARGS='' ceph --admin-daemon "$(get_asok_path "osd.${chk_osd1_1}")" dump_pgstate_history

    if [[ "$ERRORS" != "0" ]]; then
        echo "$ERRORS error(s) found"
    else
        echo TEST PASSED
    fi

    delete_pool "$pool1"
    delete_pool "$pool2"
    delete_pool "$pool3"
    kill_daemons "$dir" || return 1
    return $ERRORS
}

#
# Show that pool recovery_priority is added to the backfill priority
#
# Create 2 pools with 2 OSDs with different primarys
# pool 1 with recovery_priority 1
# pool 2 with recovery_priority 2
#
# Start backfill by changing the pool sizes from 1 to 2
# Use dump_recovery_reservations to verify priorities
function TEST_backfill_pool_priority() {
    local dir=$1
    local pools=3 # Don't assume the first 2 pools are exact what we want
    local OSDS=2
    local osd p pname i

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    export CEPH_ARGS

    for (( osd = 0; osd < OSDS; osd++ )); do
        run_osd "$dir" "$osd" || return 1
    done

    for (( p = 1; p <= pools; p++ )); do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 2
    done
    sleep 5

    wait_for_clean || return 1

    ceph pg dump pgs

    # Find 2 pools with different primaries which
    # means the replica must be on another osd.
    local PG1
    local POOLNUM1
    local pool1
    local chk_osd1_1
    local chk_osd1_2

    local PG2
    local POOLNUM2
    local pool2
    local chk_osd2_1
    local chk_osd2_2

    local test_osd1 test_osd2
    for (( p = 1; p <= pools; p++ )); do
        ceph pg map "${p}.0" --format=json | jq '.acting[]' > "$dir/acting"
        test_osd1=$(head -1 "$dir/acting")
        test_osd2=$(tail -1 "$dir/acting")
        if [[ -z "$PG1" ]]; then
            PG1="${p}.0"
            POOLNUM1=$p
            pool1="${poolprefix}$p"
            chk_osd1_1=$test_osd1
            chk_osd1_2=$test_osd2
        elif [[ "$chk_osd1_1" != "$test_osd1" ]]; then
            PG2="${p}.0"
            POOLNUM2=$p
            pool2="${poolprefix}$p"
            chk_osd2_1=$test_osd1
            chk_osd2_2=$test_osd2
            break
        fi
    done
    rm -f "$dir/acting"

    if [[ -z "$pool2" ]]; then
        echo "Failure to find appropirate PGs"
        return 1
    fi

    for (( p = 1; p <= pools; p++ )); do
        if [[ "$p" != "$POOLNUM1" && "$p" != "$POOLNUM2" ]]; then
            delete_pool "${poolprefix}$p"
        fi
    done

    local pool1_extra_prio=1
    local pool2_extra_prio=2
    # size 2 -> 1 means degraded by 1, so add 1 to base prio
    local pool1_prio=$(( DEGRADED_PRIO + 1 + pool1_extra_prio ))
    local pool2_prio=$(( DEGRADED_PRIO + 1 + pool2_extra_prio ))

    ceph osd pool set "$pool1" size 1 --yes-i-really-mean-it
    ceph osd pool set "$pool1" recovery_priority $pool1_extra_prio
    ceph osd pool set "$pool2" size 1 --yes-i-really-mean-it
    ceph osd pool set "$pool2" recovery_priority $pool2_extra_prio
    wait_for_clean || return 1

    dd if=/dev/urandom of="$dir/data" bs=1M count=10
    p=1
    for pname in "$pool1" "$pool2"; do
        for (( i = 1; i <= objects; i++ )); do
            rados -p "${pname}" put "obj${i}-p${p}" "$dir/data"
        done
        p=$(( p + 1 ))
    done

    # NOTE(review): the result is not used below; the call is kept as is.
    local otherosd
    otherosd=$(get_not_primary "$pool1" obj1-p1)

    ceph pg dump pgs
    local ERRORS=0

    ceph osd pool set "$pool1" size 2
    ceph osd pool set "$pool2" size 2
    sleep 5
    # Only the two OSDs of PG1 are dumped.  With OSDS=2 and a different
    # primary, PG2's OSDs are the same pair, so its dump files exist too.
    _dump_reservations "$chk_osd1_1" > "$dir/dump.${chk_osd1_1}.out"
    echo "osd.${chk_osd1_1}"
    cat "$dir/dump.${chk_osd1_1}.out"
    _dump_reservations "$chk_osd1_2" > "$dir/dump.${chk_osd1_2}.out"
    echo "osd.${chk_osd1_2}"
    cat "$dir/dump.${chk_osd1_2}.out"

    _check_primary_in_progress "$dir/dump.${chk_osd1_1}.out" local "$PG1" "$pool1_prio" "" \
        || ERRORS=$(( ERRORS + 1 ))
    _check_primary_in_progress "$dir/dump.${chk_osd1_2}.out" remote "$PG1" "$pool1_prio" " on remote" \
        || ERRORS=$(( ERRORS + 1 ))
    _check_primary_in_progress "$dir/dump.${chk_osd2_1}.out" local "$PG2" "$pool2_prio" "" \
        || ERRORS=$(( ERRORS + 1 ))
    _check_primary_in_progress "$dir/dump.${chk_osd2_2}.out" remote "$PG2" "$pool2_prio" " on remote" \
        || ERRORS=$(( ERRORS + 1 ))

    wait_for_clean || return 1

    if [[ "$ERRORS" != "0" ]]; then
        echo "$ERRORS error(s) found"
    else
        echo TEST PASSED
    fi

    delete_pool "$pool1"
    delete_pool "$pool2"
    kill_daemons "$dir" || return 1
    return $ERRORS
}

main osd-backfill-prio "$@"

# Local Variables:
# compile-command: "make -j4 && ../qa/run-standalone.sh osd-backfill-prio.sh"
# End:

# diff --git a/qa/standalone/osd-backfill/osd-backfill-recovery-log.sh b/qa/standalone/osd-backfill/osd-backfill-recovery-log.sh
# new file mode 100755
# index 000000000..f9a144932
# --- /dev/null
# +++ b/qa/standalone/osd-backfill/osd-backfill-recovery-log.sh
# @@ -0,0 +1,139 @@
#!/usr/bin/env bash
#
# Copyright (C) 2019 Red Hat <contact@redhat.com>
#
# Author: David Zafman <dzafman@redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library Public License for more details.
#

source "$CEPH_ROOT/qa/standalone/ceph-helpers.sh"

# Entry point invoked by main: export the cluster arguments, then run the
# requested TEST_* functions (all of them when none are named), each wrapped
# in setup/teardown.
function run() {
    local dir=$1
    shift

    # Fix port????
    export CEPH_MON="127.0.0.1:7129" # git grep '\<7129\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON --osd_max_backfills=1 --debug_reserver=20 "
    CEPH_ARGS+="--osd_mclock_override_recovery_settings=true "

    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    local func
    for func in $funcs ; do
        setup "$dir" || return 1
        "$func" "$dir" || return 1
        teardown "$dir" || return 1
    done
}


# Write objects to a one pg pool, mark the OSDs holding it out so the pg moves,
# then inspect the pg log of the new primary with the objectstore tool.
#   $1 - test directory
#   $2 - extra options exported to the OSDs through EXTRA_OPTS
#   $3 - expected number of pg log entries
#   $4 - expected number of dup entries
#   $5 - number of objects written before the OSDs are marked out
#   $6 - number of objects written afterwards (default 0)
# Returns 1 when the cluster does not get clean or a length does not match.
function _common_test() {
    local dir=$1
    local extra_opts="$2"
    local loglen="$3"
    local dupslen="$4"
    local objects="$5"
    local moreobjects=${6:-0}

    local OSDS=6
    local osd j

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    export CEPH_ARGS
    export EXTRA_OPTS=" $extra_opts"

    for (( osd = 0; osd < OSDS; osd++ )); do
        run_osd "$dir" "$osd" || return 1
    done

    create_pool test 1 1

    for (( j = 1; j <= objects; j++ )); do
        rados -p test put "obj-${j}" /etc/passwd
    done

    # Mark out all OSDs for this pool
    # (the OSD ids are deliberately left unquoted: one argument per id)
    ceph osd out $(ceph pg dump pgs --format=json | jq '.pg_stats[0].up[]')
    if [[ "$moreobjects" != "0" ]]; then
        for (( j = 1; j <= moreobjects; j++ )); do
            rados -p test put "obj-more-${j}" /etc/passwd
        done
    fi
    sleep 1
    # Inspecting the log of a pg that never settled would be meaningless
    wait_for_clean || return 1

    flush_pg_stats

    local newprimary
    newprimary=$(ceph pg dump pgs --format=json | jq '.pg_stats[0].up_primary')
    kill_daemons

    local ERRORS=0
    local LOGLEN DUPSLEN
    _objectstore_tool_nodown "$dir" "$newprimary" --no-mon-config --pgid 1.0 --op log | tee "$dir/result.log"
    # The values are quoted so that an empty result (tool or jq failure)
    # counts as a mismatch instead of a test syntax error.
    LOGLEN=$(jq '.pg_log_t.log | length' "$dir/result.log")
    if [[ "$LOGLEN" != "$loglen" ]]; then
        echo "FAILED: Wrong log length got $LOGLEN (expected $loglen)"
        ERRORS=$(( ERRORS + 1 ))
    fi
    DUPSLEN=$(jq '.pg_log_t.dups | length' "$dir/result.log")
    if [[ "$DUPSLEN" != "$dupslen" ]]; then
        echo "FAILED: Wrong dups length got $DUPSLEN (expected $dupslen)"
        ERRORS=$(( ERRORS + 1 ))
    fi
    # Informational only: show which copy routine the OSDs logged
    grep "copy_up_to\|copy_after" "$dir"/osd.*.log
    rm -f "$dir/result.log"
    if [[ "$ERRORS" != "0" ]]; then
        echo TEST FAILED
        return 1
    fi
}


# Cause copy_up_to() to only partially copy logs, copy additional dups, and trim dups
function TEST_backfill_log_1() {
    local dir=$1

    _common_test "$dir" "--osd_min_pg_log_entries=1 --osd_max_pg_log_entries=2 --osd_pg_log_dups_tracked=10" 2 8 150
}


# Cause copy_up_to() to only partially copy logs, copy additional dups
function TEST_backfill_log_2() {
    local dir=$1

    _common_test "$dir" "--osd_min_pg_log_entries=1 --osd_max_pg_log_entries=2" 2 148 150
}


# Cause copy_after() to only copy logs, no dups
function TEST_recovery_1() {
    local dir=$1

    _common_test "$dir" "--osd_min_pg_log_entries=50 --osd_max_pg_log_entries=50 --osd_pg_log_dups_tracked=60 --osd_pg_log_trim_min=10" 40 0 40
}


# Cause copy_after() to copy logs with dups
function TEST_recovery_2() {
    local dir=$1

    _common_test "$dir" "--osd_min_pg_log_entries=150 --osd_max_pg_log_entries=150 --osd_pg_log_dups_tracked=3000 --osd_pg_log_trim_min=10" 151 10 141 20
}

main osd-backfill-recovery-log "$@"

# Local Variables:
# compile-command: "make -j4 && ../qa/run-standalone.sh osd-backfill-recovery-log.sh"
# End:

# diff --git a/qa/standalone/osd-backfill/osd-backfill-space.sh b/qa/standalone/osd-backfill/osd-backfill-space.sh
# new file mode 100755
# index 000000000..6a5c69412
# --- /dev/null
# +++ b/qa/standalone/osd-backfill/osd-backfill-space.sh
# @@ -0,0 +1,1176 @@
#!/usr/bin/env bash
#
# Copyright (C) 2018 Red Hat <contact@redhat.com>
#
# Author: David Zafman <dzafman@redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library Public License for more details.
#

source "$CEPH_ROOT/qa/standalone/ceph-helpers.sh"

# Entry point invoked by main: export the cluster arguments (including the
# fake statfs size the tests rely on), then run the requested TEST_*
# functions (all of them when none are named), each wrapped in
# setup/teardown.
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7180" # git grep '\<7180\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "
    CEPH_ARGS+="--osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10 "
    CEPH_ARGS+="--fake_statfs_for_testing=3686400 "
    CEPH_ARGS+="--osd_max_backfills=10 "
    CEPH_ARGS+="--osd_mclock_override_recovery_settings=true "
    export objects=600
    export poolprefix=test

    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    local func
    for func in $funcs ; do
        setup "$dir" || return 1
        "$func" "$dir" || return 1
        teardown "$dir" || return 1
    done
}


# Print the number of PGs whose state string contains $1.
function get_num_in_state() {
    local state=$1
    local expression="select(contains(\"${state}\"))"
    ceph --format json pg dump pgs 2>/dev/null | \
        jq ".pg_stats | [.[] | .state | $expression] | length"
}


# Wait until no PG is in state $1.
# $2 is handed to get_timeout_delays to build the list of sleep intervals.
# The list is restarted whenever the number of PGs in the state changes.
# Returns 1, after dumping the PGs, when the count stayed unchanged for the
# whole list.
function wait_for_not_state() {
    local state=$1
    local num_in_state=-1
    local cur_in_state
    local -a delays=($(get_timeout_delays $2 5))
    local -i loop=0

    flush_pg_stats || return 1
    while [[ "$(get_num_pgs)" == 0 ]] ; do
        sleep 1
    done

    while true ; do
        cur_in_state=$(get_num_in_state "${state}")
        # Quoted: an empty count (failed query) must not become a test
        # syntax error, it is simply "not zero yet".
        [[ "$cur_in_state" == "0" ]] && break
        if [[ "$cur_in_state" != "$num_in_state" ]] ; then
            loop=0
            num_in_state=$cur_in_state
        elif (( loop >= ${#delays[*]} )) ; then
            ceph pg dump pgs
            return 1
        fi
        sleep "${delays[$loop]}"
        loop=$(( loop + 1 ))
    done
    return 0
}


# Wait until no PG is backfilling; $1 is the timeout given to wait_for_not_state.
function wait_for_not_backfilling() {
    local timeout=$1
    wait_for_not_state backfilling "$timeout"
}


# Wait until no PG is activating; $1 is the timeout given to wait_for_not_state.
function wait_for_not_activating() {
    local timeout=$1
    wait_for_not_state activating "$timeout"
}

# All tests are created in an environment which has fake total space
# of 3600K (3686400) which can hold 600 6K replicated objects or
# 200 18K shards of erasure coded objects.  For a k=3, m=2 EC pool
# we have a theoretical 54K object but with the chunk size of 4K
# and a rounding of 4K to account for the chunks is 36K max object
# which is ((36K / 3) + 4K) * 200 = 3200K which is 88% of
# 3600K for a shard.

# Create 2 pools with size 1
# Write enough data that only 1 pool pg can fit per osd
# Increase the pool size to 2
# On 3 OSDs this should result in 1 OSD with overlapping replicas,
# so both pools can't fit.  We assume pgid 1.0 and 2.0 won't
# map to the same 2 OSDs.
# At least 1 pool shouldn't have room to backfill
# All other pools should go active+clean
function TEST_backfill_test_simple() {
    local dir=$1
    local pools=2
    local OSDS=3
    local osd p o i

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    export CEPH_ARGS

    for (( osd = 0; osd < OSDS; osd++ )); do
        run_osd "$dir" "$osd" || return 1
    done

    ceph osd set-backfillfull-ratio .85

    for (( p = 1; p <= pools; p++ )); do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 1 --yes-i-really-mean-it
    done

    wait_for_clean || return 1

    # This won't work if the primary (and only) osds of the 2 pools
    # are the same.

    dd if=/dev/urandom of="$dir/datafile" bs=1024 count=4
    for (( o = 1; o <= objects; o++ )); do
        for (( p = 1; p <= pools; p++ )); do
            rados -p "${poolprefix}$p" put "obj$o" "$dir/datafile"
        done
    done

    ceph pg dump pgs

    for (( p = 1; p <= pools; p++ )); do
        ceph osd pool set "${poolprefix}$p" size 2
    done
    sleep 30

    wait_for_not_backfilling 1200 || return 1
    wait_for_not_activating 60 || return 1

    local ERRORS=0
    if [[ "$(ceph pg dump pgs | grep -c '+backfill_toofull')" != "1" ]]; then
        echo "One pool should have been in backfill_toofull"
        ERRORS=$(( ERRORS + 1 ))
    fi

    local expected=$(( pools - 1 ))
    if [[ "$(ceph pg dump pgs | grep -c 'active+clean')" != "$expected" ]]; then
        echo "$expected didn't finish backfill"
        ERRORS=$(( ERRORS + 1 ))
    fi

    ceph pg dump pgs

    if [[ "$ERRORS" != "0" ]]; then
        return 1
    fi

    for (( i = 1; i <= pools; i++ )); do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons "$dir" || return 1
    ! grep -q "num_bytes mismatch" "$dir"/osd.*.log || return 1
}


# Create 8 pools of size 1 on 20 OSDs
# Write 4K * 600 objects (only 1 pool pg can fit on any given osd)
# Increase pool size to 2
# At least 1 pool shouldn't have room to backfill
# All other pools should go active+clean
function TEST_backfill_test_multi() {
    local dir=$1
    local pools=8
    local OSDS=20
    local osd p o i

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    export CEPH_ARGS

    for (( osd = 0; osd < OSDS; osd++ )); do
        run_osd "$dir" "$osd" || return 1
    done

    ceph osd set-backfillfull-ratio .85

    for (( p = 1; p <= pools; p++ )); do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 1 --yes-i-really-mean-it
    done

    wait_for_clean || return 1

    dd if=/dev/urandom of="$dir/datafile" bs=1024 count=4
    for (( o = 1; o <= objects; o++ )); do
        for (( p = 1; p <= pools; p++ )); do
            rados -p "${poolprefix}$p" put "obj$o" "$dir/datafile"
        done
    done

    ceph pg dump pgs

    for (( p = 1; p <= pools; p++ )); do
        ceph osd pool set "${poolprefix}$p" size 2
    done
    sleep 30

    wait_for_not_backfilling 1200 || return 1
    wait_for_not_activating 60 || return 1

    local ERRORS=0
    local full
    full=$(ceph pg dump pgs | grep -c '+backfill_toofull')
    if [[ "$full" -lt "1" ]]; then
        echo "At least one pool should have been in backfill_toofull"
        ERRORS=$(( ERRORS + 1 ))
    fi

    local expected=$(( pools - full ))
    if [[ "$(ceph pg dump pgs | grep -c 'active+clean')" != "$expected" ]]; then
        echo "$expected didn't finish backfill"
        ERRORS=$(( ERRORS + 1 ))
    fi

    ceph pg dump pgs
    ceph status

    ceph status --format=json-pretty > "$dir/stat.json"

    # -r prints the strings without the JSON double-quotes
    local SEV MSG
    SEV=$(jq -r '.health.checks.PG_BACKFILL_FULL.severity' "$dir/stat.json")
    if [[ "$SEV" != "HEALTH_WARN" ]]; then
        echo "PG_BACKFILL_FULL severity $SEV not HEALTH_WARN"
        ERRORS=$(( ERRORS + 1 ))
    fi
    MSG=$(jq -r '.health.checks.PG_BACKFILL_FULL.summary.message' "$dir/stat.json")
    # NOTE(review): the expected text pins exactly 4 pgs, although the check
    # above only requires at least one -- confirm this is intended.
    if [[ "$MSG" != "Low space hindering backfill (add storage if this doesn't resolve itself): 4 pgs backfill_toofull" ]]; then
        echo "PG_BACKFILL_FULL message '$MSG' mismatched"
        ERRORS=$(( ERRORS + 1 ))
    fi
    rm -f "$dir/stat.json"

    if [[ "$ERRORS" != "0" ]]; then
        return 1
    fi

    for (( i = 1; i <= pools; i++ )); do
        delete_pool "${poolprefix}$i"
    done
    # Work around for http://tracker.ceph.com/issues/38195
    kill_daemons "$dir" #|| return 1
    ! grep -q "num_bytes mismatch" "$dir"/osd.*.log || return 1
}


# To make sure that when 2 pg try to backfill at the same time to
# the same target.  This might be covered by the simple test above
# but this makes sure we get it.
#
# Create 10 pools of size 2 and identify 2 that have the same
# non-primary osd.
# Delete all other pools
# Set size to 1 and write 4K * 600 to each pool
# Set size back to 2
# The 2 pools should race to backfill.
+# One pool goes active+clean +# The other goes acitve+...+backfill_toofull +function TEST_backfill_test_sametarget() { + local dir=$1 + local pools=10 + local OSDS=5 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + ceph osd set-backfillfull-ratio .85 + + for p in $(seq 1 $pools) + do + create_pool "${poolprefix}$p" 1 1 + ceph osd pool set "${poolprefix}$p" size 2 + done + sleep 5 + + wait_for_clean || return 1 + + ceph pg dump pgs + + # Find 2 pools with a pg that distinct primaries but second + # replica on the same osd. + local PG1 + local POOLNUM1 + local pool1 + local chk_osd1 + local chk_osd2 + + local PG2 + local POOLNUM2 + local pool2 + for p in $(seq 1 $pools) + do + ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting + local test_osd1=$(head -1 $dir/acting) + local test_osd2=$(tail -1 $dir/acting) + if [ $p = "1" ]; + then + PG1="${p}.0" + POOLNUM1=$p + pool1="${poolprefix}$p" + chk_osd1=$test_osd1 + chk_osd2=$test_osd2 + elif [ $chk_osd1 != $test_osd1 -a $chk_osd2 = $test_osd2 ]; + then + PG2="${p}.0" + POOLNUM2=$p + pool2="${poolprefix}$p" + break + fi + done + rm -f $dir/acting + + if [ "$pool2" = "" ]; + then + echo "Failure to find appropirate PGs" + return 1 + fi + + for p in $(seq 1 $pools) + do + if [ $p != $POOLNUM1 -a $p != $POOLNUM2 ]; + then + delete_pool ${poolprefix}$p + fi + done + + ceph osd pool set $pool1 size 1 --yes-i-really-mean-it + ceph osd pool set $pool2 size 1 --yes-i-really-mean-it + + wait_for_clean || return 1 + + dd if=/dev/urandom of=$dir/datafile bs=1024 count=4 + for i in $(seq 1 $objects) + do + rados -p $pool1 put obj$i $dir/datafile + rados -p $pool2 put obj$i $dir/datafile + done + + ceph osd pool set $pool1 size 2 + ceph osd pool set $pool2 size 2 + sleep 30 + + wait_for_not_backfilling 1200 || return 1 + wait_for_not_activating 60 || return 1 + + ERRORS=0 + if [ "$(ceph pg dump pgs | 
grep +backfill_toofull | wc -l)" != "1" ]; + then + echo "One pool should have been in backfill_toofull" + ERRORS="$(expr $ERRORS + 1)" + fi + + if [ "$(ceph pg dump pgs | grep active+clean | wc -l)" != "1" ]; + then + echo "One didn't finish backfill" + ERRORS="$(expr $ERRORS + 1)" + fi + + ceph pg dump pgs + + if [ $ERRORS != "0" ]; + then + return 1 + fi + + delete_pool $pool1 + delete_pool $pool2 + kill_daemons $dir || return 1 + ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1 +} + +# 2 pools can't both backfill to a target which has other data +# 1 of the pools has objects that increase from 1024 to 2611 bytes +# +# Write to fill pool which is size 1 +# Take fill pool osd down (other 2 pools must go to the remaining OSDs +# Save an export of data on fill OSD and restart it +# Write an intial 1K to pool1 which has pg 2.0 +# Export 2.0 from non-fillpool OSD don't wait for it to start-up +# Take down fillpool OSD +# Put 1K object version of 2.0 on fillpool OSD +# Put back fillpool data on fillpool OSD +# With fillpool down write 2611 byte objects +# Take down $osd and bring back $fillosd simultaneously +# Wait for backfilling +# One PG will be able to backfill its remaining data +# One PG must get backfill_toofull +function TEST_backfill_multi_partial() { + local dir=$1 + local EC=$2 + local pools=2 + local OSDS=3 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + ceph osd set-backfillfull-ratio .85 + + ceph osd set-require-min-compat-client luminous + create_pool fillpool 1 1 + ceph osd pool set fillpool size 1 --yes-i-really-mean-it + for p in $(seq 1 $pools) + do + create_pool "${poolprefix}$p" 1 1 + ceph osd pool set "${poolprefix}$p" size 2 + done + + wait_for_clean || return 1 + + # Partially fill an osd + # We have room for 600 6K replicated objects, if we create 2611 byte objects + # there is 3600K - (2611 * 600) = 2070K, so the 
fill pool and one + # replica from the other 2 is 85% of 3600K + + dd if=/dev/urandom of=$dir/datafile bs=2611 count=1 + for o in $(seq 1 $objects) + do + rados -p fillpool put obj-fill-${o} $dir/datafile + done + + local fillosd=$(get_primary fillpool obj-fill-1) + osd=$(expr $fillosd + 1) + if [ "$osd" = "$OSDS" ]; then + osd="0" + fi + + kill_daemon $dir/osd.$fillosd.pid TERM + ceph osd out osd.$fillosd + + _objectstore_tool_nodown $dir $fillosd --op export-remove --pgid 1.0 --file $dir/fillexport.out || return 1 + activate_osd $dir $fillosd || return 1 + + ceph pg dump pgs + + dd if=/dev/urandom of=$dir/datafile bs=1024 count=1 + for o in $(seq 1 $objects) + do + rados -p "${poolprefix}1" put obj-1-${o} $dir/datafile + done + + ceph pg dump pgs + # The $osd OSD is started, but we don't wait so we can kill $fillosd at the same time + _objectstore_tool_nowait $dir $osd --op export --pgid 2.0 --file $dir/export.out + kill_daemon $dir/osd.$fillosd.pid TERM + _objectstore_tool_nodown $dir $fillosd --force --op remove --pgid 2.0 + _objectstore_tool_nodown $dir $fillosd --op import --pgid 2.0 --file $dir/export.out || return 1 + _objectstore_tool_nodown $dir $fillosd --op import --pgid 1.0 --file $dir/fillexport.out || return 1 + ceph pg dump pgs + sleep 20 + ceph pg dump pgs + + # re-write everything + dd if=/dev/urandom of=$dir/datafile bs=2611 count=1 + for o in $(seq 1 $objects) + do + for p in $(seq 1 $pools) + do + rados -p "${poolprefix}$p" put obj-${p}-${o} $dir/datafile + done + done + + kill_daemon $dir/osd.$osd.pid TERM + ceph osd out osd.$osd + + activate_osd $dir $fillosd || return 1 + ceph osd in osd.$fillosd + sleep 30 + + wait_for_not_backfilling 1200 || return 1 + wait_for_not_activating 60 || return 1 + + flush_pg_stats || return 1 + ceph pg dump pgs + + ERRORS=0 + if [ "$(get_num_in_state backfill_toofull)" != "1" ]; + then + echo "One PG should be in backfill_toofull" + ERRORS="$(expr $ERRORS + 1)" + fi + + if [ "$(get_num_in_state active+clean)" 
!= "2" ]; + then + echo "Two PGs should be active+clean after one PG completed backfill" + ERRORS="$(expr $ERRORS + 1)" + fi + + if [ $ERRORS != "0" ]; + then + return 1 + fi + + delete_pool fillpool + for i in $(seq 1 $pools) + do + delete_pool "${poolprefix}$i" + done + kill_daemons $dir || return 1 + ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1 +} + +# Make sure that the amount of bytes already on the replica doesn't +# cause an out of space condition +# +# Create 1 pool and write 4K * 600 objects +# Remove 25% (150) of the objects with one OSD down (noout set) +# Increase the size of the remaining 75% (450) of the objects to 6K +# Bring back down OSD +# The pool should go active+clean +function TEST_backfill_grow() { + local dir=$1 + local poolname="test" + local OSDS=3 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + ceph osd set-backfillfull-ratio .85 + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 3 + sleep 5 + + wait_for_clean || return 1 + + dd if=/dev/urandom of=${dir}/4kdata bs=1k count=4 + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i $dir/4kdata + done + + local PG=$(get_pg $poolname obj1) + # Remember primary during the backfill + local primary=$(get_primary $poolname obj1) + local otherosd=$(get_not_primary $poolname obj1) + + ceph osd set noout + kill_daemons $dir TERM $otherosd || return 1 + + rmobjects=$(expr $objects / 4) + for i in $(seq 1 $rmobjects) + do + rados -p $poolname rm obj$i + done + + dd if=/dev/urandom of=${dir}/6kdata bs=6k count=1 + for i in $(seq $(expr $rmobjects + 1) $objects) + do + rados -p $poolname put obj$i $dir/6kdata + done + + activate_osd $dir $otherosd || return 1 + + ceph tell osd.$primary debug kick_recovery_wq 0 + + sleep 2 + + wait_for_clean || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 + ! 
grep -q "num_bytes mismatch" $dir/osd.*.log || return 1 +} + +# Create a 5 shard EC pool on 6 OSD cluster +# Fill 1 OSD with 2600K of data take that osd down. +# Write the EC pool on 5 OSDs +# Take down 1 (must contain an EC shard) +# Bring up OSD with fill data +# Not enought room to backfill to partially full OSD +function TEST_ec_backfill_simple() { + local dir=$1 + local EC=$2 + local pools=1 + local OSDS=6 + local k=3 + local m=2 + local ecobjects=$(expr $objects / $k) + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + ceph osd set-backfillfull-ratio .85 + create_pool fillpool 1 1 + ceph osd pool set fillpool size 1 --yes-i-really-mean-it + + # Partially fill an osd + # We have room for 200 18K replicated objects, if we create 13K objects + # there is only 3600K - (13K * 200) = 1000K which won't hold + # a k=3 shard below ((18K / 3) + 4K) * 200 = 2000K + # Actual usage per shard is 8K * 200 = 1600K because 18K/3 is 6K which + # rounds to 8K. The 2000K is the ceiling on the 18K * 200 = 3600K logical + # bytes in the pool. 
+ dd if=/dev/urandom of=$dir/datafile bs=1024 count=13 + for o in $(seq 1 $ecobjects) + do + rados -p fillpool put obj$o $dir/datafile + done + + local fillosd=$(get_primary fillpool obj1) + osd=$(expr $fillosd + 1) + if [ "$osd" = "$OSDS" ]; then + osd="0" + fi + + sleep 5 + kill_daemon $dir/osd.$fillosd.pid TERM + ceph osd out osd.$fillosd + sleep 2 + ceph osd erasure-code-profile set ec-profile k=$k m=$m crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1 + + for p in $(seq 1 $pools) + do + ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile + done + + # Can't wait for clean here because we created a stale pg + #wait_for_clean || return 1 + sleep 5 + + ceph pg dump pgs + + dd if=/dev/urandom of=$dir/datafile bs=1024 count=18 + for o in $(seq 1 $ecobjects) + do + for p in $(seq 1 $pools) + do + rados -p "${poolprefix}$p" put obj$o $dir/datafile + done + done + + kill_daemon $dir/osd.$osd.pid TERM + ceph osd out osd.$osd + + activate_osd $dir $fillosd || return 1 + ceph osd in osd.$fillosd + sleep 30 + + ceph pg dump pgs + + wait_for_not_backfilling 1200 || return 1 + wait_for_not_activating 60 || return 1 + + ceph pg dump pgs + + ERRORS=0 + if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ]; then + echo "One pool should have been in backfill_toofull" + ERRORS="$(expr $ERRORS + 1)" + fi + + if [ $ERRORS != "0" ]; + then + return 1 + fi + + delete_pool fillpool + for i in $(seq 1 $pools) + do + delete_pool "${poolprefix}$i" + done + kill_daemons $dir || return 1 +} + +function osdlist() { + local OSDS=$1 + local excludeosd=$2 + + osds="" + for osd in $(seq 0 $(expr $OSDS - 1)) + do + if [ $osd = $excludeosd ]; + then + continue + fi + if [ -n "$osds" ]; then + osds="${osds} " + fi + osds="${osds}${osd}" + done + echo $osds +} + +# Create a pool with size 1 and fill with data so that only 1 EC shard can fit. 
+# Write data to 2 EC pools mapped to the same OSDs (excluding filled one) +# Remap the last OSD to partially full OSD on both pools +# The 2 pools should race to backfill. +# One pool goes active+clean +# The other goes acitve+...+backfill_toofull +function TEST_ec_backfill_multi() { + local dir=$1 + local EC=$2 + local pools=2 + local OSDS=6 + local k=3 + local m=2 + local ecobjects=$(expr $objects / $k) + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + # This test requires that shards from 2 different pools + # fit on a given OSD, but both will not fix. I'm using + # making the fillosd plus 1 shard use 75% of the space, + # leaving not enough to be under the 85% set here. + ceph osd set-backfillfull-ratio .85 + + ceph osd set-require-min-compat-client luminous + create_pool fillpool 1 1 + ceph osd pool set fillpool size 1 --yes-i-really-mean-it + + # Partially fill an osd + # We have room for 200 18K replicated objects, if we create 9K objects + # there is only 3600K - (9K * 200) = 1800K which will only hold + # one k=3 shard below ((12K / 3) + 4K) * 200 = 1600K + # The actual data will be (12K / 3) * 200 = 800K because the extra + # is the reservation padding for chunking. 
+ dd if=/dev/urandom of=$dir/datafile bs=1024 count=9 + for o in $(seq 1 $ecobjects) + do + rados -p fillpool put obj$o $dir/datafile + done + + local fillosd=$(get_primary fillpool obj1) + ceph osd erasure-code-profile set ec-profile k=3 m=2 crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1 + + nonfillosds="$(osdlist $OSDS $fillosd)" + + for p in $(seq 1 $pools) + do + ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile + ceph osd pg-upmap "$(expr $p + 1).0" $nonfillosds + done + + # Can't wait for clean here because we created a stale pg + #wait_for_clean || return 1 + sleep 15 + + ceph pg dump pgs + + dd if=/dev/urandom of=$dir/datafile bs=1024 count=12 + for o in $(seq 1 $ecobjects) + do + for p in $(seq 1 $pools) + do + rados -p "${poolprefix}$p" put obj$o-$p $dir/datafile + done + done + + ceph pg dump pgs + + for p in $(seq 1 $pools) + do + ceph osd pg-upmap $(expr $p + 1).0 ${nonfillosds% *} $fillosd + done + + sleep 30 + + wait_for_not_backfilling 1200 || return 1 + wait_for_not_activating 60 || return 1 + + ceph pg dump pgs + + ERRORS=0 + if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ]; + then + echo "One pool should have been in backfill_toofull" + ERRORS="$(expr $ERRORS + 1)" + fi + + if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep active+clean | wc -l)" != "1" ]; + then + echo "One didn't finish backfill" + ERRORS="$(expr $ERRORS + 1)" + fi + + if [ $ERRORS != "0" ]; + then + return 1 + fi + + delete_pool fillpool + for i in $(seq 1 $pools) + do + delete_pool "${poolprefix}$i" + done + kill_daemons $dir || return 1 +} + +# Similar to TEST_ec_backfill_multi but one of the ec pools +# already had some data on the target OSD + +# Create a pool with size 1 and fill with data so that only 1 EC shard can fit. 
+# Write a small amount of data to 1 EC pool that still includes the filled one +# Take down fillosd with noout set +# Write data to 2 EC pools mapped to the same OSDs (excluding filled one) +# Remap the last OSD to partially full OSD on both pools +# The 2 pools should race to backfill. +# One pool goes active+clean +# The other goes acitve+...+backfill_toofull +function SKIP_TEST_ec_backfill_multi_partial() { + local dir=$1 + local EC=$2 + local pools=2 + local OSDS=5 + local k=3 + local m=2 + local ecobjects=$(expr $objects / $k) + local lastosd=$(expr $OSDS - 1) + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + # This test requires that shards from 2 different pools + # fit on a given OSD, but both will not fix. I'm using + # making the fillosd plus 1 shard use 75% of the space, + # leaving not enough to be under the 85% set here. + ceph osd set-backfillfull-ratio .85 + + ceph osd set-require-min-compat-client luminous + create_pool fillpool 1 1 + ceph osd pool set fillpool size 1 --yes-i-really-mean-it + # last osd + ceph osd pg-upmap 1.0 $lastosd + + # Partially fill an osd + # We have room for 200 18K replicated objects, if we create 9K objects + # there is only 3600K - (9K * 200) = 1800K which will only hold + # one k=3 shard below ((12K / 3) + 4K) * 200 = 1600K + # The actual data will be (12K / 3) * 200 = 800K because the extra + # is the reservation padding for chunking. 
+ dd if=/dev/urandom of=$dir/datafile bs=1024 count=9 + for o in $(seq 1 $ecobjects) + do + rados -p fillpool put obj$o $dir/datafile + done + + local fillosd=$(get_primary fillpool obj1) + ceph osd erasure-code-profile set ec-profile k=3 m=2 crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1 + + nonfillosds="$(osdlist $OSDS $fillosd)" + + for p in $(seq 1 $pools) + do + ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile + ceph osd pg-upmap "$(expr $p + 1).0" $(seq 0 $lastosd) + done + + # Can't wait for clean here because we created a stale pg + #wait_for_clean || return 1 + sleep 15 + + ceph pg dump pgs + + dd if=/dev/urandom of=$dir/datafile bs=1024 count=1 + for o in $(seq 1 $ecobjects) + do + rados -p "${poolprefix}1" put obj$o-1 $dir/datafile + done + + for p in $(seq 1 $pools) + do + ceph osd pg-upmap "$(expr $p + 1).0" $(seq 0 $(expr $lastosd - 1)) + done + ceph pg dump pgs + + #ceph osd set noout + #kill_daemons $dir TERM osd.$lastosd || return 1 + + dd if=/dev/urandom of=$dir/datafile bs=1024 count=12 + for o in $(seq 1 $ecobjects) + do + for p in $(seq 1 $pools) + do + rados -p "${poolprefix}$p" put obj$o-$p $dir/datafile + done + done + + ceph pg dump pgs + + # Now backfill lastosd by adding back into the upmap + for p in $(seq 1 $pools) + do + ceph osd pg-upmap "$(expr $p + 1).0" $(seq 0 $lastosd) + done + #activate_osd $dir $lastosd || return 1 + #ceph tell osd.0 debug kick_recovery_wq 0 + + sleep 30 + ceph pg dump pgs + + wait_for_not_backfilling 1200 || return 1 + wait_for_not_activating 60 || return 1 + + ceph pg dump pgs + + ERRORS=0 + if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ]; + then + echo "One pool should have been in backfill_toofull" + ERRORS="$(expr $ERRORS + 1)" + fi + + if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep active+clean | wc -l)" != "1" ]; + then + echo "One didn't finish backfill" + ERRORS="$(expr $ERRORS + 1)" + fi + + if [ $ERRORS != "0" ]; + then 
+ return 1 + fi + + delete_pool fillpool + for i in $(seq 1 $pools) + do + delete_pool "${poolprefix}$i" + done + kill_daemons $dir || return 1 +} + +function SKIP_TEST_ec_backfill_multi_partial() { + local dir=$1 + local EC=$2 + local pools=2 + local OSDS=6 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + # Below we need to fit 3200K in 3600K which is 88% + # so set to 90% + ceph osd set-backfillfull-ratio .90 + + ceph osd set-require-min-compat-client luminous + create_pool fillpool 1 1 + ceph osd pool set fillpool size 1 --yes-i-really-mean-it + + # Partially fill an osd + # We have room for 200 48K ec objects, if we create 4k replicated objects + # there is 3600K - (4K * 200) = 2800K which won't hold 2 k=3 shard + # of 200 12K objects which takes ((12K / 3) + 4K) * 200 = 1600K each. + # On the other OSDs 2 * 1600K = 3200K which is 88% of 3600K. + dd if=/dev/urandom of=$dir/datafile bs=1024 count=4 + for o in $(seq 1 $objects) + do + rados -p fillpool put obj$o $dir/datafile + done + + local fillosd=$(get_primary fillpool obj1) + osd=$(expr $fillosd + 1) + if [ "$osd" = "$OSDS" ]; then + osd="0" + fi + + sleep 5 + kill_daemon $dir/osd.$fillosd.pid TERM + ceph osd out osd.$fillosd + sleep 2 + ceph osd erasure-code-profile set ec-profile k=3 m=2 crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1 + + for p in $(seq 1 $pools) + do + ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile + done + + # Can't wait for clean here because we created a stale pg + #wait_for_clean || return 1 + sleep 5 + + ceph pg dump pgs + + dd if=/dev/urandom of=$dir/datafile bs=1024 count=12 + for o in $(seq 1 $objects) + do + for p in $(seq 1 $pools) + do + rados -p "${poolprefix}$p" put obj$o $dir/datafile + done + done + + #ceph pg map 2.0 --format=json | jq '.' 
+ kill_daemon $dir/osd.$osd.pid TERM + ceph osd out osd.$osd + + _objectstore_tool_nodown $dir $osd --op export --pgid 2.0 --file $dir/export.out + _objectstore_tool_nodown $dir $fillosd --op import --pgid 2.0 --file $dir/export.out + + activate_osd $dir $fillosd || return 1 + ceph osd in osd.$fillosd + sleep 30 + + wait_for_not_backfilling 1200 || return 1 + wait_for_not_activating 60 || return 1 + + ERRORS=0 + if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ]; + then + echo "One pool should have been in backfill_toofull" + ERRORS="$(expr $ERRORS + 1)" + fi + + if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep active+clean | wc -l)" != "1" ]; + then + echo "One didn't finish backfill" + ERRORS="$(expr $ERRORS + 1)" + fi + + ceph pg dump pgs + + if [ $ERRORS != "0" ]; + then + return 1 + fi + + delete_pool fillpool + for i in $(seq 1 $pools) + do + delete_pool "${poolprefix}$i" + done + kill_daemons $dir || return 1 +} + +# Create 1 EC pool +# Write 200 12K objects ((12K / 3) + 4K) *200) = 1600K +# Take 1 shard's OSD down (with noout set) +# Remove 50 objects ((12K / 3) + 4k) * 50) = 400K +# Write 150 36K objects (grow 150 objects) 2400K +# But there is already 1600K usage so backfill +# would be too full if it didn't account for existing data +# Bring back down OSD so it must backfill +# It should go active+clean taking into account data already there +function TEST_ec_backfill_grow() { + local dir=$1 + local poolname="test" + local OSDS=6 + local k=3 + local m=2 + local ecobjects=$(expr $objects / $k) + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + ceph osd set-backfillfull-ratio .85 + + ceph osd set-require-min-compat-client luminous + ceph osd erasure-code-profile set ec-profile k=$k m=$m crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1 + ceph osd pool create $poolname 1 1 erasure ec-profile + + 
wait_for_clean || return 1 + + dd if=/dev/urandom of=${dir}/12kdata bs=1k count=12 + for i in $(seq 1 $ecobjects) + do + rados -p $poolname put obj$i $dir/12kdata + done + + local PG=$(get_pg $poolname obj1) + # Remember primary during the backfill + local primary=$(get_primary $poolname obj1) + local otherosd=$(get_not_primary $poolname obj1) + + ceph osd set noout + kill_daemons $dir TERM $otherosd || return 1 + + rmobjects=$(expr $ecobjects / 4) + for i in $(seq 1 $rmobjects) + do + rados -p $poolname rm obj$i + done + + dd if=/dev/urandom of=${dir}/36kdata bs=1k count=36 + for i in $(seq $(expr $rmobjects + 1) $ecobjects) + do + rados -p $poolname put obj$i $dir/36kdata + done + + activate_osd $dir $otherosd || return 1 + + ceph tell osd.$primary debug kick_recovery_wq 0 + + sleep 2 + + wait_for_clean || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + +main osd-backfill-space "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh osd-backfill-space.sh" +# End: diff --git a/qa/standalone/osd-backfill/osd-backfill-stats.sh b/qa/standalone/osd-backfill/osd-backfill-stats.sh new file mode 100755 index 000000000..21b42a4ce --- /dev/null +++ b/qa/standalone/osd-backfill/osd-backfill-stats.sh @@ -0,0 +1,761 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2017 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
+# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + # Fix port???? + export CEPH_MON="127.0.0.1:7114" # git grep '\<7114\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10 " + export margin=10 + export objects=200 + export poolname=test + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function below_margin() { + local -i check=$1 + shift + local -i target=$1 + + return $(( $check <= $target && $check >= $target - $margin ? 0 : 1 )) +} + +function above_margin() { + local -i check=$1 + shift + local -i target=$1 + + return $(( $check >= $target && $check <= $target + $margin ? 0 : 1 )) +} + +FIND_UPACT='grep "pg[[]${PG}.*backfilling.*update_calc_stats " $log | tail -1 | sed "s/.*[)] \([[][^ p]*\).*$/\1/"' +FIND_FIRST='grep "pg[[]${PG}.*backfilling.*update_calc_stats $which " $log | grep -F " ${UPACT}${addp}" | grep -v est | head -1 | sed "s/.* \([0-9]*\)$/\1/"' +FIND_LAST='grep "pg[[]${PG}.*backfilling.*update_calc_stats $which " $log | tail -1 | sed "s/.* \([0-9]*\)$/\1/"' + +function check() { + local dir=$1 + local PG=$2 + local primary=$3 + local type=$4 + local degraded_start=$5 + local degraded_end=$6 + local misplaced_start=$7 + local misplaced_end=$8 + local primary_start=${9:-} + local primary_end=${10:-} + local check_setup=${11:-true} + + local log=$(grep -l +backfilling $dir/osd.$primary.log) + if [ $check_setup = "true" ]; + then + local alllogs=$(grep -l +backfilling $dir/osd.*.log) + if [ "$(echo "$alllogs" | wc -w)" != "1" ]; + then + echo "Test setup failure, a single OSD should have performed backfill" + return 1 + fi + fi + + local addp=" " + if [ "$type" = "erasure" ]; + then + addp="p" + fi + + 
UPACT=$(eval $FIND_UPACT) + [ -n "$UPACT" ] || return 1 + + # Check 3rd line at start because of false recovery starts + local which="degraded" + FIRST=$(eval $FIND_FIRST) + [ -n "$FIRST" ] || return 1 + below_margin $FIRST $degraded_start || return 1 + LAST=$(eval $FIND_LAST) + [ -n "$LAST" ] || return 1 + above_margin $LAST $degraded_end || return 1 + + # Check 3rd line at start because of false recovery starts + which="misplaced" + FIRST=$(eval $FIND_FIRST) + [ -n "$FIRST" ] || return 1 + below_margin $FIRST $misplaced_start || return 1 + LAST=$(eval $FIND_LAST) + [ -n "$LAST" ] || return 1 + above_margin $LAST $misplaced_end || return 1 + + # This is the value of set into MISSING_ON_PRIMARY + if [ -n "$primary_start" ]; + then + which="shard $primary" + FIRST=$(eval $FIND_FIRST) + [ -n "$FIRST" ] || return 1 + below_margin $FIRST $primary_start || return 1 + LAST=$(eval $FIND_LAST) + [ -n "$LAST" ] || return 1 + above_margin $LAST $primary_end || return 1 + fi +} + +# [1] -> [1, 0, 2] +# degraded 1000 -> 0 +# state: active+undersized+degraded+remapped+backfilling + +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 1000 0 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-10-27 09:44:23.531466 22'500 26:617 [1,0,2] 1 [1] 1 0'0 2017-10-27 09:43:44.654882 0'0 2017-10-27 09:43:44.654882 +function TEST_backfill_sizeup() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + run_osd $dir 5 || return 1 + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 1 --yes-i-really-mean-it + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + 
done + + ceph osd set nobackfill + ceph osd pool set $poolname size 3 + sleep 2 + ceph osd unset nobackfill + + wait_for_clean || return 1 + + local primary=$(get_primary $poolname obj1) + local PG=$(get_pg $poolname obj1) + + local degraded=$(expr $objects \* 2) + check $dir $PG $primary replicated $degraded 0 0 0 || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + + + +# [1] -> [0, 2, 4] +# degraded 1000 -> 0 +# misplaced 500 -> 0 +# state: active+undersized+degraded+remapped+backfilling + +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 1000 500 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-10-27 09:48:53.326849 22'500 26:603 [0,2,4] 0 [1] 1 0'0 2017-10-27 09:48:13.236253 0'0 2017-10-27 09:48:13.236253 +function TEST_backfill_sizeup_out() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + run_osd $dir 5 || return 1 + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 1 --yes-i-really-mean-it + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local PG=$(get_pg $poolname obj1) + # Remember primary during the backfill + local primary=$(get_primary $poolname obj1) + + ceph osd set nobackfill + ceph osd out osd.$primary + ceph osd pool set $poolname size 3 + sleep 2 + ceph osd unset nobackfill + + wait_for_clean || return 1 + + local degraded=$(expr $objects \* 2) + check $dir $PG $primary replicated $degraded 0 $objects 0 || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + + +# [1 0] -> [1,2]/[1,0] +# misplaced 500 -> 0 +# state: active+remapped+backfilling + +# PG_STAT OBJECTS 
# Size-2 pool; the non-primary replica is marked out (but stays running),
# so its objects are misplaced until backfill to a new OSD completes.
#
# [1 0] -> [1,2]/[1,0]
# misplaced 500 -> 0
# state: active+remapped+backfilling

# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
# 1.0 500 0 0 500 0 0 100 100 active+remapped+backfilling 2017-10-27 09:51:18.800517 22'500 25:570 [1,2] 1 [1,0] 1 0'0 2017-10-27 09:50:40.441274 0'0 2017-10-27 09:50:40.441274
function TEST_backfill_out() {
    local dir=$1
    local osd objnum

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    for osd in 0 1 2 3 4 5
    do
        run_osd $dir $osd || return 1
    done

    create_pool $poolname 1 1
    ceph osd pool set $poolname size 2
    sleep 5

    wait_for_clean || return 1

    # Fill the pool with $objects empty objects.
    for ((objnum = 1; objnum <= objects; objnum++))
    do
        rados -p $poolname put obj$objnum /dev/null
    done

    local PG=$(get_pg $poolname obj1)
    # Remember primary during the backfill
    local primary=$(get_primary $poolname obj1)

    ceph osd set nobackfill
    # Looked up only after backfill has been paused, as before.
    local replica=$(get_not_primary $poolname obj1)
    ceph osd out osd.$replica
    sleep 2
    ceph osd unset nobackfill

    wait_for_clean || return 1

    check $dir $PG $primary replicated 0 0 $objects 0 || return 1

    delete_pool $poolname
    kill_daemons $dir || return 1
}


# Size-2 pool; the non-primary replica is killed, marked down and out, so
# every object is short one copy until backfill completes.
#
# [0, 1] -> [0, 2]/[0]
# osd 1 down/out
# degraded 500 -> 0
# state: active+undersized+degraded+remapped+backfilling

# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
# 1.0 500 0 500 0 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-10-27 09:53:24.051091 22'500 27:719 [0,2] 0 [0] 0 0'0 2017-10-27 09:52:43.188368 0'0 2017-10-27 09:52:43.188368
function TEST_backfill_down_out() {
    local dir=$1
    local osd objnum

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    for osd in 0 1 2 3 4 5
    do
        run_osd $dir $osd || return 1
    done

    create_pool $poolname 1 1
    ceph osd pool set $poolname size 2
    sleep 5

    wait_for_clean || return 1

    # Fill the pool with $objects empty objects.
    for ((objnum = 1; objnum <= objects; objnum++))
    do
        rados -p $poolname put obj$objnum /dev/null
    done

    local PG=$(get_pg $poolname obj1)
    # Remember primary during the backfill
    local primary=$(get_primary $poolname obj1)
    local otherosd=$(get_not_primary $poolname obj1)
    local pidfile=$dir/osd.${otherosd}.pid

    ceph osd set nobackfill
    # Stop the replica's daemon, then tell the cluster it is down and out.
    kill $(cat $pidfile)
    ceph osd down osd.${otherosd}
    ceph osd out osd.${otherosd}
    sleep 2
    ceph osd unset nobackfill

    wait_for_clean || return 1

    check $dir $PG $primary replicated $objects 0 0 0 || return 1

    delete_pool $poolname
    kill_daemons $dir || return 1
}
# Size-2 pool on a five-OSD cluster; both current replicas are marked out
# and the pool is grown to size 4, leaving only three "in" OSDs.
#
# [0,1] -> [2,4,3]/[0,1]
# degraded 1000 -> 0
# misplaced 1000 -> 500
# state ends at active+clean+remapped [2,4,3]/[2,4,3,0]
# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
# 1.0 500 0 1000 1000 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-10-30 18:21:45.995149 19'500 23:1817 [2,4,3] 2 [0,1] 0 0'0 2017-10-30 18:21:05.109904 0'0 2017-10-30 18:21:05.109904
# ENDS:
# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
# 1.0 500 0 0 500 0 0 5 5 active+clean+remapped 2017-10-30 18:22:42.293730 19'500 25:2557 [2,4,3] 2 [2,4,3,0] 2 0'0 2017-10-30 18:21:05.109904 0'0 2017-10-30 18:21:05.109904
function TEST_backfill_sizeup4_allout() {
    local dir=$1
    local osd objnum

    # One mon, one mgr and five OSDs (0..4).
    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    for osd in 0 1 2 3 4
    do
        run_osd $dir $osd || return 1
    done

    create_pool $poolname 1 1
    ceph osd pool set $poolname size 2

    wait_for_clean || return 1

    # Fill the pool with $objects empty objects.
    for ((objnum = 1; objnum <= objects; objnum++))
    do
        rados -p $poolname put obj$objnum /dev/null
    done

    local PG=$(get_pg $poolname obj1)
    # Remember primary during the backfill
    local primary=$(get_primary $poolname obj1)
    local otherosd=$(get_not_primary $poolname obj1)

    ceph osd set nobackfill
    # Mark out the replica first, then the primary.
    for osd in $otherosd $primary
    do
        ceph osd out osd.$osd
    done
    ceph osd pool set $poolname size 4
    # Primary might change before backfill starts
    sleep 2
    primary=$(get_primary $poolname obj1)
    ceph osd unset nobackfill
    ceph tell osd.$primary get_latest_osdmap
    ceph tell osd.$primary debug kick_recovery_wq 0
    sleep 2

    wait_for_clean || return 1

    # The same value (2 * objects) is used for both the degraded and the
    # misplaced starting counts.
    local misdeg=$((objects * 2))
    check $dir $PG $primary replicated $misdeg 0 $misdeg $objects || return 1

    delete_pool $poolname
    kill_daemons $dir || return 1
}
# Erasure-coded pool (k=2 m=1) on a five-OSD cluster; every OSD holding a
# shard of the PG is marked out, leaving only two "in" OSDs.
#
# [1,0,2] -> [4,3,NONE]/[1,0,2]
# misplaced 1500 -> 500
# state ends at active+clean+remapped [4,3,NONE]/[4,3,2]

# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
# 1.0 500 0 0 1500 0 0 100 100 active+degraded+remapped+backfilling 2017-10-31 16:53:39.467126 19'500 23:615 [4,3,NONE] 4 [1,0,2] 1 0'0 2017-10-31 16:52:59.624429 0'0 2017-10-31 16:52:59.624429


# ENDS:

# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
# 1.0 500 0 0 500 0 0 5 5 active+clean+remapped 2017-10-31 16:48:34.414040 19'500 25:2049 [4,3,NONE] 4 [4,3,2] 4 0'0 2017-10-31 16:46:58.203440 0'0 2017-10-31 16:46:58.203440
function TEST_backfill_ec_all_out() {
    local dir=$1
    local osd objnum

    # One mon, one mgr and five OSDs (0..4).
    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    for osd in 0 1 2 3 4
    do
        run_osd $dir $osd || return 1
    done

    ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd
    create_pool $poolname 1 1 erasure myprofile

    wait_for_clean || return 1

    # Fill the pool with $objects empty objects.
    for ((objnum = 1; objnum <= objects; objnum++))
    do
        rados -p $poolname put obj$objnum /dev/null
    done

    local PG=$(get_pg $poolname obj1)
    # Remember primary during the backfill
    local primary=$(get_primary $poolname obj1)

    ceph osd set nobackfill
    # The OSD list is read once, before any of them is marked out.
    local shard_osds=$(get_osds $poolname obj1)
    for osd in $shard_osds
    do
        ceph osd out osd.$osd
    done
    # Primary might change before backfill starts
    sleep 2
    primary=$(get_primary $poolname obj1)
    ceph osd unset nobackfill
    ceph tell osd.$primary get_latest_osdmap
    ceph tell osd.$primary debug kick_recovery_wq 0
    sleep 2

    wait_for_clean || return 1

    # Starting misplaced count is three times the object count
    # (see "misplaced 1500" for 500 objects in the header above).
    local misplaced=$((objects * 3))
    check $dir $PG $primary erasure 0 0 $misplaced $objects || return 1

    delete_pool $poolname
    kill_daemons $dir || return 1
}
# Erasure-coded pool (k=2 m=1, min_size 2) on a six-OSD cluster: one
# non-primary shard OSD is killed and marked down, then every OSD that held
# a shard is marked out.
#
# [1,0,2] -> [4,3,5]/[1,NONE,2]
# degraded 500 -> 0
# misplaced 1000 -> 0
#
# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
# 1.0 500 0 500 1000 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-11-06 14:02:29.439105 24'500 29:1020 [4,3,5] 4 [1,NONE,2] 1 0'0 2017-11-06 14:01:46.509963 0'0 2017-11-06 14:01:46.509963
#
# Arguments: $1 - test directory holding the daemons' data, pid and log files
# Returns:   0 on success, 1 on any failed step or on a wait timeout
function TEST_backfill_ec_down_all_out() {
    local dir=$1
    # Upper bound on the number of 2-second polls spent waiting for the PG
    # to leave the "incomplete" state.
    local incomplete_tries=120
    local attempt

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    run_osd $dir 0 || return 1
    run_osd $dir 1 || return 1
    run_osd $dir 2 || return 1
    run_osd $dir 3 || return 1
    run_osd $dir 4 || return 1
    run_osd $dir 5 || return 1

    ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd
    create_pool $poolname 1 1 erasure myprofile
    ceph osd pool set $poolname min_size 2

    wait_for_clean || return 1

    for i in $(seq 1 $objects)
    do
        rados -p $poolname put obj$i /dev/null
    done

    local PG=$(get_pg $poolname obj1)
    # Remember primary during the backfill
    local primary=$(get_primary $poolname obj1)
    local otherosd=$(get_not_primary $poolname obj1)
    local allosds=$(get_osds $poolname obj1)

    ceph osd set nobackfill
    kill $(cat "$dir/osd.${otherosd}.pid")
    ceph osd down osd.${otherosd}
    for o in $allosds
    do
        ceph osd out osd.$o
    done
    # Primary might change before backfill starts
    sleep 2
    primary=$(get_primary $poolname obj1)
    ceph osd unset nobackfill
    ceph tell osd.$primary get_latest_osdmap
    ceph tell osd.$primary debug kick_recovery_wq 0
    sleep 2
    flush_pg_stats

    # Wait for recovery to finish
    # Can't use wait_for_clean() because state goes from active+undersized+degraded+remapped+backfilling
    # to active+undersized+remapped

    # Phase 1: wait until no PG reports exactly "incomplete". Bounded so
    # that a PG stuck in that state fails the test instead of hanging it.
    for attempt in $(seq 1 $incomplete_tries)
    do
        if test "$(ceph --format json pg dump pgs |
            jq '.pg_stats | [.[] | .state | select(. == "incomplete")] | length')" -ne "0"
        then
            if [ "$attempt" = "$incomplete_tries" ]
            then
                echo "Timeout waiting for PG to leave incomplete state"
                return 1
            fi
            sleep 2
            continue
        fi
        break
    done
    ceph pg dump pgs

    # Phase 2: wait (up to 240 one-second polls) until the PG's line in the
    # dump no longer mentions "backfilling".
    for i in $(seq 1 240)
    do
        if ceph pg dump pgs | grep "^$PG" | grep -qv backfilling
        then
            break
        fi
        if [ "$i" = "240" ]
        then
            echo "Timeout waiting for recovery to finish"
            return 1
        fi
        sleep 1
    done

    ceph pg dump pgs

    local misplaced=$(expr $objects \* 2)
    check $dir $PG $primary erasure $objects 0 $misplaced 0 || return 1

    delete_pool $poolname
    kill_daemons $dir || return 1
}
# Test driver for bad-inc-map.sh: points CEPH_MON/CEPH_ARGS at a private
# monitor on $mon_port and runs each requested TEST_* function (all of them
# when none is named) between setup and teardown of the test directory.
#
# Arguments: $1 - test directory; remaining args - TEST_* functions to run
# Returns:   1 as soon as setup, a test, or teardown fails
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:$mon_port"
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "
    # NOTE: errexit does NOT protect the TEST_* functions below. They are
    # invoked on the left-hand side of "||", and bash ignores -e for every
    # command executed inside a function called in that context. Each test
    # must therefore check its own critical steps.
    set -e

    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    for func in $funcs ; do
        setup $dir || return 1
        $func $dir || return 1
        teardown $dir || return 1
    done
}

# Inject bad osdmap CRCs on osd.2, churn the osdmap, and verify that every
# OSD stays up and that osd.2 logged both the injection and the bail-out.
#
# Arguments: $1 - test directory holding the daemons' log files
# Returns:   0 on success, 1 if a daemon fails to start, an OSD is not up
#            within 10 seconds, or a log signature is missing
function TEST_bad_inc_map() {
    local dir=$1

    # Explicit checks: "set -e" in run() is not in effect inside this
    # function (see the note there).
    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    run_osd $dir 0 || return 1
    run_osd $dir 1 || return 1
    run_osd $dir 2 || return 1

    ceph config set osd.2 osd_inject_bad_map_crc_probability 1

    # osd map churn
    create_pool foo 8
    ceph osd pool set foo min_size 1
    ceph osd pool set foo min_size 2

    sleep 5

    # make sure all the OSDs are still up
    TIMEOUT=10 wait_for_osd up 0 || return 1
    TIMEOUT=10 wait_for_osd up 1 || return 1
    TIMEOUT=10 wait_for_osd up 2 || return 1

    # check for the signature in the log
    grep "injecting map crc failure" $dir/osd.2.log || return 1
    grep "bailing because last" $dir/osd.2.log || return 1

    echo success

    delete_pool foo
    kill_daemons $dir || return 1
}
# Special case divergence test
# Test handling of divergent entries with prior_version
# prior to log_tail
# based on qa/tasks/divergent_prior.py
#
# Outline: on a 3-OSD, size-3 pool the primary ("divergent") is left alone
# while the two other OSDs are blackholed; I/O issued in that window is
# meant to reach only the divergent OSD. All OSDs are then killed, the
# non-divergent ones are revived and written to, and finally the divergent
# OSD rejoins. The test passes when an OSD log contains the
# "cannot roll back, removing and adding to missing" merge message.
#
# Arguments: $1 - test directory (daemon data, logs, scratch file "existing")
# Reads globals: poolname, testobjects, DIVERGENT_WRITE, DIVERGENT_REMOVE
# Returns: 0 on success, 1 on a failed checked step or missing log message
function TEST_divergent() {
    local dir=$1

    # something that is always there
    local dummyfile='/etc/fstab'
    local dummyfile2='/etc/resolv.conf'

    local num_osds=3
    # Whitespace-separated OSD ids 0 .. num_osds-1
    local osds="$(seq 0 $(expr $num_osds - 1))"
    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    for i in $osds
    do
        run_osd $dir $i || return 1
    done

    # Freeze the OSD map flags so marking OSDs down/out/in is done only by
    # the explicit commands issued by this test.
    ceph osd set noout
    ceph osd set noin
    ceph osd set nodown
    create_pool $poolname 1 1
    ceph osd pool set $poolname size 3
    ceph osd pool set $poolname min_size 2

    flush_pg_stats || return 1
    wait_for_clean || return 1

    # determine primary
    # (taken from the first entry of pg_stats in the JSON dump)
    local divergent="$(ceph pg dump pgs --format=json | jq '.pg_stats[0].up_primary')"
    echo "primary and soon to be divergent is $divergent"
    ceph pg dump pgs
    # Every OSD id except the divergent one, space separated
    local non_divergent=""
    for i in $osds
    do
        if [ "$i" = "$divergent" ]; then
            continue
        fi
        non_divergent="$non_divergent $i"
    done

    echo "writing initial objects"
    # write a bunch of objects
    for i in $(seq 1 $testobjects)
    do
        rados -p $poolname put existing_$i $dummyfile
    done

    # Result intentionally not checked here.
    WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean

    # NOTE(review): pgid is only referenced by the commented-out
    # _objectstore_tool_nodown call further down.
    local pgid=$(get_pg $poolname existing_1)

    # blackhole non_divergent
    echo "blackholing osds $non_divergent"
    ceph pg dump pgs
    for i in $non_divergent
    do
        # CEPH_ARGS is cleared for admin-socket commands
        CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) config set objectstore_blackhole 1
    done

    # Object names used for the two divergence cases: the last object and
    # the one before it.
    local case5=$testobjects
    local case3=$(expr $testobjects - 1)
    # Write some soon to be divergent
    echo 'writing divergent object'
    rados -p $poolname put existing_$case5 $dummyfile &
    echo 'create missing divergent object'
    inject_eio rep data $poolname existing_$case3 $dir 0 || return 1
    rados -p $poolname get existing_$case3 $dir/existing &
    # The background rados clients are given 10 seconds and then killed;
    # presumably they cannot complete while the replicas are blackholed.
    sleep 10
    killall -9 rados

    # kill all the osds but leave divergent in
    echo 'killing all the osds'
    ceph pg dump pgs
    kill_daemons $dir KILL osd || return 1
    for i in $osds
    do
        ceph osd down osd.$i
    done
    for i in $non_divergent
    do
        ceph osd out osd.$i
    done

    # bring up non-divergent
    echo "bringing up non_divergent $non_divergent"
    ceph pg dump pgs
    for i in $non_divergent
    do
        activate_osd $dir $i || return 1
    done
    for i in $non_divergent
    do
        ceph osd in osd.$i
    done

    WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean

    # write 1 non-divergent object (ensure that old divergent one is divergent)
    # NOTE(review): objname is not declared local and leaks into the caller.
    objname="existing_$(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)"
    echo "writing non-divergent object $objname"
    ceph pg dump pgs
    rados -p $poolname put $objname $dummyfile2

    # ensure no recovery of up osds first
    echo 'delay recovery'
    ceph pg dump pgs
    for i in $non_divergent
    do
        CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) set_recovery_delay 100000
    done

    # bring in our divergent friend
    echo "revive divergent $divergent"
    ceph pg dump pgs
    # noup is set before the daemon is started and cleared only after its
    # recovery delay has been configured.
    ceph osd set noup
    # NOTE(review): result not checked — presumably activate_osd cannot
    # report success while noup is set; confirm against the helper.
    activate_osd $dir $divergent
    sleep 5

    echo 'delay recovery divergent'
    ceph pg dump pgs
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) set_recovery_delay 100000

    ceph osd unset noup

    # OSD ids are hard-coded here; they match $osds for num_osds=3.
    wait_for_osd up 0
    wait_for_osd up 1
    wait_for_osd up 2

    ceph pg dump pgs
    echo 'wait for peering'
    ceph pg dump pgs
    rados -p $poolname put foo $dummyfile

    # Restart the divergent OSD once more.
    echo "killing divergent $divergent"
    ceph pg dump pgs
    kill_daemons $dir KILL osd.$divergent
    #_objectstore_tool_nodown $dir $divergent --op log --pgid $pgid
    echo "reviving divergent $divergent"
    ceph pg dump pgs
    activate_osd $dir $divergent

    sleep 20

    echo "allowing recovery"
    ceph pg dump pgs
    # Set osd_recovery_delay_start back to 0 and kick the queue
    for i in $osds
    do
        ceph tell osd.$i debug kick_recovery_wq 0
    done

    echo 'reading divergent objects'
    ceph pg dump pgs
    # Every one of these objects must be readable again.
    for i in $(seq 1 $(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE))
    do
        rados -p $poolname get existing_$i $dir/existing || return 1
    done
    rm -f $dir/existing

    # Print all merge messages for the record, then require the specific one.
    grep _merge_object_divergent_entries $(find $dir -name '*osd*log')
    # Check for _merge_object_divergent_entries for case #5
    if ! grep -q "_merge_object_divergent_entries.*cannot roll back, removing and adding to missing" $(find $dir -name '*osd*log')
    then
        echo failure
        return 1
    fi
    echo "success"

    delete_pool $poolname
    kill_daemons $dir || return 1
}
bunch of objects + for i in $(seq 1 $testobjects) + do + rados -p $poolname put existing_$i $dummyfile + done + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + local pgid=$(get_pg $poolname existing_1) + + # blackhole non_divergent + echo "blackholing osds $non_divergent" + ceph pg dump pgs + for i in $non_divergent + do + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) config set objectstore_blackhole 1 + done + + # Write some soon to be divergent + echo 'writing divergent object' + rados -p $poolname put existing_$testobjects $dummyfile2 & + sleep 1 + rados -p $poolname put existing_$testobjects $dummyfile & + rados -p $poolname mksnap snap1 + rados -p $poolname put existing_$(expr $testobjects - 1) $dummyfile & + sleep 10 + killall -9 rados + + # kill all the osds but leave divergent in + echo 'killing all the osds' + ceph pg dump pgs + kill_daemons $dir KILL osd || return 1 + for i in $osds + do + ceph osd down osd.$i + done + for i in $non_divergent + do + ceph osd out osd.$i + done + + # bring up non-divergent + echo "bringing up non_divergent $non_divergent" + ceph pg dump pgs + for i in $non_divergent + do + activate_osd $dir $i || return 1 + done + for i in $non_divergent + do + ceph osd in osd.$i + done + + sleep 5 + #WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + # write 1 non-divergent object (ensure that old divergent one is divergent) + objname="existing_$(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)" + echo "writing non-divergent object $objname" + ceph pg dump pgs + rados -p $poolname put $objname $dummyfile2 + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + # Dump logs + for i in $non_divergent + do + kill_daemons $dir KILL osd.$i || return 1 + _objectstore_tool_nodown $dir $i --op log --pgid $pgid + activate_osd $dir $i || return 1 + done + _objectstore_tool_nodown $dir $divergent --op log --pgid $pgid + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + # ensure no recovery of up osds first + echo 'delay recovery' + ceph pg dump pgs + for i in 
$non_divergent + do + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) set_recovery_delay 100000 + done + + # bring in our divergent friend + echo "revive divergent $divergent" + ceph pg dump pgs + ceph osd set noup + activate_osd $dir $divergent + sleep 5 + + echo 'delay recovery divergent' + ceph pg dump pgs + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) set_recovery_delay 100000 + + ceph osd unset noup + + wait_for_osd up 0 + wait_for_osd up 1 + wait_for_osd up 2 + + ceph pg dump pgs + echo 'wait for peering' + ceph pg dump pgs + rados -p $poolname put foo $dummyfile + + echo "killing divergent $divergent" + ceph pg dump pgs + kill_daemons $dir KILL osd.$divergent + #_objectstore_tool_nodown $dir $divergent --op log --pgid $pgid + echo "reviving divergent $divergent" + ceph pg dump pgs + activate_osd $dir $divergent + + sleep 20 + + echo "allowing recovery" + ceph pg dump pgs + # Set osd_recovery_delay_start back to 0 and kick the queue + for i in $osds + do + ceph tell osd.$i debug kick_recovery_wq 0 + done + + echo 'reading divergent objects' + ceph pg dump pgs + for i in $(seq 1 $(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)) + do + rados -p $poolname get existing_$i $dir/existing || return 1 + done + rm -f $dir/existing + + grep _merge_object_divergent_entries $(find $dir -name '*osd*log') + # Check for _merge_object_divergent_entries for case #3 + # XXX: Not reproducing this case +# if ! grep -q "_merge_object_divergent_entries.* missing, .* adjusting" $(find $dir -name '*osd*log') +# then +# echo failure +# return 1 +# fi + # Check for _merge_object_divergent_entries for case #4 + if ! 
# Special case divergence test with ceph-objectstore-tool export/remove/import
# Test handling of divergent entries with prior_version
# prior to log_tail and a ceph-objectstore-tool export/import
# based on qa/tasks/divergent_prior2.py
#
# Outline: on a 3-OSD, size-3 pool the primary ("divergent") keeps working
# while the other two OSDs are blackholed; creates, overwrites and removes
# issued in that window are meant to reach only the divergent OSD. After
# the other OSDs have moved on, the divergent OSD's copy of the PG is
# exported, removed and re-imported with the objectstore tool before it
# rejoins. The test passes when the OSD logs contain both the
# "more recent entry found" and the "prior_version or op type indicates
# creation" merge messages.
#
# Arguments: $1 - test directory (daemon data, logs, scratch files)
# Reads globals: poolname, testobjects, DIVERGENT_CREATE, DIVERGENT_WRITE,
#                DIVERGENT_REMOVE
# Returns: 0 on success, 1 on a failed checked step or missing log message
function TEST_divergent_2() {
    local dir=$1
    # Scratch names that previously leaked into the caller's scope.
    local objname expfile rmi

    # something that is always there
    local dummyfile='/etc/fstab'
    local dummyfile2='/etc/resolv.conf'

    local num_osds=3
    local osds="$(seq 0 $(expr $num_osds - 1))"
    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    for i in $osds
    do
        run_osd $dir $i || return 1
    done

    ceph osd set noout
    ceph osd set noin
    ceph osd set nodown
    create_pool $poolname 1 1
    ceph osd pool set $poolname size 3
    ceph osd pool set $poolname min_size 2

    flush_pg_stats || return 1
    wait_for_clean || return 1

    # determine primary
    local divergent="$(ceph pg dump pgs --format=json | jq '.pg_stats[0].up_primary')"
    echo "primary and soon to be divergent is $divergent"
    ceph pg dump pgs
    local non_divergent=""
    for i in $osds
    do
        if [ "$i" = "$divergent" ]; then
            continue
        fi
        non_divergent="$non_divergent $i"
    done

    echo "writing initial objects"
    # write a bunch of objects
    for i in $(seq 1 $testobjects)
    do
        rados -p $poolname put existing_$i $dummyfile
    done

    WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean

    local pgid=$(get_pg $poolname existing_1)

    # blackhole non_divergent
    echo "blackholing osds $non_divergent"
    ceph pg dump pgs
    for i in $non_divergent
    do
        CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) config set objectstore_blackhole 1
    done

    # Do some creates to hit case 2
    echo 'create new divergent objects'
    for i in $(seq 1 $DIVERGENT_CREATE)
    do
        rados -p $poolname create newobject_$i &
    done
    # Write some soon to be divergent
    echo 'writing divergent objects'
    for i in $(seq 1 $DIVERGENT_WRITE)
    do
        rados -p $poolname put existing_$i $dummyfile2 &
    done
    # Remove some soon to be divergent
    echo 'remove divergent objects'
    for i in $(seq 1 $DIVERGENT_REMOVE)
    do
        # Removed objects are the ones numbered right after the overwritten ones.
        rmi=$(expr $i + $DIVERGENT_WRITE)
        rados -p $poolname rm existing_$rmi &
    done
    sleep 10
    killall -9 rados

    # kill all the osds but leave divergent in
    echo 'killing all the osds'
    ceph pg dump pgs
    kill_daemons $dir KILL osd || return 1
    for i in $osds
    do
        ceph osd down osd.$i
    done
    for i in $non_divergent
    do
        ceph osd out osd.$i
    done

    # bring up non-divergent
    echo "bringing up non_divergent $non_divergent"
    ceph pg dump pgs
    for i in $non_divergent
    do
        activate_osd $dir $i || return 1
    done
    for i in $non_divergent
    do
        ceph osd in osd.$i
    done

    WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean

    # write 1 non-divergent object (ensure that old divergent one is divergent)
    objname="existing_$(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)"
    echo "writing non-divergent object $objname"
    ceph pg dump pgs
    rados -p $poolname put $objname $dummyfile2

    WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean

    # ensure no recovery of up osds first
    echo 'delay recovery'
    ceph pg dump pgs
    for i in $non_divergent
    do
        CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) set_recovery_delay 100000
    done

    # bring in our divergent friend
    echo "revive divergent $divergent"
    ceph pg dump pgs
    ceph osd set noup
    # Left unchecked exactly as before: the OSD is started while noup is set.
    activate_osd $dir $divergent
    sleep 5

    echo 'delay recovery divergent'
    ceph pg dump pgs
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) set_recovery_delay 100000

    ceph osd unset noup

    wait_for_osd up 0
    wait_for_osd up 1
    wait_for_osd up 2

    ceph pg dump pgs
    echo 'wait for peering'
    ceph pg dump pgs
    rados -p $poolname put foo $dummyfile

    # At this point the divergent_priors should have been detected

    echo "killing divergent $divergent"
    ceph pg dump pgs
    kill_daemons $dir KILL osd.$divergent

    # export a pg
    # The export/import round trip is the point of this test, so a failure
    # of either step fails the test instead of being silently ignored.
    expfile=$dir/exp.$$.out
    _objectstore_tool_nodown $dir $divergent --op export-remove --pgid $pgid --file $expfile || return 1
    _objectstore_tool_nodown $dir $divergent --op import --file $expfile || return 1

    echo "reviving divergent $divergent"
    ceph pg dump pgs
    activate_osd $dir $divergent
    wait_for_osd up $divergent

    sleep 20
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) dump_ops_in_flight

    echo "allowing recovery"
    ceph pg dump pgs
    # Set osd_recovery_delay_start back to 0 and kick the queue
    for i in $osds
    do
        ceph tell osd.$i debug kick_recovery_wq 0
    done

    echo 'reading divergent objects'
    ceph pg dump pgs
    for i in $(seq 1 $(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE))
    do
        rados -p $poolname get existing_$i $dir/existing || return 1
    done
    # Result deliberately not checked for the divergently created objects
    # (unchanged from the original; presumably these reads may fail).
    for i in $(seq 1 $DIVERGENT_CREATE)
    do
        rados -p $poolname get newobject_$i $dir/existing
    done
    rm -f $dir/existing

    grep _merge_object_divergent_entries $(find $dir -name '*osd*log')
    # Check for _merge_object_divergent_entries for case #1
    if ! grep -q "_merge_object_divergent_entries: more recent entry found:" $(find $dir -name '*osd*log')
    then
        echo failure
        return 1
    fi
    # Check for _merge_object_divergent_entries for case #2
    if ! grep -q "_merge_object_divergent_entries.*prior_version or op type indicates creation" $(find $dir -name '*osd*log')
    then
        echo failure
        return 1
    fi
    echo "success"

    # $expfile already includes $dir; prefixing it again pointed rm at a
    # path that never exists and left the export file behind.
    rm -f $expfile

    delete_pool $poolname
    kill_daemons $dir || return 1
}
ceph pg dump pgs + for i in $non_divergent + do + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) config set objectstore_blackhole 1 + done + + # Do some creates to hit case 2 + echo 'create new divergent objects' + for i in $(seq 1 $DIVERGENT_CREATE) + do + rados -p $poolname create newobject_$i & + done + # Write some soon to be divergent + echo 'writing divergent objects' + for i in $(seq 1 $DIVERGENT_WRITE) + do + rados -p $poolname put existing_$i $dummyfile2 & + done + # Remove some soon to be divergent + echo 'remove divergent objects' + for i in $(seq 1 $DIVERGENT_REMOVE) + do + rmi=$(expr $i + $DIVERGENT_WRITE) + rados -p $poolname rm existing_$rmi & + done + sleep 10 + killall -9 rados + + # kill all the osds but leave divergent in + echo 'killing all the osds' + ceph pg dump pgs + kill_daemons $dir KILL osd || return 1 + for i in $osds + do + ceph osd down osd.$i + done + for i in $non_divergent + do + ceph osd out osd.$i + done + + # bring up non-divergent + echo "bringing up non_divergent $non_divergent" + ceph pg dump pgs + for i in $non_divergent + do + activate_osd $dir $i || return 1 + done + for i in $non_divergent + do + ceph osd in osd.$i + done + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + # write 1 non-divergent object (ensure that old divergent one is divergent) + objname="existing_$(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)" + echo "writing non-divergent object $objname" + ceph pg dump pgs + rados -p $poolname put $objname $dummyfile2 + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + # ensure no recovery of up osds first + echo 'delay recovery' + ceph pg dump pgs + for i in $non_divergent + do + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) set_recovery_delay 100000 + done + + # bring in our divergent friend + echo "revive divergent $divergent" + ceph pg dump pgs + ceph osd set noup + activate_osd $dir $divergent + sleep 5 + + echo 'delay recovery divergent' + ceph pg dump pgs + CEPH_ARGS='' ceph --admin-daemon 
$(get_asok_path osd.${divergent}) set_recovery_delay 100000 + + ceph osd unset noup + + wait_for_osd up 0 + wait_for_osd up 1 + wait_for_osd up 2 + + ceph pg dump pgs + echo 'wait for peering' + ceph pg dump pgs + rados -p $poolname put foo $dummyfile + + # At this point the divergent_priors should have been detected + + echo "killing divergent $divergent" + ceph pg dump pgs + kill_daemons $dir KILL osd.$divergent + + # export a pg + expfile=$dir/exp.$$.out + _objectstore_tool_nodown $dir $divergent --op export-remove --pgid $pgid --file $expfile + _objectstore_tool_nodown $dir $divergent --op import --file $expfile + + echo "reviving divergent $divergent" + ceph pg dump pgs + activate_osd $dir $divergent + wait_for_osd up $divergent + + sleep 20 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) dump_ops_in_flight + + echo "allowing recovery" + ceph pg dump pgs + # Set osd_recovery_delay_start back to 0 and kick the queue + for i in $osds + do + ceph tell osd.$i debug kick_recovery_wq 0 + done + + echo 'reading divergent objects' + ceph pg dump pgs + for i in $(seq 1 $(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)) + do + rados -p $poolname get existing_$i $dir/existing || return 1 + done + for i in $(seq 1 $DIVERGENT_CREATE) + do + rados -p $poolname get newobject_$i $dir/existing + done + rm -f $dir/existing + + grep _merge_object_divergent_entries $(find $dir -name '*osd*log') + # Check for _merge_object_divergent_entries for case #1 + if ! grep -q "_merge_object_divergent_entries: more recent entry found:" $(find $dir -name '*osd*log') + then + echo failure + return 1 + fi + # Check for _merge_object_divergent_entries for case #2 + if ! 
grep -q "_merge_object_divergent_entries.*prior_version or op type indicates creation" $(find $dir -name '*osd*log') + then + echo failure + return 1 + fi + echo "success" + + rm $dir/$expfile + + delete_pool $poolname + kill_daemons $dir || return 1 +} + + +main divergent-priors "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh divergent-priors.sh" +# End: diff --git a/qa/standalone/osd/ec-error-rollforward.sh b/qa/standalone/osd/ec-error-rollforward.sh new file mode 100755 index 000000000..621e6b13f --- /dev/null +++ b/qa/standalone/osd/ec-error-rollforward.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + # Fix port???? + export CEPH_MON="127.0.0.1:7132" # git grep '\<7132\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + export margin=10 + export objects=200 + export poolname=test + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_ec_error_rollforward() { + local dir=$1 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + + ceph osd erasure-code-profile set ec-profile m=2 k=2 crush-failure-domain=osd + ceph osd pool create ec 1 1 erasure ec-profile + + rados -p ec put foo /etc/passwd + + kill -STOP $(cat $dir/osd.2.pid) + + rados -p ec rm foo & + pids="$!" + sleep 1 + rados -p ec rm a & + pids+=" $!" + rados -p ec rm b & + pids+=" $!" + rados -p ec rm c & + pids+=" $!" 
+ sleep 1 + # Use SIGKILL so stopped osd.2 will terminate + # and kill_daemons waits for daemons to die + kill_daemons $dir KILL osd + kill $pids + wait + + activate_osd $dir 0 || return 1 + activate_osd $dir 1 || return 1 + activate_osd $dir 2 || return 1 + activate_osd $dir 3 || return 1 + + wait_for_clean || return 1 +} + +main ec-error-rollforward "$@" diff --git a/qa/standalone/osd/osd-bench.sh b/qa/standalone/osd/osd-bench.sh new file mode 100755 index 000000000..eb1a6a440 --- /dev/null +++ b/qa/standalone/osd/osd-bench.sh @@ -0,0 +1,97 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com> +# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
+# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7106" # git grep '\<7106\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--debug-bluestore 20 " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_bench() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + + local osd_bench_small_size_max_iops=$(CEPH_ARGS='' ceph-conf \ + --show-config-value osd_bench_small_size_max_iops) + local osd_bench_large_size_max_throughput=$(CEPH_ARGS='' ceph-conf \ + --show-config-value osd_bench_large_size_max_throughput) + local osd_bench_max_block_size=$(CEPH_ARGS='' ceph-conf \ + --show-config-value osd_bench_max_block_size) + local osd_bench_duration=$(CEPH_ARGS='' ceph-conf \ + --show-config-value osd_bench_duration) + + # + # block size too high + # + expect_failure $dir osd_bench_max_block_size \ + ceph tell osd.0 bench 1024 $((osd_bench_max_block_size + 1)) || return 1 + + # + # count too high for small (< 1MB) block sizes + # + local bsize=1024 + local max_count=$(($bsize * $osd_bench_duration * $osd_bench_small_size_max_iops)) + expect_failure $dir bench_small_size_max_iops \ + ceph tell osd.0 bench $(($max_count + 1)) $bsize || return 1 + + # + # count too high for large (>= 1MB) block sizes + # + local bsize=$((1024 * 1024 + 1)) + local max_count=$(($osd_bench_large_size_max_throughput * $osd_bench_duration)) + expect_failure $dir osd_bench_large_size_max_throughput \ + ceph tell osd.0 bench $(($max_count + 1)) $bsize || return 1 + + # + # default values should work + # + ceph tell osd.0 bench || return 1 + + # + # test object_size < block_size + ceph tell osd.0 bench 10 14456 4444 3 + # + 
+ # + # test object_size < block_size & object_size = 0(default value) + # + ceph tell osd.0 bench 1 14456 +} + +main osd-bench "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/osd/osd-bench.sh" +# End: diff --git a/qa/standalone/osd/osd-bluefs-volume-ops.sh b/qa/standalone/osd/osd-bluefs-volume-ops.sh new file mode 100755 index 000000000..aedfbc9b5 --- /dev/null +++ b/qa/standalone/osd/osd-bluefs-volume-ops.sh @@ -0,0 +1,497 @@ +#!/usr/bin/env bash + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +[ `uname` = FreeBSD ] && exit 0 + +function run() { + local dir=$1 + shift + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_bluestore() { + local dir=$1 + + local flimit=$(ulimit -n) + if [ $flimit -lt 1536 ]; then + echo "Low open file limit ($flimit), test may fail. Increase to 1536 or higher and retry if that happens." 
+ fi + export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--bluestore_block_size=2147483648 " + CEPH_ARGS+="--bluestore_block_db_create=true " + CEPH_ARGS+="--bluestore_block_db_size=1073741824 " + CEPH_ARGS+="--bluestore_block_wal_size=536870912 " + CEPH_ARGS+="--bluestore_block_wal_create=true " + CEPH_ARGS+="--bluestore_fsck_on_mount=true " + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + osd_pid0=$(cat $dir/osd.0.pid) + run_osd $dir 1 || return 1 + osd_pid1=$(cat $dir/osd.1.pid) + run_osd $dir 2 || return 1 + osd_pid2=$(cat $dir/osd.2.pid) + run_osd $dir 3 || return 1 + osd_pid3=$(cat $dir/osd.3.pid) + + sleep 5 + + create_pool foo 16 + + # write some objects + timeout 60 rados bench -p foo 30 write -b 4096 --no-cleanup #|| return 1 + + echo "after bench" + + # kill + while kill $osd_pid0; do sleep 1 ; done + ceph osd down 0 + while kill $osd_pid1; do sleep 1 ; done + ceph osd down 1 + while kill $osd_pid2; do sleep 1 ; done + ceph osd down 2 + while kill $osd_pid3; do sleep 1 ; done + ceph osd down 3 + + # expand slow devices + ceph-bluestore-tool --path $dir/0 fsck || return 1 + ceph-bluestore-tool --path $dir/1 fsck || return 1 + ceph-bluestore-tool --path $dir/2 fsck || return 1 + ceph-bluestore-tool --path $dir/3 fsck || return 1 + + truncate $dir/0/block -s 4294967296 # 4GB + ceph-bluestore-tool --path $dir/0 bluefs-bdev-expand || return 1 + truncate $dir/1/block -s 4311744512 # 4GB + 16MB + ceph-bluestore-tool --path $dir/1 bluefs-bdev-expand || return 1 + truncate $dir/2/block -s 4295099392 # 4GB + 129KB + ceph-bluestore-tool --path $dir/2 bluefs-bdev-expand || return 1 + truncate $dir/3/block -s 4293918720 # 4GB - 1MB + ceph-bluestore-tool --path $dir/3 bluefs-bdev-expand || return 1 + + # slow, DB, WAL -> slow, DB + ceph-bluestore-tool --path $dir/0 fsck || 
return 1 + ceph-bluestore-tool --path $dir/1 fsck || return 1 + ceph-bluestore-tool --path $dir/2 fsck || return 1 + ceph-bluestore-tool --path $dir/3 fsck || return 1 + + ceph-bluestore-tool --path $dir/0 bluefs-bdev-sizes + + ceph-bluestore-tool --path $dir/0 \ + --devs-source $dir/0/block.wal \ + --dev-target $dir/0/block.db \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/0 fsck || return 1 + + # slow, DB, WAL -> slow, WAL + ceph-bluestore-tool --path $dir/1 \ + --devs-source $dir/1/block.db \ + --dev-target $dir/1/block \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/1 fsck || return 1 + + # slow, DB, WAL -> slow + ceph-bluestore-tool --path $dir/2 \ + --devs-source $dir/2/block.wal \ + --devs-source $dir/2/block.db \ + --dev-target $dir/2/block \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/2 fsck || return 1 + + # slow, DB, WAL -> slow, WAL (negative case) + ceph-bluestore-tool --path $dir/3 \ + --devs-source $dir/3/block.db \ + --dev-target $dir/3/block.wal \ + --command bluefs-bdev-migrate + + # Migration to WAL is unsupported + if [ $? 
-eq 0 ]; then + return 1 + fi + ceph-bluestore-tool --path $dir/3 fsck || return 1 + + # slow, DB, WAL -> slow, DB (WAL to slow then slow to DB) + ceph-bluestore-tool --path $dir/3 \ + --devs-source $dir/3/block.wal \ + --dev-target $dir/3/block \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/3 fsck || return 1 + + ceph-bluestore-tool --path $dir/3 \ + --devs-source $dir/3/block \ + --dev-target $dir/3/block.db \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/3 fsck || return 1 + + activate_osd $dir 0 || return 1 + osd_pid0=$(cat $dir/osd.0.pid) + activate_osd $dir 1 || return 1 + osd_pid1=$(cat $dir/osd.1.pid) + activate_osd $dir 2 || return 1 + osd_pid2=$(cat $dir/osd.2.pid) + activate_osd $dir 3 || return 1 + osd_pid3=$(cat $dir/osd.3.pid) + + wait_for_clean || return 1 + + # write some objects + timeout 60 rados bench -p foo 30 write -b 4096 --no-cleanup #|| return 1 + + # kill + while kill $osd_pid0; do sleep 1 ; done + ceph osd down 0 + while kill $osd_pid1; do sleep 1 ; done + ceph osd down 1 + while kill $osd_pid2; do sleep 1 ; done + ceph osd down 2 + while kill $osd_pid3; do sleep 1 ; done + ceph osd down 3 + + # slow, DB -> slow, DB, WAL + ceph-bluestore-tool --path $dir/0 fsck || return 1 + + dd if=/dev/zero of=$dir/0/wal count=512 bs=1M + ceph-bluestore-tool --path $dir/0 \ + --dev-target $dir/0/wal \ + --command bluefs-bdev-new-wal || return 1 + + ceph-bluestore-tool --path $dir/0 fsck || return 1 + + # slow, WAL -> slow, DB, WAL + ceph-bluestore-tool --path $dir/1 fsck || return 1 + + dd if=/dev/zero of=$dir/1/db count=1024 bs=1M + ceph-bluestore-tool --path $dir/1 \ + --dev-target $dir/1/db \ + --command bluefs-bdev-new-db || return 1 + + ceph-bluestore-tool --path $dir/1 \ + --devs-source $dir/1/block \ + --dev-target $dir/1/block.db \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/1 fsck || return 1 + + # slow -> slow, DB, WAL + ceph-bluestore-tool 
--path $dir/2 fsck || return 1 + + ceph-bluestore-tool --path $dir/2 \ + --command bluefs-bdev-new-db || return 1 + + ceph-bluestore-tool --path $dir/2 \ + --command bluefs-bdev-new-wal || return 1 + + ceph-bluestore-tool --path $dir/2 \ + --devs-source $dir/2/block \ + --dev-target $dir/2/block.db \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/2 fsck || return 1 + + # slow, DB -> slow, WAL + ceph-bluestore-tool --path $dir/3 fsck || return 1 + + ceph-bluestore-tool --path $dir/3 \ + --command bluefs-bdev-new-wal || return 1 + + ceph-bluestore-tool --path $dir/3 \ + --devs-source $dir/3/block.db \ + --dev-target $dir/3/block \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/3 fsck || return 1 + + activate_osd $dir 0 || return 1 + osd_pid0=$(cat $dir/osd.0.pid) + activate_osd $dir 1 || return 1 + osd_pid1=$(cat $dir/osd.1.pid) + activate_osd $dir 2 || return 1 + osd_pid2=$(cat $dir/osd.2.pid) + activate_osd $dir 3 || return 1 + osd_pid3=$(cat $dir/osd.3.pid) + + # write some objects + timeout 60 rados bench -p foo 30 write -b 4096 --no-cleanup #|| return 1 + + # kill + while kill $osd_pid0; do sleep 1 ; done + ceph osd down 0 + while kill $osd_pid1; do sleep 1 ; done + ceph osd down 1 + while kill $osd_pid2; do sleep 1 ; done + ceph osd down 2 + while kill $osd_pid3; do sleep 1 ; done + ceph osd down 3 + + # slow, DB1, WAL -> slow, DB2, WAL + ceph-bluestore-tool --path $dir/0 fsck || return 1 + + dd if=/dev/zero of=$dir/0/db2 count=1024 bs=1M + ceph-bluestore-tool --path $dir/0 \ + --devs-source $dir/0/block.db \ + --dev-target $dir/0/db2 \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/0 fsck || return 1 + + # slow, DB, WAL1 -> slow, DB, WAL2 + + dd if=/dev/zero of=$dir/0/wal2 count=512 bs=1M + ceph-bluestore-tool --path $dir/0 \ + --devs-source $dir/0/block.wal \ + --dev-target $dir/0/wal2 \ + --command bluefs-bdev-migrate || return 1 + rm -rf $dir/0/wal + + 
ceph-bluestore-tool --path $dir/0 fsck || return 1 + + # slow, DB + WAL -> slow, DB2 -> slow + ceph-bluestore-tool --path $dir/1 fsck || return 1 + + dd if=/dev/zero of=$dir/1/db2 count=1024 bs=1M + ceph-bluestore-tool --path $dir/1 \ + --devs-source $dir/1/block.db \ + --devs-source $dir/1/block.wal \ + --dev-target $dir/1/db2 \ + --command bluefs-bdev-migrate || return 1 + + rm -rf $dir/1/db + + ceph-bluestore-tool --path $dir/1 fsck || return 1 + + ceph-bluestore-tool --path $dir/1 \ + --devs-source $dir/1/block.db \ + --dev-target $dir/1/block \ + --command bluefs-bdev-migrate || return 1 + + rm -rf $dir/1/db2 + + ceph-bluestore-tool --path $dir/1 fsck || return 1 + + # slow -> slow, DB (negative case) + ceph-objectstore-tool --type bluestore --data-path $dir/2 \ + --op fsck --no-mon-config || return 1 + + dd if=/dev/zero of=$dir/2/db2 count=1024 bs=1M + ceph-bluestore-tool --path $dir/2 \ + --devs-source $dir/2/block \ + --dev-target $dir/2/db2 \ + --command bluefs-bdev-migrate + + # Migration from slow-only to new device is unsupported + if [ $? 
-eq 0 ]; then + return 1 + fi + ceph-bluestore-tool --path $dir/2 fsck || return 1 + + # slow + DB + WAL -> slow, DB2 + dd if=/dev/zero of=$dir/2/db2 count=1024 bs=1M + + ceph-bluestore-tool --path $dir/2 \ + --devs-source $dir/2/block \ + --devs-source $dir/2/block.db \ + --devs-source $dir/2/block.wal \ + --dev-target $dir/2/db2 \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/2 fsck || return 1 + + # slow + WAL -> slow2, WAL2 + dd if=/dev/zero of=$dir/3/wal2 count=1024 bs=1M + + ceph-bluestore-tool --path $dir/3 \ + --devs-source $dir/3/block \ + --devs-source $dir/3/block.wal \ + --dev-target $dir/3/wal2 \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/3 fsck || return 1 + + activate_osd $dir 0 || return 1 + osd_pid0=$(cat $dir/osd.0.pid) + activate_osd $dir 1 || return 1 + osd_pid1=$(cat $dir/osd.1.pid) + activate_osd $dir 2 || return 1 + osd_pid2=$(cat $dir/osd.2.pid) + activate_osd $dir 3 || return 1 + osd_pid3=$(cat $dir/osd.3.pid) + + # write some objects + timeout 60 rados bench -p foo 30 write -b 4096 --no-cleanup #|| return 1 + + wait_for_clean || return 1 +} + +function TEST_bluestore2() { + local dir=$1 + + local flimit=$(ulimit -n) + if [ $flimit -lt 1536 ]; then + echo "Low open file limit ($flimit), test may fail. Increase to 1536 or higher and retry if that happens." 
+ fi + export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--bluestore_block_size=4294967296 " + CEPH_ARGS+="--bluestore_block_db_create=true " + CEPH_ARGS+="--bluestore_block_db_size=1073741824 " + CEPH_ARGS+="--bluestore_block_wal_create=false " + CEPH_ARGS+="--bluestore_fsck_on_mount=true " + CEPH_ARGS+="--osd_pool_default_size=1 " + CEPH_ARGS+="--osd_pool_default_min_size=1 " + CEPH_ARGS+="--bluestore_debug_enforce_settings=ssd " + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + osd_pid0=$(cat $dir/osd.0.pid) + + sleep 5 + create_pool foo 16 + + retry = 0 + while [[ $retry -le 5 ]]; do + # write some objects + timeout 60 rados bench -p foo 10 write --write-omap --no-cleanup #|| return 1 + + #give RocksDB some time to cooldown and put files to slow level(s) + sleep 10 + + db_used=$( ceph tell osd.0 perf dump bluefs | jq ".bluefs.db_used_bytes" ) + spilled_over=$( ceph tell osd.0 perf dump bluefs | jq ".bluefs.slow_used_bytes" ) + ((retry+=1)) + test $spilled_over -eq 0 || break + done + test $spilled_over -gt 0 || return 1 + + while kill $osd_pid0; do sleep 1 ; done + ceph osd down 0 + + ceph-bluestore-tool --path $dir/0 \ + --devs-source $dir/0/block.db \ + --dev-target $dir/0/block \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/0 \ + --command bluefs-bdev-sizes || return 1 + + ceph-bluestore-tool --path $dir/0 \ + --command fsck || return 1 + + activate_osd $dir 0 || return 1 + osd_pid0=$(cat $dir/osd.0.pid) + + wait_for_clean || return 1 +} + +function TEST_bluestore_expand() { + local dir=$1 + + local flimit=$(ulimit -n) + if [ $flimit -lt 1536 ]; then + echo "Low open file limit ($flimit), test may fail. Increase to 1536 or higher and retry if that happens." 
+ fi + export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--bluestore_block_size=4294967296 " + CEPH_ARGS+="--bluestore_block_db_create=true " + CEPH_ARGS+="--bluestore_block_db_size=1073741824 " + CEPH_ARGS+="--bluestore_block_wal_create=false " + CEPH_ARGS+="--bluestore_fsck_on_mount=true " + CEPH_ARGS+="--osd_pool_default_size=1 " + CEPH_ARGS+="--osd_pool_default_min_size=1 " + CEPH_ARGS+="--bluestore_debug_enforce_settings=ssd " + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + osd_pid0=$(cat $dir/osd.0.pid) + + sleep 5 + create_pool foo 16 + + # write some objects + timeout 60 rados bench -p foo 30 write -b 4096 --no-cleanup #|| return 1 + sleep 5 + + total_space_before=$( ceph tell osd.0 perf dump bluefs | jq ".bluefs.slow_total_bytes" ) + free_space_before=`ceph tell osd.0 bluestore bluefs device info | grep "BDEV_SLOW" -A 2 | grep free | cut -d':' -f 2 | cut -d"," -f 1 | cut -d' ' -f 2` + + # kill + while kill $osd_pid0; do sleep 1 ; done + ceph osd down 0 + + # destage allocation to file before expand (in case fast-shutdown skipped that step) + ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 allocmap || return 1 + + # expand slow devices + ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 fsck || return 1 + + requested_space=4294967296 # 4GB + truncate $dir/0/block -s $requested_space + ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 bluefs-bdev-expand || return 1 + + # slow, DB, WAL -> slow, DB + ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 fsck || return 1 + + # compare allocation-file with RocksDB state + ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 qfsck || return 1 + + ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 
bluefs-bdev-sizes + + activate_osd $dir 0 || return 1 + osd_pid0=$(cat $dir/osd.0.pid) + + wait_for_clean || return 1 + + total_space_after=$( ceph tell osd.0 perf dump bluefs | jq ".bluefs.slow_total_bytes" ) + free_space_after=`ceph tell osd.0 bluestore bluefs device info | grep "BDEV_SLOW" -A 2 | grep free | cut -d':' -f 2 | cut -d"," -f 1 | cut -d' ' -f 2` + + if [$total_space_after != $requested_space]; then + echo "total_space_after = $total_space_after" + echo "requested_space = $requested_space" + return 1; + fi + + total_space_added=$((total_space_after - total_space_before)) + free_space_added=$((free_space_after - free_space_before)) + + let new_used_space=($total_space_added - $free_space_added) + echo $new_used_space + # allow upto 128KB to be consumed + if [ $new_used_space -gt 131072 ]; then + echo "total_space_added = $total_space_added" + echo "free_space_added = $free_space_added" + return 1; + fi + + # kill + while kill $osd_pid0; do sleep 1 ; done + ceph osd down 0 + + ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 qfsck || return 1 +} + +main osd-bluefs-volume-ops "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/osd/osd-bluefs-volume-ops.sh" +# End: diff --git a/qa/standalone/osd/osd-config.sh b/qa/standalone/osd/osd-config.sh new file mode 100755 index 000000000..126c2f7de --- /dev/null +++ b/qa/standalone/osd/osd-config.sh @@ -0,0 +1,97 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com> +# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7100" # git grep '\<7100\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_config_init() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + local stale=1000 + local cache=500 + run_osd $dir 0 \ + --osd-map-cache-size=$cache \ + --osd-pg-epoch-persisted-max-stale=$stale \ + || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log flush || return 1 + grep 'is not > osd_pg_epoch_persisted_max_stale' $dir/osd.0.log || return 1 +} + +function TEST_config_track() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + + local osd_map_cache_size=$(CEPH_ARGS='' ceph-conf \ + --show-config-value osd_map_cache_size) + local osd_pg_epoch_persisted_max_stale=$(CEPH_ARGS='' ceph-conf \ + --show-config-value osd_pg_epoch_persisted_max_stale) + + # + # increase the osd_pg_epoch_persisted_max_stale above the default cache_size + # + ! 
grep 'is not > osd_pg_epoch_persisted_max_stale' $dir/osd.0.log || return 1 + local stale=$(($osd_map_cache_size * 2)) + ceph tell osd.0 injectargs "--osd-pg-epoch-persisted-max-stale $stale" || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log flush || return 1 + grep 'is not > osd_pg_epoch_persisted_max_stale' $dir/osd.0.log || return 1 + rm $dir/osd.0.log + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log reopen || return 1 +} + +function TEST_default_adjustment() { + a=$(ceph-osd --no-mon-config --show-config-value rgw_torrent_origin) + b=$(ceph-osd --no-mon-config --show-config-value rgw_torrent_origin --default-rgw-torrent-origin default) + c=$(ceph-osd --no-mon-config --show-config-value rgw_torrent_origin --default-rgw-torrent-origin arg) + [ "$a" != "default" ] || return 1 + [ "$b" = "default" ] || return 1 + [ "$c" = "arg" ] || return 1 + + a=$(ceph-osd --no-mon-config --show-config-value log_to_file) + b=$(ceph-osd --no-mon-config --show-config-value log_to_file --default-log-to-file=false) + c=$(ceph-osd --no-mon-config --show-config-value log_to_file --default-log-to-file=false --log-to-file) + [ "$a" = "true" ] || return 1 + [ "$b" = "false" ] || return 1 + [ "$c" = "true" ] || return 1 +} + +main osd-config "$@" + +# Local Variables: +# compile-command: "cd ../.. 
; make -j4 && test/osd/osd-config.sh" +# End: diff --git a/qa/standalone/osd/osd-copy-from.sh b/qa/standalone/osd/osd-copy-from.sh new file mode 100755 index 000000000..8ac0ab541 --- /dev/null +++ b/qa/standalone/osd/osd-copy-from.sh @@ -0,0 +1,68 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com> +# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com> +# +# Author: Loic Dachary <loic@dachary.org> +# Author: Sage Weil <sage@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7111" # git grep '\<7111\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_copy_from() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + create_rbd_pool || return 1 + + # success + rados -p rbd put foo $(which rados) + rados -p rbd cp foo foo2 + rados -p rbd stat foo2 + + # failure + ceph tell osd.\* injectargs -- --osd-debug-inject-copyfrom-error + ! rados -p rbd cp foo foo3 + ! rados -p rbd stat foo3 + + # success again + ceph tell osd.\* injectargs -- --no-osd-debug-inject-copyfrom-error + ! 
rados -p rbd cp foo foo3 + rados -p rbd stat foo3 +} + +main osd-copy-from "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/osd/osd-bench.sh" +# End: diff --git a/qa/standalone/osd/osd-dup.sh b/qa/standalone/osd/osd-dup.sh new file mode 100755 index 000000000..ab442c538 --- /dev/null +++ b/qa/standalone/osd/osd-dup.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +[ `uname` = FreeBSD ] && exit 0 + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + # avoid running out of fds in rados bench + CEPH_ARGS+="--filestore_wbthrottle_xfs_ios_hard_limit=900 " + CEPH_ARGS+="--filestore_wbthrottle_btrfs_ios_hard_limit=900 " + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +main osd-dup "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/osd/osd-dup.sh" +# End: diff --git a/qa/standalone/osd/osd-fast-mark-down.sh b/qa/standalone/osd/osd-fast-mark-down.sh new file mode 100755 index 000000000..0ef9d8ce4 --- /dev/null +++ b/qa/standalone/osd/osd-fast-mark-down.sh @@ -0,0 +1,111 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2016 Piotr Dałek <git@predictor.org.pl> +# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com> +# +# Author: Piotr Dałek <git@predictor.org.pl> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh +MAX_PROPAGATION_TIME=30 + +function run() { + local dir=$1 + shift + rm -f $dir/*.pid + export CEPH_MON="127.0.0.1:7126" # git grep '\<7126\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + + OLD_ARGS=$CEPH_ARGS + CEPH_ARGS+="--osd-fast-fail-on-connection-refused=false " + echo "Ensuring old behavior is there..." + test_fast_kill $dir && (echo "OSDs died too early! Old behavior doesn't work." ; return 1) + + CEPH_ARGS=$OLD_ARGS"--osd-fast-fail-on-connection-refused=true " + OLD_ARGS=$CEPH_ARGS + + CEPH_ARGS=$OLD_ARGS"--ms_type=async --mon-host=$CEPH_MON" + echo "Testing async msgr..." + test_fast_kill $dir || return 1 + + return 0 + +} + +function test_fast_kill() { + # create cluster with 3 osds + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=3 || return 1 + run_mgr $dir x || return 1 + for oi in {0..2}; do + run_osd $dir $oi || return 1 + pids[$oi]=$(cat $dir/osd.$oi.pid) + done + + create_rbd_pool || return 1 + + # make some objects so osds to ensure connectivity between osds + timeout 20 rados -p rbd bench 10 write -b 4096 --max-objects 128 --no-cleanup || return 1 + sleep 1 + + killid=0 + previd=0 + + # kill random osd and see if after max MAX_PROPAGATION_TIME, the osd count decreased. + for i in {1..2}; do + while [ $killid -eq $previd ]; do + killid=${pids[$RANDOM%${#pids[@]}]} + done + previd=$killid + + kill -9 $killid + time_left=$MAX_PROPAGATION_TIME + down_osds=0 + + while [ $time_left -gt 0 ]; do + sleep 1 + time_left=$[$time_left - 1]; + + grep -m 1 -c -F "ms_handle_refused" $dir/osd.*.log > /dev/null + if [ $? 
-ne 0 ]; then + continue + fi + + down_osds=$(ceph osd tree | grep -c down) + if [ $down_osds -lt $i ]; then + # osds not marked down yet, try again in a second + continue + elif [ $down_osds -gt $i ]; then + echo Too many \($down_osds\) osds died! + return 1 + else + break + fi + done + + if [ $down_osds -lt $i ]; then + echo Killed the OSD, yet it is not marked down + ceph osd tree + return 1 + fi + done + pkill -SIGTERM rados + teardown $dir || return 1 +} + +main osd-fast-mark-down "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/osd/osd-fast-mark-down.sh" +# End: diff --git a/qa/standalone/osd/osd-force-create-pg.sh b/qa/standalone/osd/osd-force-create-pg.sh new file mode 100755 index 000000000..ca4b0239e --- /dev/null +++ b/qa/standalone/osd/osd-force-create-pg.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7145" # git grep '\<7145\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_reuse_id() { + local dir=$1 + + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + ceph osd pool create foo 50 || return 1 + wait_for_clean || return 1 + + kill_daemons $dir TERM osd.0 + kill_daemons $dir TERM osd.1 + kill_daemons $dir TERM osd.2 + ceph-objectstore-tool --data-path $dir/0 --op remove --pgid 1.0 --force + ceph-objectstore-tool --data-path $dir/1 --op remove --pgid 1.0 --force + ceph-objectstore-tool --data-path $dir/2 --op remove --pgid 1.0 --force + activate_osd $dir 0 || return 1 + 
activate_osd $dir 1 || return 1 + activate_osd $dir 2 || return 1 + sleep 10 + ceph pg ls | grep 1.0 | grep stale || return 1 + + ceph osd force-create-pg 1.0 --yes-i-really-mean-it || return 1 + wait_for_clean || return 1 +} + +main osd-force-create-pg "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/osd/osd-force-create-pg.sh" +# End: diff --git a/qa/standalone/osd/osd-markdown.sh b/qa/standalone/osd/osd-markdown.sh new file mode 100755 index 000000000..5c4a78440 --- /dev/null +++ b/qa/standalone/osd/osd-markdown.sh @@ -0,0 +1,149 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2015 Intel <contact@intel.com.com> +# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com> +# +# Author: Xiaoxi Chen <xiaoxi.chen@intel.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
+# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7108" # git grep '\<7108\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function markdown_N_impl() { + markdown_times=$1 + total_time=$2 + sleeptime=$3 + for i in `seq 1 $markdown_times` + do + # check the OSD is UP + ceph tell osd.0 get_latest_osdmap || return 1 + ceph osd tree + ceph osd tree | grep osd.0 |grep up || return 1 + # mark the OSD down. + # override any dup setting in the environment to ensure we do this + # exactly once (modulo messenger failures, at least; we can't *actually* + # provide exactly-once semantics for mon commands). + ( unset CEPH_CLI_TEST_DUP_COMMAND ; ceph osd down 0 ) + sleep $sleeptime + done +} + + +function TEST_markdown_exceed_maxdown_count() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + create_rbd_pool || return 1 + + # 3+1 times within 300s, osd should stay dead on the 4th time + local count=3 + local sleeptime=10 + local period=300 + ceph tell osd.0 injectargs '--osd_max_markdown_count '$count'' || return 1 + ceph tell osd.0 injectargs '--osd_max_markdown_period '$period'' || return 1 + + markdown_N_impl $(($count+1)) $period $sleeptime + # down N+1 times ,the osd.0 should die + ceph osd tree | grep down | grep osd.0 || return 1 +} + +function TEST_markdown_boot() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + create_rbd_pool || return 1 + + # 3 times within 120s, should stay up + 
local count=3 + local sleeptime=10 + local period=120 + ceph tell osd.0 injectargs '--osd_max_markdown_count '$count'' || return 1 + ceph tell osd.0 injectargs '--osd_max_markdown_period '$period'' || return 1 + + markdown_N_impl $count $period $sleeptime + #down N times, osd.0 should be up + sleep 15 # give osd plenty of time to notice and come back up + ceph tell osd.0 get_latest_osdmap || return 1 + ceph osd tree | grep up | grep osd.0 || return 1 +} + +function TEST_markdown_boot_exceed_time() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + create_rbd_pool || return 1 + + # 3+1 times, but over 40s, > 20s, so should stay up + local count=3 + local period=20 + local sleeptime=10 + ceph tell osd.0 injectargs '--osd_max_markdown_count '$count'' || return 1 + ceph tell osd.0 injectargs '--osd_max_markdown_period '$period'' || return 1 + + markdown_N_impl $(($count+1)) $period $sleeptime + sleep 15 # give osd plenty of time to notice and come back up + ceph tell osd.0 get_latest_osdmap || return 1 + ceph osd tree | grep up | grep osd.0 || return 1 +} + +function TEST_osd_stop() { + + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + osd_0_pid=$(cat $dir/osd.0.pid) + ps -p $osd_0_pid || return 1 + + ceph osd tree | grep osd.0 | grep up || return 1 + ceph osd stop osd.0 + sleep 15 # give osd plenty of time to notice and exit + ceph osd tree | grep down | grep osd.0 || return 1 + ! 
ps -p $osd_0_pid || return 1 +} + +main osd-markdown "$@" diff --git a/qa/standalone/osd/osd-reactivate.sh b/qa/standalone/osd/osd-reactivate.sh new file mode 100755 index 000000000..6d6438629 --- /dev/null +++ b/qa/standalone/osd/osd-reactivate.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +# +# Author: Vicente Cheng <freeze.bilsted@gmail.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7122" # git grep '\<7122\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_reactivate() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + + kill_daemons $dir TERM osd || return 1 + + ready_path=$dir"/0/ready" + activate_path=$dir"/0/active" + # trigger mkfs again + rm -rf $ready_path $activate_path + activate_osd $dir 0 || return 1 + +} + +main osd-reactivate "$@" + +# Local Variables: +# compile-command: "cd ../.. 
; make -j4 && test/osd/osd-reactivate.sh" +# End: diff --git a/qa/standalone/osd/osd-recovery-prio.sh b/qa/standalone/osd/osd-recovery-prio.sh new file mode 100755 index 000000000..02b65f67a --- /dev/null +++ b/qa/standalone/osd/osd-recovery-prio.sh @@ -0,0 +1,542 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2019 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + # Fix port???? + export CEPH_MON="127.0.0.1:7114" # git grep '\<7114\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON --osd_max_backfills=1 --debug_reserver=20 " + # Set osd op queue = wpq for the tests. Recovery priority is not + # considered by mclock_scheduler leading to unexpected results. 
+ CEPH_ARGS+="--osd-op-queue=wpq " + export objects=200 + export poolprefix=test + export FORCE_PRIO="255" # See OSD_RECOVERY_PRIORITY_FORCED + export NORMAL_PRIO="190" # See OSD_RECOVERY_PRIORITY_BASE + 10 + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + + +function TEST_recovery_priority() { + local dir=$1 + local pools=10 + local OSDS=5 + local max_tries=10 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + for p in $(seq 1 $pools) + do + create_pool "${poolprefix}$p" 1 1 + ceph osd pool set "${poolprefix}$p" size 2 + done + sleep 5 + + wait_for_clean || return 1 + + ceph pg dump pgs + + # Find 3 pools with a pg with the same primaries but second + # replica on another osd. + local PG1 + local POOLNUM1 + local pool1 + local chk_osd1_1 + local chk_osd1_2 + + local PG2 + local POOLNUM2 + local pool2 + local chk_osd2 + + local PG3 + local POOLNUM3 + local pool3 + + for p in $(seq 1 $pools) + do + ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting + local test_osd1=$(head -1 $dir/acting) + local test_osd2=$(tail -1 $dir/acting) + if [ -z "$PG1" ]; + then + PG1="${p}.0" + POOLNUM1=$p + pool1="${poolprefix}$p" + chk_osd1_1=$test_osd1 + chk_osd1_2=$test_osd2 + elif [ -z "$PG2" -a $chk_osd1_1 = $test_osd1 -a $chk_osd1_2 != $test_osd2 ]; + then + PG2="${p}.0" + POOLNUM2=$p + pool2="${poolprefix}$p" + chk_osd2=$test_osd2 + elif [ -n "$PG2" -a $chk_osd1_1 = $test_osd1 -a $chk_osd1_2 != $test_osd2 -a "$chk_osd2" != $test_osd2 ]; + then + PG3="${p}.0" + POOLNUM3=$p + pool3="${poolprefix}$p" + break + fi + done + rm -f $dir/acting + + if [ "$pool2" = "" -o "pool3" = "" ]; + then + echo "Failure to find appropirate PGs" + return 1 + fi + + for p in $(seq 1 $pools) + do + if [ $p != $POOLNUM1 -a $p != $POOLNUM2 
-a $p != $POOLNUM3 ]; + then + delete_pool ${poolprefix}$p + fi + done + + ceph osd pool set $pool2 size 1 --yes-i-really-mean-it + ceph osd pool set $pool3 size 1 --yes-i-really-mean-it + wait_for_clean || return 1 + + dd if=/dev/urandom of=$dir/data bs=1M count=10 + p=1 + for pname in $pool1 $pool2 $pool3 + do + for i in $(seq 1 $objects) + do + rados -p ${pname} put obj${i}-p${p} $dir/data + done + p=$(expr $p + 1) + done + + local otherosd=$(get_not_primary $pool1 obj1-p1) + + ceph pg dump pgs + ERRORS=0 + + ceph osd set norecover + ceph osd set noout + + # Get a pg to want to recover and quickly force it + # to be preempted. + ceph osd pool set $pool3 size 2 + sleep 2 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1 + + # 3. Item is in progress, adjust priority with no higher priority waiting + for i in $(seq 1 $max_tries) + do + if ! ceph pg force-recovery $PG3 2>&1 | grep -q "doesn't require recovery"; then + break + fi + if [ "$i" = "$max_tries" ]; then + echo "ERROR: Didn't appear to be able to force-recovery" + ERRORS=$(expr $ERRORS + 1) + fi + sleep 2 + done + flush_pg_stats || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1 + + ceph osd out osd.$chk_osd1_2 + sleep 2 + flush_pg_stats || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1 + ceph pg dump pgs + + ceph osd pool set $pool2 size 2 + sleep 2 + flush_pg_stats || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1 + cat $dir/out + ceph pg dump pgs + + PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG1}\")).prio") + if [ "$PRIO" != "$NORMAL_PRIO" ]; + then + echo "The normal PG ${PG1} doesn't have prio $NORMAL_PRIO queued waiting" + ERRORS=$(expr $ERRORS + 1) + fi + + # Using eval will strip 
double-quotes from item + eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG3} ]; + then + echo "The first force-recovery PG $PG3 didn't become the in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $FORCE_PRIO ]; + then + echo "The first force-recovery PG ${PG3} doesn't have prio $FORCE_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + # 1. Item is queued, re-queue with new priority + for i in $(seq 1 $max_tries) + do + if ! ceph pg force-recovery $PG2 2>&1 | grep -q "doesn't require recovery"; then + break + fi + if [ "$i" = "$max_tries" ]; then + echo "ERROR: Didn't appear to be able to force-recovery" + ERRORS=$(expr $ERRORS + 1) + fi + sleep 2 + done + sleep 2 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1 + cat $dir/out + PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio") + if [ "$PRIO" != "$FORCE_PRIO" ]; + then + echo "The second force-recovery PG ${PG2} doesn't have prio $FORCE_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + flush_pg_stats || return 1 + + # 4. 
Item is in progress, if higher priority items waiting prempt item + #ceph osd unset norecover + ceph pg cancel-force-recovery $PG3 || return 1 + sleep 2 + #ceph osd set norecover + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1 + cat $dir/out + PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG3}\")).prio") + if [ "$PRIO" != "$NORMAL_PRIO" ]; + then + echo "After cancel-recovery PG ${PG3} doesn't have prio $NORMAL_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + + eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG2} ]; + then + echo "The force-recovery PG $PG2 didn't become the in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $FORCE_PRIO ]; + then + echo "The first force-recovery PG ${PG2} doesn't have prio $FORCE_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + ceph pg cancel-force-recovery $PG2 || return 1 + sleep 5 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1 + + # 2. 
Item is queued, re-queue and preempt because new priority higher than an in progress item + flush_pg_stats || return 1 + ceph pg force-recovery $PG3 || return 1 + sleep 2 + + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1 + cat $dir/out + PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio") + if [ "$PRIO" != "$NORMAL_PRIO" ]; + then + echo "After cancel-force-recovery PG ${PG3} doesn't have prio $NORMAL_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + + eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG3} ]; + then + echo "The force-recovery PG $PG3 didn't get promoted to an in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $FORCE_PRIO ]; + then + echo "The force-recovery PG ${PG2} doesn't have prio $FORCE_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + ceph osd unset noout + ceph osd unset norecover + + wait_for_clean "CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations" || return 1 + + ceph pg dump pgs + + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_pgstate_history + + if [ $ERRORS != "0" ]; + then + echo "$ERRORS error(s) found" + else + echo TEST PASSED + fi + + delete_pool $pool1 + delete_pool $pool2 + delete_pool $pool3 + kill_daemons $dir || return 1 + return $ERRORS +} + +# +# Show that pool recovery_priority is added to recovery priority +# +# Create 2 pools with 2 OSDs with different primarys +# pool 1 with recovery_priority 1 +# pool 2 with recovery_priority 2 +# +# Start recovery by changing the pool sizes from 1 to 2 +# Use dump_recovery_reservations to verify priorities +function TEST_recovery_pool_priority() { + local dir=$1 + local pools=3 # Don't assume the first 2 pools are exact what we want + local OSDS=2 + + run_mon $dir a || 
return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + for p in $(seq 1 $pools) + do + create_pool "${poolprefix}$p" 1 1 + ceph osd pool set "${poolprefix}$p" size 2 + done + sleep 5 + + wait_for_clean || return 1 + + ceph pg dump pgs + + # Find 2 pools with different primaries which + # means the replica must be on another osd. + local PG1 + local POOLNUM1 + local pool1 + local chk_osd1_1 + local chk_osd1_2 + + local PG2 + local POOLNUM2 + local pool2 + local chk_osd2_1 + local chk_osd2_2 + + for p in $(seq 1 $pools) + do + ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting + local test_osd1=$(head -1 $dir/acting) + local test_osd2=$(tail -1 $dir/acting) + if [ -z "$PG1" ]; + then + PG1="${p}.0" + POOLNUM1=$p + pool1="${poolprefix}$p" + chk_osd1_1=$test_osd1 + chk_osd1_2=$test_osd2 + elif [ $chk_osd1_1 != $test_osd1 ]; + then + PG2="${p}.0" + POOLNUM2=$p + pool2="${poolprefix}$p" + chk_osd2_1=$test_osd1 + chk_osd2_2=$test_osd2 + break + fi + done + rm -f $dir/acting + + if [ "$pool2" = "" ]; + then + echo "Failure to find appropirate PGs" + return 1 + fi + + for p in $(seq 1 $pools) + do + if [ $p != $POOLNUM1 -a $p != $POOLNUM2 ]; + then + delete_pool ${poolprefix}$p + fi + done + + pool1_extra_prio=1 + pool2_extra_prio=2 + pool1_prio=$(expr $NORMAL_PRIO + $pool1_extra_prio) + pool2_prio=$(expr $NORMAL_PRIO + $pool2_extra_prio) + + ceph osd pool set $pool1 size 1 --yes-i-really-mean-it + ceph osd pool set $pool1 recovery_priority $pool1_extra_prio + ceph osd pool set $pool2 size 1 --yes-i-really-mean-it + ceph osd pool set $pool2 recovery_priority $pool2_extra_prio + wait_for_clean || return 1 + + dd if=/dev/urandom of=$dir/data bs=1M count=10 + p=1 + for pname in $pool1 $pool2 + do + for i in $(seq 1 $objects) + do + rados -p ${pname} put obj${i}-p${p} $dir/data + done + p=$(expr $p + 1) + done + + local otherosd=$(get_not_primary $pool1 obj1-p1) + + ceph pg 
dump pgs + ERRORS=0 + + ceph osd pool set $pool1 size 2 + ceph osd pool set $pool2 size 2 + + # Wait for both PGs to be in recovering state + ceph pg dump pgs + + # Wait for recovery to start + set -o pipefail + count=0 + while(true) + do + if test $(ceph --format json pg dump pgs | + jq '.pg_stats | .[] | .state | contains("recovering")' | grep -c true) == "2" + then + break + fi + sleep 2 + if test "$count" -eq "10" + then + echo "Recovery never started on both PGs" + return 1 + fi + count=$(expr $count + 1) + done + set +o pipefail + ceph pg dump pgs + + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/dump.${chk_osd1_1}.out + echo osd.${chk_osd1_1} + cat $dir/dump.${chk_osd1_1}.out + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_2}) dump_recovery_reservations > $dir/dump.${chk_osd1_2}.out + echo osd.${chk_osd1_2} + cat $dir/dump.${chk_osd1_2}.out + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/dump.${chk_osd1_1}.out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG1} ]; + then + echo "The primary PG for $pool1 didn't become the in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/dump.${chk_osd1_1}.out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $pool1_prio ]; + then + echo "The primary PG ${PG1} doesn't have prio $pool1_prio" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/dump.${chk_osd1_2}.out | jq '.remote_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG1} ]; + then + echo "The primary PG for $pool1 didn't become the in progress item on remote" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/dump.${chk_osd1_2}.out | jq '.remote_reservations.in_progress[0].prio') + if [ "$PRIO" != $pool1_prio ]; + then + echo "The primary PG ${PG1} doesn't have prio $pool1_prio on remote" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + # Using 
eval will strip double-quotes from item + eval ITEM=$(cat $dir/dump.${chk_osd2_1}.out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG2} ]; + then + echo "The primary PG for $pool2 didn't become the in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/dump.${chk_osd2_1}.out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $pool2_prio ]; + then + echo "The primary PG ${PG2} doesn't have prio $pool2_prio" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/dump.${chk_osd2_2}.out | jq '.remote_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG2} ]; + then + echo "The primary PG $PG2 didn't become the in progress item on remote" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/dump.${chk_osd2_2}.out | jq '.remote_reservations.in_progress[0].prio') + if [ "$PRIO" != $pool2_prio ]; + then + echo "The primary PG ${PG2} doesn't have prio $pool2_prio on remote" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + wait_for_clean || return 1 + + if [ $ERRORS != "0" ]; + then + echo "$ERRORS error(s) found" + else + echo TEST PASSED + fi + + delete_pool $pool1 + delete_pool $pool2 + kill_daemons $dir || return 1 + return $ERRORS +} + +main osd-recovery-prio "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh osd-recovery-prio.sh" +# End: diff --git a/qa/standalone/osd/osd-recovery-space.sh b/qa/standalone/osd/osd-recovery-space.sh new file mode 100755 index 000000000..3bafc5138 --- /dev/null +++ b/qa/standalone/osd/osd-recovery-space.sh @@ -0,0 +1,176 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2018 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7221" # git grep '\<7221\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--osd_max_backfills=10 " + CEPH_ARGS+="--osd_mclock_override_recovery_settings=true " + export objects=600 + export poolprefix=test + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + + +function get_num_in_state() { + local state=$1 + local expression + expression+="select(contains(\"${state}\"))" + ceph --format json pg dump pgs 2>/dev/null | \ + jq ".pg_stats | [.[] | .state | $expression] | length" +} + + +function wait_for_state() { + local state=$1 + local cur_in_state + local -a delays=($(get_timeout_delays $2 5)) + local -i loop=0 + + flush_pg_stats || return 1 + while test $(get_num_pgs) == 0 ; do + sleep 1 + done + + while true ; do + cur_in_state=$(get_num_in_state ${state}) + test $cur_in_state -gt 0 && break + if (( $loop >= ${#delays[*]} )) ; then + ceph pg dump pgs + return 1 + fi + sleep ${delays[$loop]} + loop+=1 + done + return 0 +} + + +function wait_for_recovery_toofull() { + local timeout=$1 + wait_for_state recovery_toofull $timeout +} + + +# Create 1 pools with size 1 +# set ful-ratio to 50% +# Write data 600 5K (3000K) +# Inject fake_statfs_for_testing to 3600K (83% full) +# Incresase the pool size to 2 +# The pool shouldn't have room to recovery +function TEST_recovery_test_simple() { + local dir=$1 + local pools=1 + local OSDS=2 + + run_mon $dir a || return 1 
+ run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + ceph osd set-nearfull-ratio .40 + ceph osd set-backfillfull-ratio .45 + ceph osd set-full-ratio .50 + + for p in $(seq 1 $pools) + do + create_pool "${poolprefix}$p" 1 1 + ceph osd pool set "${poolprefix}$p" size 1 --yes-i-really-mean-it + done + + wait_for_clean || return 1 + + dd if=/dev/urandom of=$dir/datafile bs=1024 count=5 + for o in $(seq 1 $objects) + do + rados -p "${poolprefix}$p" put obj$o $dir/datafile + done + + for o in $(seq 0 $(expr $OSDS - 1)) + do + ceph tell osd.$o injectargs '--fake_statfs_for_testing 3686400' || return 1 + done + sleep 5 + + ceph pg dump pgs + + for p in $(seq 1 $pools) + do + ceph osd pool set "${poolprefix}$p" size 2 + done + + # If this times out, we'll detected errors below + wait_for_recovery_toofull 30 + + ERRORS=0 + if [ "$(ceph pg dump pgs | grep +recovery_toofull | wc -l)" != "1" ]; + then + echo "One pool should have been in recovery_toofull" + ERRORS="$(expr $ERRORS + 1)" + fi + + ceph pg dump pgs + ceph status + ceph status --format=json-pretty > $dir/stat.json + + eval SEV=$(jq '.health.checks.PG_RECOVERY_FULL.severity' $dir/stat.json) + if [ "$SEV" != "HEALTH_ERR" ]; then + echo "PG_RECOVERY_FULL severity $SEV not HEALTH_ERR" + ERRORS="$(expr $ERRORS + 1)" + fi + eval MSG=$(jq '.health.checks.PG_RECOVERY_FULL.summary.message' $dir/stat.json) + if [ "$MSG" != "Full OSDs blocking recovery: 1 pg recovery_toofull" ]; then + echo "PG_RECOVERY_FULL message '$MSG' mismatched" + ERRORS="$(expr $ERRORS + 1)" + fi + rm -f $dir/stat.json + + if [ $ERRORS != "0" ]; + then + return 1 + fi + + for i in $(seq 1 $pools) + do + delete_pool "${poolprefix}$i" + done + kill_daemons $dir || return 1 +} + + +main osd-recovery-space "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh osd-recovery-space.sh" +# End: diff --git a/qa/standalone/osd/osd-recovery-stats.sh 
# b/qa/standalone/osd/osd-recovery-stats.sh
# new file mode 100755
# index 000000000..ad6f810d7
# --- /dev/null
# +++ b/qa/standalone/osd/osd-recovery-stats.sh
# @@ -0,0 +1,512 @@
#!/usr/bin/env bash
#
# Copyright (C) 2017 Red Hat <contact@redhat.com>
#
# Author: David Zafman <dzafman@redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library Public License for more details.
#

source "$CEPH_ROOT/qa/standalone/ceph-helpers.sh"

#
# Configure the environment of the toy cluster and run every requested
# TEST_* function between a setup and a teardown of the work directory.
#
# $1    - working directory of the cluster
# $2... - optional test function names; when omitted, every shell function
#         whose name matches TEST_[0-9a-z_]* is run
# Exports margin, objects and poolname for the tests below.
#
function run() {
    local dir=$1
    shift

    # Fix port????
    export CEPH_MON="127.0.0.1:7115" # git grep '\<7115\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "
    # so we will not force auth_log_shard to be acting_primary
    CEPH_ARGS+="--osd_force_auth_primary_missing_objects=1000000 "
    export margin=10
    export objects=200
    export poolname=test

    local func
    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    # $funcs is deliberately unquoted: it is a whitespace separated list.
    for func in $funcs ; do
        setup "$dir" || return 1
        "$func" "$dir" || return 1
        teardown "$dir" || return 1
    done
}

#
# Succeed when $1 lies in [ $2 - margin, $2 ].
#
# $1 - value to check
# $2 - target value
# Reads the global "margin".
#
function below_margin() {
    local -i check=$1
    shift
    local -i target=$1

    (( check <= target && check >= target - margin ))
}

#
# Succeed when $1 lies in [ $2, $2 + margin ].
#
# $1 - value to check
# $2 - target value
# Reads the global "margin".
#
function above_margin() {
    local -i check=$1
    shift
    local -i target=$1

    (( check >= target && check <= target + margin ))
}

# Command templates evaluated by check().  They are expanded late, by eval,
# so they see the PG, log, which, UPACT and addp variables of the caller.
FIND_UPACT='grep "pg[[]${PG}.*recovering.*update_calc_stats " $log | tail -1 | sed "s/.*[)] \([[][^ p]*\).*$/\1/"'
FIND_FIRST='grep "pg[[]${PG}.*recovering.*update_calc_stats $which " $log | grep -F " ${UPACT}${addp}" | grep -v est | head -1 | sed "s/.* \([0-9]*\)$/\1/"'
FIND_LAST='grep "pg[[]${PG}.*recovering.*update_calc_stats $which " $log | tail -1 | sed "s/.* \([0-9]*\)$/\1/"'

#
# Verify the update_calc_stats values logged by the primary OSD while the
# PG was recovering.
#
# $1  - cluster directory (the log read is $1/osd.<primary>.log)
# $2  - PG id
# $3  - id of the primary OSD
# $4  - pool type; "erasure" changes the suffix matched after the up/acting set
# $5  - expected first degraded value  (accepted range: [$5 - margin, $5])
# $6  - expected last degraded value   (accepted range: [$6, $6 + margin])
# $7  - expected first misplaced value
# $8  - expected last misplaced value
# $9  - optional expected first "shard <primary>" value
# $10 - optional expected last "shard <primary>" value
# Returns 1 on the first value that is out of range.
#
function check() {
    local dir=$1
    local PG=$2
    local primary=$3
    local type=$4
    local degraded_start=$5
    local degraded_end=$6
    local misplaced_start=$7
    local misplaced_end=$8
    local primary_start=${9:-}
    local primary_end=${10:-}
    local UPACT FIRST LAST

    local log=$dir/osd.${primary}.log

    local addp=" "
    if [ "$type" = "erasure" ];
    then
        addp="p"
    fi

    # The templates are quoted for eval so that their regular expressions
    # are neither word-split nor glob-expanded before being evaluated.
    UPACT=$(eval "$FIND_UPACT")

    # NOTE: $FIRST / $LAST are passed unquoted on purpose, this keeps the
    # existing behaviour when no log line matched (an empty value is dropped
    # and the expected value becomes the first argument).

    # FIND_FIRST takes the first line that mentions the final up/acting set
    # and does not contain "est".
    local which="degraded"
    FIRST=$(eval "$FIND_FIRST")
    below_margin $FIRST $degraded_start || return 1
    LAST=$(eval "$FIND_LAST")
    above_margin $LAST $degraded_end || return 1

    which="misplaced"
    FIRST=$(eval "$FIND_FIRST")
    below_margin $FIRST $misplaced_start || return 1
    LAST=$(eval "$FIND_LAST")
    above_margin $LAST $misplaced_end || return 1

    # This is the value that is set into MISSING_ON_PRIMARY
    if [ -n "$primary_start" ];
    then
        which="shard $primary"
        FIRST=$(eval "$FIND_FIRST")
        below_margin $FIRST $primary_start || return 1
        LAST=$(eval "$FIND_LAST")
        above_margin $LAST $primary_end || return 1
    fi
}

# [1,0,?]
# -> [1,2,4]
# degraded 500 -> 0
# active+recovering+degraded

# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
# 1.0 500 0 500 0 0 0 500 500 active+recovering+degraded 2017-11-17 19:27:36.493828 28'500 32:603 [1,2,4] 1 [1,2,4] 1 0'0 2017-11-17 19:27:05.915467 0'0 2017-11-17 19:27:05.915467
#
# Kill and mark out one non-primary OSD of the PG while recovery is
# blocked, then let recovery run and check the degraded counters logged
# by the primary.
#
# $1 - cluster directory
# $2 - pool type ("replicated" or "erasure")
#
function do_recovery_out1() {
    local dir=$1
    shift
    local type=$1
    local i

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    run_osd "$dir" 0 || return 1
    run_osd "$dir" 1 || return 1
    run_osd "$dir" 2 || return 1
    run_osd "$dir" 3 || return 1
    run_osd "$dir" 4 || return 1
    run_osd "$dir" 5 || return 1

    if [ "$type" = "erasure" ];
    then
        ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd
        create_pool $poolname 1 1 $type myprofile
    else
        create_pool $poolname 1 1 $type
    fi

    wait_for_clean || return 1

    for ((i = 1; i <= objects; i++))
    do
        rados -p $poolname put obj$i /dev/null
    done

    local primary=$(get_primary $poolname obj1)
    local PG=$(get_pg $poolname obj1)
    # An OSD of the PG other than the primary (presumably what
    # get_not_primary returns -- see ceph-helpers.sh)
    local otherosd=$(get_not_primary $poolname obj1)

    # Hold recovery back until the OSD is down and out
    ceph osd set norecover
    kill $(cat "$dir/osd.${otherosd}.pid")
    ceph osd down osd.${otherosd}
    ceph osd out osd.${otherosd}
    ceph osd unset norecover
    ceph tell osd.$(get_primary $poolname obj1) debug kick_recovery_wq 0
    sleep 2

    wait_for_clean || return 1

    check "$dir" $PG $primary $type $objects 0 0 0 || return 1

    delete_pool $poolname
    kill_daemons "$dir" || return 1
}

# Run do_recovery_out1 against a replicated pool.
function TEST_recovery_replicated_out1() {
    local dir=$1

    do_recovery_out1 "$dir" replicated || return 1
}

# Run do_recovery_out1 against an erasure coded pool.
function TEST_recovery_erasure_out1() {
    local dir=$1

    do_recovery_out1 "$dir" erasure || return 1
}

# [0, 1] -> [2,3,4,5]
# degraded 1000 -> 0
# misplaced 1000 -> 0
# missing on primary 500
# -> 0

# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
# 1.0 500 500 1000 1000 0 0 500 500 active+recovering+degraded 2017-10-27 09:38:37.453438 22'500 25:394 [2,4,3,5] 2 [2,4,3,5] 2 0'0 2017-10-27 09:37:58.046748 0'0 2017-10-27 09:37:58.046748
#
# Mark both OSDs of a size 2 pool out and grow the pool to size 4 while
# recovery is blocked, then check the degraded, misplaced and
# missing-on-primary counters logged by the new primary.
#
# $1 - cluster directory
#
function TEST_recovery_sizeup() {
    local dir=$1
    local i

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    run_osd "$dir" 0 || return 1
    run_osd "$dir" 1 || return 1
    run_osd "$dir" 2 || return 1
    run_osd "$dir" 3 || return 1
    run_osd "$dir" 4 || return 1
    run_osd "$dir" 5 || return 1

    create_pool $poolname 1 1
    ceph osd pool set $poolname size 2

    wait_for_clean || return 1

    for ((i = 1; i <= objects; i++))
    do
        rados -p $poolname put obj$i /dev/null
    done

    local primary=$(get_primary $poolname obj1)
    local PG=$(get_pg $poolname obj1)
    # Pool size is 2, so there is exactly one OSD that is not the primary
    local otherosd=$(get_not_primary $poolname obj1)

    ceph osd set norecover
    ceph osd out osd.$primary osd.$otherosd
    # Use the exported pool name rather than a hard-coded copy of it
    ceph osd pool set $poolname size 4
    ceph osd unset norecover
    # Get new primary
    primary=$(get_primary $poolname obj1)

    ceph tell osd.${primary} debug kick_recovery_wq 0
    sleep 2

    wait_for_clean || return 1

    local degraded=$((objects * 2))
    local misplaced=$((objects * 2))
    check "$dir" $PG $primary replicated $degraded 0 $misplaced 0 $objects 0 || return 1

    delete_pool $poolname
    kill_daemons "$dir" || return 1
}

# [0, 1, 2, 4] -> [3, 5]
# misplaced 1000 -> 0
# missing on primary 500 -> 0
# active+recovering+degraded

# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
# 1.0 500 500 0 1000 0 0 500 500 active+recovering+degraded 2017-10-27
# 09:34:50.012261 22'500 27:118 [3,5] 3 [3,5] 3 0'0 2017-10-27 09:34:08.617248 0'0 2017-10-27 09:34:08.617248
#
# Mark all four OSDs of a size 4 pool out and shrink the pool to size 2
# while recovery is blocked, then check the misplaced and
# missing-on-primary counters logged by the new primary.
#
# $1 - cluster directory
#
function TEST_recovery_sizedown() {
    local dir=$1
    local i osd
    local UPACT FIRST LAST

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    run_osd "$dir" 0 || return 1
    run_osd "$dir" 1 || return 1
    run_osd "$dir" 2 || return 1
    run_osd "$dir" 3 || return 1
    run_osd "$dir" 4 || return 1
    run_osd "$dir" 5 || return 1

    create_pool $poolname 1 1
    ceph osd pool set $poolname size 4

    wait_for_clean || return 1

    for ((i = 1; i <= objects; i++))
    do
        rados -p $poolname put obj$i /dev/null
    done

    local primary=$(get_primary $poolname obj1)
    local PG=$(get_pg $poolname obj1)
    # Every OSD currently holding the PG
    local allosds=$(get_osds $poolname obj1)

    ceph osd set norecover
    # $allosds is deliberately unquoted: it is a whitespace separated list.
    for osd in $allosds
    do
        ceph osd out osd.$osd
    done

    # Use the exported pool name rather than a hard-coded copy of it
    ceph osd pool set $poolname size 2
    ceph osd unset norecover
    ceph tell osd.$(get_primary $poolname obj1) debug kick_recovery_wq 0
    sleep 2

    wait_for_clean || return 1

    # Get new primary
    primary=$(get_primary $poolname obj1)

    local misplaced=$((objects * 2))
    local log=$dir/osd.${primary}.log
    check "$dir" $PG $primary replicated 0 0 $misplaced 0 || return 1

    UPACT=$(grep "pg[[]${PG}.*recovering.*update_calc_stats " "$log" | tail -1 | sed "s/.*[)] \([[][^ p]*\).*$/\1/")

    # This is the value that is set into MISSING_ON_PRIMARY
    # NOTE: $FIRST / $LAST stay unquoted to keep the existing behaviour when
    # no log line matched.
    FIRST=$(grep "pg[[]${PG}.*recovering.*update_calc_stats shard $primary " "$log" | grep -F " $UPACT " | head -1 | sed "s/.* \([0-9]*\)$/\1/")
    below_margin $FIRST $objects || return 1
    LAST=$(grep "pg[[]${PG}.*recovering.*update_calc_stats shard $primary " "$log" | tail -1 | sed "s/.* \([0-9]*\)$/\1/")
    above_margin $LAST 0 || return 1

    delete_pool $poolname
    kill_daemons "$dir" || return 1
}

# [1] -> [1,2]
# degraded 300 -> 200
# active+recovering+undersized+degraded

# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING
# ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
# 1.0 100 0 300 0 0 0 100 100 active+recovering+undersized+degraded 2017-11-17 17:16:15.302943 13'500 16:643 [1,2] 1 [1,2] 1 0'0 2017-11-17 17:15:34.985563 0'0 2017-11-17 17:15:34.985563
#
# With 3 OSDs, mark one non-primary OSD out and raise the pool size from 1
# to 4 so that the PG can never become clean, then check the degraded
# counters logged by the primary once recovery has stopped.
#
# $1 - cluster directory
#
function TEST_recovery_undersized() {
    local dir=$1
    local i

    local osds=3
    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    for ((i = 0; i < osds; i++))
    do
        run_osd "$dir" $i || return 1
    done

    create_pool $poolname 1 1
    ceph osd pool set $poolname size 1 --yes-i-really-mean-it

    wait_for_clean || return 1

    for ((i = 1; i <= objects; i++))
    do
        rados -p $poolname put obj$i /dev/null
    done

    local primary=$(get_primary $poolname obj1)
    local PG=$(get_pg $poolname obj1)

    ceph osd set norecover
    # Mark any osd not the primary (only 1 replica so also has no replica)
    for ((i = 0; i < osds; i++))
    do
        if [ "$i" = "$primary" ];
        then
            continue
        fi
        ceph osd out osd.$i
        break
    done
    # Use the exported pool name rather than a hard-coded copy of it
    ceph osd pool set $poolname size 4
    ceph osd unset norecover
    ceph tell osd.$(get_primary $poolname obj1) debug kick_recovery_wq 0
    # Give extra sleep time because code below doesn't have the sophistication of wait_for_clean()
    sleep 10
    flush_pg_stats || return 1

    # Wait for recovery to finish
    # Can't use wait_for_clean() because state goes from active+recovering+undersized+degraded
    # to active+undersized+degraded
    for ((i = 1; i <= 300; i++))
    do
        if ceph pg dump pgs | grep "^$PG" | grep -qv recovering
        then
            break
        fi
        if (( i == 300 ));
        then
            echo "Timeout waiting for recovery to finish"
            return 1
        fi
        sleep 1
    done

    # Get new primary
    primary=$(get_primary $poolname obj1)

    local first_degraded=$((objects * 3))
    local last_degraded=$((objects * 2))
    check "$dir" $PG $primary replicated $first_degraded $last_degraded 0 0 || return 1

    delete_pool $poolname
    kill_daemons "$dir" || return 1
}

# [1,0,2] -> [1,3,NONE]/[1,3,2]
#
# degraded 100 -> 0
# misplaced 100 -> 100
# active+recovering+degraded+remapped

# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
# 1.0 100 0 100 100 0 0 100 100 active+recovering+degraded+remapped 2017-11-27 21:24:20.851243 18'500 23:618 [1,3,NONE] 1 [1,3,2] 1 0'0 2017-11-27 21:23:39.395242 0'0 2017-11-27 21:23:39.395242
#
# On a k=2 m=1 erasure coded pool with 4 OSDs, kill and mark out one
# non-primary OSD and mark a second non-primary OSD out (but leave it
# running), then check the degraded and misplaced counters logged by the
# primary.
#
# $1 - cluster directory
#
function TEST_recovery_erasure_remapped() {
    local dir=$1
    local i

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    run_osd "$dir" 0 || return 1
    run_osd "$dir" 1 || return 1
    run_osd "$dir" 2 || return 1
    run_osd "$dir" 3 || return 1

    ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd
    create_pool $poolname 1 1 erasure myprofile
    ceph osd pool set $poolname min_size 2

    wait_for_clean || return 1

    for ((i = 1; i <= objects; i++))
    do
        rados -p $poolname put obj$i /dev/null
    done

    local primary=$(get_primary $poolname obj1)
    local PG=$(get_pg $poolname obj1)
    local otherosd=$(get_not_primary $poolname obj1)

    ceph osd set norecover
    kill $(cat "$dir/osd.${otherosd}.pid")
    ceph osd down osd.${otherosd}
    ceph osd out osd.${otherosd}

    # Mark osd not the primary and not down/out osd as just out
    for i in 0 1 2 3
    do
        if [ "$i" = "$primary" ];
        then
            continue
        fi
        if [ "$i" = "$otherosd" ];
        then
            continue
        fi
        ceph osd out osd.$i
        break
    done
    ceph osd unset norecover
    ceph tell osd.$(get_primary $poolname obj1) debug kick_recovery_wq 0
    sleep 2

    wait_for_clean || return 1

    check "$dir" $PG $primary erasure $objects 0 $objects $objects || return 1

    delete_pool $poolname
    kill_daemons "$dir" || return 1
}

#
# Write objects in three batches while first one replica and then the
# primary are down, mark two OSDs out, grow the pool to size 4 and check
# the counters logged by the new primary.
#
# $1 - cluster directory
#
function TEST_recovery_multi() {
    local dir=$1
    local i

    local osds=6
    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    for ((i = 0; i < osds; i++))
    do
        run_osd "$dir" $i || return 1
    done

    create_pool $poolname 1 1
    ceph osd pool set $poolname size 3
    ceph osd pool set $poolname min_size 1

    wait_for_clean || return 1

    rados -p $poolname put obj1 /dev/null

    local primary=$(get_primary $poolname obj1)
    local otherosd=$(get_not_primary $poolname obj1)

    ceph osd set noout
    ceph osd set norecover
    kill $(cat "$dir/osd.${otherosd}.pid")
    ceph osd down osd.${otherosd}

    # First half of the objects is written while $otherosd is down
    local half=$((objects / 2))
    for ((i = 2; i <= half; i++))
    do
        rados -p $poolname put obj$i /dev/null
    done

    kill $(cat "$dir/osd.${primary}.pid")
    ceph osd down osd.${primary}
    activate_osd "$dir" ${otherosd}
    sleep 3

    # Second half is written while the original primary is down
    for ((i = half + 1; i <= objects; i++))
    do
        rados -p $poolname put obj$i /dev/null
    done

    local PG=$(get_pg $poolname obj1)
    otherosd=$(get_not_primary $poolname obj$objects)

    ceph osd unset noout
    ceph osd out osd.$primary osd.$otherosd
    activate_osd "$dir" ${primary}
    sleep 3

    # Use the exported pool name rather than a hard-coded copy of it
    ceph osd pool set $poolname size 4
    ceph osd unset norecover
    ceph tell osd.$(get_primary $poolname obj1) debug kick_recovery_wq 0
    sleep 2

    wait_for_clean || return 1

    # Get new primary
    primary=$(get_primary $poolname obj1)

    # NOTE(review): 399 / 300 / 99 are hard-coded; presumably they are only
    # valid for objects=200 as exported by run() -- confirm before changing
    # the object count.
    check "$dir" $PG $primary replicated 399 0 300 0 99 0 || return 1

    delete_pool $poolname
    kill_daemons "$dir" || return 1
}

main osd-recovery-stats "$@"

# Local Variables:
# compile-command: "make -j4 && ../qa/run-standalone.sh osd-recovery-stats.sh"
# End:
# diff --git a/qa/standalone/osd/osd-rep-recov-eio.sh b/qa/standalone/osd/osd-rep-recov-eio.sh
# new file mode 100755
# index 000000000..6fea441b3
# --- /dev/null
# +++ b/qa/standalone/osd/osd-rep-recov-eio.sh
# @@ -0,0 +1,422 @@
#!/usr/bin/env bash
#
# Copyright (C) 2017 Red Hat <contact@redhat.com>
#
#
# Author: Kefu Chai <kchai@redhat.com>
# Author: David Zafman <dzafman@redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under
# the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library Public License for more details.
#

source "$CEPH_ROOT/qa/standalone/ceph-helpers.sh"

# Value given to mon_osd_warn_num_repaired by run()
warnings=10

#
# Configure the environment of the toy cluster and run every requested
# TEST_* function.  Unlike the sibling scripts, the monitor, the manager and
# a pool named "foo" are created here, before each test function is called.
#
# $1    - working directory of the cluster
# $2... - optional test function names; when omitted, every shell function
#         whose name matches TEST_[0-9a-z_]* is run
#
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7140" # git grep '\<7140\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "


    local func
    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    # $funcs is deliberately unquoted: it is a whitespace separated list.
    for func in $funcs ; do
        setup "$dir" || return 1
        # set warning amount in case default changes
        run_mon "$dir" a --mon_osd_warn_num_repaired=$warnings || return 1
        run_mgr "$dir" x || return 1
        ceph osd pool create foo 8 || return 1

        "$func" "$dir" || return 1
        teardown "$dir" || return 1
    done
}

#
# Start OSDs 0 .. $1-1 and wait for the cluster to be clean.
#
# $1 - number of OSDs
# $2 - optional suffix appended to "run_osd" to select the start helper
# Reads "dir" from the caller's scope (it is not a parameter).
#
function setup_osds() {
    local count=$1
    shift
    local type=$1
    local id

    for ((id = 0; id < count; id++)) ; do
        run_osd${type} "$dir" $id || return 1
    done
    wait_for_clean || return 1
}

#
# Print the state of PG $1 as reported by "ceph pg dump pgs".
#
function get_state() {
    local pgid=$1
    local sname=state
    ceph --format json pg dump pgs 2>/dev/null | \
        jq -r ".pg_stats | .[] | select(.pgid==\"$pgid\") | .$sname"
}

#
# Create $1/ORIGINAL (four markers, each right-aligned in a 1024 character
# field) and store it as an object.
#
# $1 - directory that receives the ORIGINAL file
# $2 - pool name
# $3 - object name (default SOMETHING)
#
function rados_put() {
    local dir=$1
    local poolname=$2
    local objname=${3:-SOMETHING}
    local marker

    for marker in AAA BBB CCCC DDDD ; do
        printf "%*s" 1024 $marker
    done > "$dir/ORIGINAL"
    #
    # put the object
    #
    rados --pool $poolname put $objname "$dir/ORIGINAL" || return 1
}

#
# Read an object into $1/COPY and check the outcome.
#
# $1 - directory holding ORIGINAL and receiving COPY
# $2 - pool name
# $3 - object name (default SOMETHING)
# $4 - expected outcome:
#        ok   (default) the read succeeds and COPY equals ORIGINAL
#        fail the read fails
#        hang the read is still running after 5 seconds (timeout status 124)
# Returns 0 when the outcome is the expected one.
#
function rados_get() {
    local dir=$1
    local poolname=$2
    local objname=${3:-SOMETHING}
    local expect=${4:-ok}

    case "$expect" in
        fail)
            #
            # Expect a failure to get object
            #
            ! rados --pool $poolname get $objname "$dir/COPY"
            return
            ;;
        hang)
            #
            # Expect hang trying to get object
            #
            timeout 5 rados --pool $poolname get $objname "$dir/COPY"
            test "$?" = "124"
            return
            ;;
    esac
    #
    # get an object, compare with $dir/ORIGINAL
    #
    rados --pool $poolname get $objname "$dir/COPY" || return 1
    diff "$dir/ORIGINAL" "$dir/COPY" || return 1
    rm "$dir/COPY"
}

#
# Inject read errors on one, two and finally all three copies of an object
# in pool-rep and check the repair counters after each step.
#
# $1 - error type; inject_$1 is the helper called to inject it
# $2 - cluster directory
#
function rados_get_data() {
    local inject=$1
    shift
    local dir=$1
    local COUNT

    local poolname=pool-rep
    local objname=obj-$inject-$$
    local pgid=$(get_pg $poolname $objname)

    rados_put "$dir" $poolname $objname || return 1
    inject_$inject rep data $poolname $objname "$dir" 0 || return 1
    rados_get "$dir" $poolname $objname || return 1

    wait_for_clean
    COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
    test "$COUNT" = "1" || return 1
    flush_pg_stats
    COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired")
    test "$COUNT" = "1" || return 1

    local object_osds=($(get_osds $poolname $objname))
    local primary=${object_osds[0]}
    local bad_peer=${object_osds[1]}
    inject_$inject rep data $poolname $objname "$dir" 0 || return 1
    inject_$inject rep data $poolname $objname "$dir" 1 || return 1
    # Force primary to pull from the bad peer, so we can repair it too!
    set_config osd $primary osd_debug_feed_pullee $bad_peer || return 1
    rados_get "$dir" $poolname $objname || return 1

    # Wait until automatic repair of bad peer is done
    wait_for_clean || return 1

    inject_$inject rep data $poolname $objname "$dir" 0 || return 1
    inject_$inject rep data $poolname $objname "$dir" 2 || return 1
    rados_get "$dir" $poolname $objname || return 1

    wait_for_clean
    COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
    test "$COUNT" = "3" || return 1
    flush_pg_stats
    COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired")
    test "$COUNT" = "4" || return 1

    inject_$inject rep data $poolname $objname "$dir" 0 || return 1
    inject_$inject rep data $poolname $objname "$dir" 1 || return 1
    inject_$inject rep data $poolname $objname "$dir" 2 || return 1
    rados_get "$dir" $poolname $objname hang || return 1

    wait_for_clean
    # After hang another repair couldn't happen, so count stays the same
    COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
    test "$COUNT" = "3" || return 1
    flush_pg_stats
    COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired")
    test "$COUNT" = "4" || return 1
}

# Run rados_get_data with injected EIO on a 4 OSD cluster.
function TEST_rados_get_with_eio() {
    local dir=$1

    setup_osds 4 || return 1

    local poolname=pool-rep
    create_pool $poolname 1 1 || return 1
    wait_for_clean || return 1
    rados_get_data eio "$dir" || return 1

    delete_pool $poolname
}

#
# Repair more objects than the warning threshold and check that the health
# warning appears, can be muted, and comes back when a second OSD exceeds
# the threshold.
#
# $1 - cluster directory
#
function TEST_rados_repair_warning() {
    local dir=$1
    local OBJS=$((warnings + 1))
    local i COUNT health
    local CHECKTIME LOOPS

    setup_osds 4 || return 1

    local poolname=pool-rep
    create_pool $poolname 1 1 || return 1
    wait_for_clean || return 1

    local objbase=obj-warn
    local inject=eio

    for ((i = 1; i <= OBJS; i++))
    do
        rados_put "$dir" $poolname ${objbase}-$i || return 1
        inject_$inject rep data $poolname ${objbase}-$i "$dir" 0 || return 1
        rados_get "$dir" $poolname ${objbase}-$i || return 1
    done
    local pgid=$(get_pg $poolname ${objbase}-1)

    local object_osds=($(get_osds $poolname ${objbase}-1))
    local primary=${object_osds[0]}
    local bad_peer=${object_osds[1]}

    wait_for_clean
    COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
    test "$COUNT" = "$OBJS" || return 1
    flush_pg_stats
    COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired")
    test "$COUNT" = "$OBJS" || return 1

    ceph health | grep -q "Too many repaired reads on 1 OSDs" || return 1
    ceph health detail | grep -q "osd.$primary had $OBJS reads repaired" || return 1

    ceph health mute OSD_TOO_MANY_REPAIRS
    # Should mute this: fail when "ceph health" itself fails or when the
    # warning is still reported.  The output is captured first so that no
    # shell option has to be toggled around the check.
    health=$(ceph health) || return 1
    if grep -q "Too many repaired reads on 1 OSDs" <<< "$health"
    then
        return 1
    fi

    for ((i = 1; i <= OBJS; i++))
    do
        inject_$inject rep data $poolname ${objbase}-$i "$dir" 0 || return 1
        inject_$inject rep data $poolname ${objbase}-$i "$dir" 1 || return 1
        # Force primary to pull from the bad peer, so we can repair it too!
        set_config osd $primary osd_debug_feed_pullee $bad_peer || return 1
        rados_get "$dir" $poolname ${objbase}-$i || return 1
    done

    wait_for_clean
    COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
    test "$COUNT" = "$((OBJS * 2))" || return 1
    flush_pg_stats
    COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired")
    test "$COUNT" = "$((OBJS * 3))" || return 1

    # Give mon a chance to notice additional OSD and unmute
    # The default tick time is 5 seconds
    CHECKTIME=10
    LOOPS=0
    while true
    do
        sleep 1
        if ceph health | grep -q "Too many repaired reads on 2 OSDs"
        then
            break
        fi
        LOOPS=$((LOOPS + 1))
        if test "$LOOPS" = "$CHECKTIME"
        then
            echo "Too many repaired reads not seen after $CHECKTIME seconds"
            return 1
        fi
    done
    ceph health detail | grep -q "osd.$primary had $((OBJS * 2)) reads repaired" || return 1
    ceph health detail | grep -q "osd.$bad_peer had $OBJS reads repaired" || return 1

    delete_pool $poolname
}

# Test backfill with unfound object
function TEST_rep_backfill_unfound() {
    local dir=$1
    local objname=myobject
    local lastobj=300
    # Must be between 1 and $lastobj
    local testobj=obj250
    # NOTE(review): the PG id is hard-coded; presumably test-pool is pool 2
    # because run() creates pool "foo" first -- confirm.
    local pgid=2.0
    local i tmp state

    export CEPH_ARGS
    CEPH_ARGS+=' --osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10'
    setup_osds 3 || return 1

    local poolname=test-pool
    create_pool $poolname 1 1 || return 1
    wait_for_clean || return 1

    ceph pg dump pgs

    rados_put "$dir" $poolname $objname || return 1

    local -a initial_osds=($(get_osds $poolname $objname))
    local last_osd=${initial_osds[-1]}
    kill_daemons "$dir" TERM osd.${last_osd} < /dev/null || return 1
    ceph osd down ${last_osd} || return 1
    ceph osd out ${last_osd} || return 1

    ceph pg dump pgs

    dd if=/dev/urandom of="${dir}/ORIGINAL" bs=1024 count=4
    for ((i = 1; i <= lastobj; i++))
    do
        rados --pool $poolname put obj${i} "$dir/ORIGINAL" || return 1
    done

    inject_eio rep data $poolname $testobj "$dir" 0 || return 1
    inject_eio rep data $poolname $testobj "$dir" 1 || return 1

    activate_osd "$dir" ${last_osd} || return 1
    ceph osd in ${last_osd} || return 1

    sleep 15

    # Poll for up to 360 seconds; a timeout is caught by the list_unfound
    # check below.
    for ((tmp = 1; tmp <= 360; tmp++)); do
        state=$(get_state $pgid)
        if echo "$state" | grep backfill_unfound
        then
            break
        fi
        echo "$state "
        sleep 1
    done

    ceph pg dump pgs
    ceph pg $pgid list_unfound | grep -q $testobj || return 1

    # Command should hang because object is unfound
    timeout 5 rados -p $poolname get $testobj "$dir/CHECK"
    test "$?" = "124" || return 1

    ceph pg $pgid mark_unfound_lost delete

    wait_for_clean || return 1

    for ((i = 1; i <= lastobj; i++))
    do
        if [ "obj${i}" = "$testobj" ]; then
            # Doesn't exist anymore
            ! rados -p $poolname get $testobj "$dir/CHECK" || return 1
        else
            rados --pool $poolname get obj${i} "$dir/CHECK" || return 1
            diff -q "$dir/ORIGINAL" "$dir/CHECK" || return 1
        fi
    done

    rm -f "${dir}/ORIGINAL" "${dir}/CHECK"

    delete_pool $poolname
}

# Test recovery with unfound object
function TEST_rep_recovery_unfound() {
    local dir=$1
    local objname=myobject
    local lastobj=100
    # Must be between 1 and $lastobj
    local testobj=obj75
    # NOTE(review): the PG id is hard-coded; presumably test-pool is pool 2
    # because run() creates pool "foo" first -- confirm.
    local pgid=2.0
    local i tmp state

    setup_osds 3 || return 1

    local poolname=test-pool
    create_pool $poolname 1 1 || return 1
    wait_for_clean || return 1

    ceph pg dump pgs

    rados_put "$dir" $poolname $objname || return 1

    local -a initial_osds=($(get_osds $poolname $objname))
    local last_osd=${initial_osds[-1]}
    kill_daemons "$dir" TERM osd.${last_osd} < /dev/null || return 1
    ceph osd down ${last_osd} || return 1
    ceph osd out ${last_osd} || return 1

    ceph pg dump pgs

    dd if=/dev/urandom of="${dir}/ORIGINAL" bs=1024 count=4
    for ((i = 1; i <= lastobj; i++))
    do
        rados --pool $poolname put obj${i} "$dir/ORIGINAL" || return 1
    done

    inject_eio rep data $poolname $testobj "$dir" 0 || return 1
    inject_eio rep data $poolname $testobj "$dir" 1 || return 1

    activate_osd "$dir" ${last_osd} || return 1
    ceph osd in ${last_osd} || return 1

    sleep 15

    # Poll for up to 100 seconds until the PG no longer reports
    # "recovering"; a timeout is caught by the list_unfound check below.
    for ((tmp = 1; tmp <= 100; tmp++)); do
        state=$(get_state $pgid)
        if echo "$state" | grep -v recovering
        then
            break
        fi
        echo "$state "
        sleep 1
    done

    ceph pg dump pgs
    ceph pg $pgid list_unfound | grep -q $testobj || return 1

    # Command should hang because object is unfound
    timeout 5 rados -p $poolname get $testobj "$dir/CHECK"
    test "$?" = "124" || return 1

    ceph pg $pgid mark_unfound_lost delete

    wait_for_clean || return 1

    for ((i = 1; i <= lastobj; i++))
    do
        if [ "obj${i}" = "$testobj" ]; then
            # Doesn't exist anymore
            ! rados -p $poolname get $testobj "$dir/CHECK" || return 1
        else
            rados --pool $poolname get obj${i} "$dir/CHECK" || return 1
            diff -q "$dir/ORIGINAL" "$dir/CHECK" || return 1
        fi
    done

    rm -f "${dir}/ORIGINAL" "${dir}/CHECK"

    delete_pool $poolname
}

main osd-rep-recov-eio.sh "$@"

# Local Variables:
# compile-command: "cd ../../../build ; make -j4 && ../qa/run-standalone.sh osd-rep-recov-eio.sh"
# End:
# diff --git a/qa/standalone/osd/osd-reuse-id.sh b/qa/standalone/osd/osd-reuse-id.sh
# new file mode 100755
# index 000000000..b24b6f2eb
# --- /dev/null
# +++ b/qa/standalone/osd/osd-reuse-id.sh
# @@ -0,0 +1,53 @@
#!/usr/bin/env bash
#
# Copyright (C) 2015 Red Hat <contact@redhat.com>
#
# Author: Loic Dachary <loic@dachary.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library Public License for more details.
#
source "$CEPH_ROOT/qa/standalone/ceph-helpers.sh"

#
# (osd-reuse-id.sh) Configure the environment of the toy cluster and run
# every requested TEST_* function between a setup and a teardown.
#
# $1    - working directory of the cluster
# $2... - optional test function names; when omitted, every shell function
#         whose name matches TEST_[0-9a-z_]* is run
#
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7123" # git grep '\<7123\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "

    local func
    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    # $funcs is deliberately unquoted: it is a whitespace separated list.
    for func in $funcs ; do
        setup "$dir" || return 1
        "$func" "$dir" || return 1
        teardown "$dir" || return 1
    done
}

#
# Destroy osd.1 and start an OSD with the same id again.
#
# $1 - cluster directory
#
function TEST_reuse_id() {
    local dir=$1

    run_mon "$dir" a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
    run_mgr "$dir" x || return 1
    run_osd "$dir" 0 || return 1
    run_osd "$dir" 1 || return 1
    create_rbd_pool || return 1
    wait_for_clean || return 1
    destroy_osd "$dir" 1 || return 1
    run_osd "$dir" 1 || return 1
}

main osd-reuse-id "$@"

# Local Variables:
# compile-command: "cd ../.. ; make -j4 && test/osd/osd-reuse-id.sh"
# End:
# diff --git a/qa/standalone/osd/pg-split-merge.sh b/qa/standalone/osd/pg-split-merge.sh
# new file mode 100755
# index 000000000..7f2899b60
# --- /dev/null
# +++ b/qa/standalone/osd/pg-split-merge.sh
# @@ -0,0 +1,203 @@
#!/usr/bin/env bash
source "$CEPH_ROOT/qa/standalone/ceph-helpers.sh"

#
# (pg-split-merge.sh) Configure the environment of the toy cluster and run
# every requested TEST_* function between a setup and a teardown.
#
# $1    - working directory of the cluster
# $2... - optional test function names; when omitted, every shell function
#         whose name matches TEST_[0-9a-z_]* is run
#
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7147" # git grep '\<7147\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    # Trailing space kept so that anything appended later stays a separate
    # option, as in the sibling scripts.
    CEPH_ARGS+="--mon-host=$CEPH_MON --mon_min_osdmap_epochs=50 --paxos_service_trim_min=10 "

    local func
    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    # $funcs is deliberately unquoted: it is a whitespace separated list.
    for func in $funcs ; do
        setup "$dir" || return 1
        "$func" "$dir" || return 1
        teardown "$dir" || return 1
    done
}

#
# Merge two PGs while osd.2 holds an empty (last_update=0'0) instance of
# 1.1 and check that the merged PG ends up active+clean.
#
# $1 - cluster directory
#
function TEST_a_merge_empty() {
    local dir=$1

    run_mon "$dir" a --osd_pool_default_size=3 || return 1
    run_mgr "$dir" x || return 1
    run_osd "$dir" 0 || return 1
    run_osd "$dir" 1 || return 1
    run_osd "$dir" 2 || return 1

    ceph osd pool create foo 2 || return 1
    ceph osd pool set foo pgp_num 1 || return 1

    wait_for_clean || return 1

    # note: we need 1.0 to have the same or more objects than 1.1
    # 1.1
    rados -p foo put foo1 /etc/passwd
    rados -p foo put foo2 /etc/passwd
    rados -p foo put foo3 /etc/passwd
    rados -p foo put foo4 /etc/passwd
    # 1.0
    rados -p foo put foo5 /etc/passwd
    rados -p foo put foo6 /etc/passwd
    rados -p foo put foo8 /etc/passwd
    rados -p foo put foo10 /etc/passwd
    rados -p foo put foo11 /etc/passwd
    rados -p foo put foo12 /etc/passwd
    rados -p foo put foo16 /etc/passwd

    wait_for_clean || return 1

    ceph tell osd.1 config set osd_debug_no_purge_strays true
    ceph osd pool set foo size 2 || return 1
    wait_for_clean || return 1

    kill_daemons "$dir" TERM osd.2 || return 1
    ceph-objectstore-tool --data-path "$dir/2" --op remove --pgid 1.1 --force || return 1
    activate_osd "$dir" 2 || return 1

    wait_for_clean || return 1

    # osd.2: now 1.0 is there but 1.1 is not

    # instantiate 1.1 on osd.2 with last_update=0'0 ('empty'), which is
    # the problematic state... then let it merge with 1.0
    ceph tell osd.2 config set osd_debug_no_acting_change true
    ceph osd out 0 1
    ceph osd pool set foo pg_num 1
    sleep 5
    ceph tell osd.2 config set osd_debug_no_acting_change false

    # go back to osd.1 being primary, and 3x so the osd.2 copy doesn't get
    # removed
    ceph osd in 0 1
    ceph osd pool set foo size 3

    wait_for_clean || return 1

    # scrub to ensure the osd.2 copy of 1.0 was incomplete (vs missing
    # half of its objects).
    ceph pg scrub 1.0
    sleep 10
    ceph log last debug
    ceph pg ls
    ceph pg ls | grep ' active.clean ' || return 1
}

#
# Export both PGs of a pool, merge them, create a gap in the osdmap history
# and check which imports of the old exports are refused, forced or
# accepted.
#
# $1 - cluster directory
#
function TEST_import_after_merge_and_gap() {
    local dir=$1
    local f

    run_mon "$dir" a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
    run_mgr "$dir" x || return 1
    run_osd "$dir" 0 || return 1

    ceph osd pool create foo 2 || return 1
    wait_for_clean || return 1
    rados -p foo bench 3 write -b 1024 --no-cleanup || return 1

    kill_daemons "$dir" TERM osd.0 || return 1
    ceph-objectstore-tool --data-path "$dir/0" --op export --pgid 1.1 --file "$dir/1.1" --force || return 1
    ceph-objectstore-tool --data-path "$dir/0" --op export --pgid 1.0 --file "$dir/1.0" --force || return 1
    activate_osd "$dir" 0 || return 1

    ceph osd pool set foo pg_num 1
    sleep 5
    # Wait while the OSD still reports 2 PGs, i.e. until the merge is done
    while ceph daemon osd.0 perf dump | jq '.osd.numpg' | grep 2 ; do sleep 1 ; done
    wait_for_clean || return 1

    #
    kill_daemons "$dir" TERM osd.0 || return 1
    ceph-objectstore-tool --data-path "$dir/0" --op remove --pgid 1.0 --force || return 1
    # this will import both halves of the original pg
    ceph-objectstore-tool --data-path "$dir/0" --op import --pgid 1.1 --file "$dir/1.1" || return 1
    ceph-objectstore-tool --data-path "$dir/0" --op import --pgid 1.0 --file "$dir/1.0" || return 1
    activate_osd "$dir" 0 || return 1

    wait_for_clean || return 1

    # make a map gap
    for ((f = 1; f <= 50; f++)) ; do
        ceph osd set nodown
        ceph osd unset nodown
    done

    # poke and prod to ensure last_epoch_clean is big, reported to mon, and
    # the osd is able to trim old maps
    rados -p foo bench 1 write -b 1024 --no-cleanup || return 1
    wait_for_clean || return 1
    ceph tell osd.0 send_beacon
    sleep 5
    ceph osd set nodown
    ceph osd unset nodown
    sleep 5

    kill_daemons "$dir" TERM osd.0 || return 1

    # this should fail.. 1.1 still doesn't exist
    ! ceph-objectstore-tool --data-path "$dir/0" --op import --pgid 1.1 --file "$dir/1.1" || return 1

    ceph-objectstore-tool --data-path "$dir/0" --op export-remove --pgid 1.0 --force --file "$dir/1.0.later" || return 1

    # this should fail too because of the gap
    ! ceph-objectstore-tool --data-path "$dir/0" --op import --pgid 1.1 --file "$dir/1.1" || return 1
    ! ceph-objectstore-tool --data-path "$dir/0" --op import --pgid 1.0 --file "$dir/1.0" || return 1

    # we can force it...
    ceph-objectstore-tool --data-path "$dir/0" --op import --pgid 1.1 --file "$dir/1.1" --force || return 1
    ceph-objectstore-tool --data-path "$dir/0" --op import --pgid 1.0 --file "$dir/1.0" --force || return 1

    # ...but the osd won't start, so remove it again.
    ceph-objectstore-tool --data-path "$dir/0" --op remove --pgid 1.0 --force || return 1
    ceph-objectstore-tool --data-path "$dir/0" --op remove --pgid 1.1 --force || return 1

    ceph-objectstore-tool --data-path "$dir/0" --op import --pgid 1.0 --file "$dir/1.0.later" --force || return 1


    activate_osd "$dir" 0 || return 1

    wait_for_clean || return 1
}

#
# Export a PG, split it, and check that the old export can only be imported
# once the split child has been removed.
#
# $1 - cluster directory
#
function TEST_import_after_split() {
    local dir=$1

    run_mon "$dir" a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
    run_mgr "$dir" x || return 1
    run_osd "$dir" 0 || return 1

    ceph osd pool create foo 1 || return 1
    wait_for_clean || return 1
    rados -p foo bench 3 write -b 1024 --no-cleanup || return 1

    kill_daemons "$dir" TERM osd.0 || return 1
    ceph-objectstore-tool --data-path "$dir/0" --op export --pgid 1.0 --file "$dir/1.0" --force || return 1
    activate_osd "$dir" 0 || return 1

    ceph osd pool set foo pg_num 2
    sleep 5
    # Wait while the OSD still reports 1 PG, i.e. until the split is done
    while ceph daemon osd.0 perf dump | jq '.osd.numpg' | grep 1 ; do sleep 1 ; done
    wait_for_clean || return 1

    kill_daemons "$dir" TERM osd.0 || return 1

    ceph-objectstore-tool --data-path "$dir/0" --op remove --pgid 1.0 --force || return 1

    # this should fail because 1.1 (split child) is there
    ! ceph-objectstore-tool --data-path "$dir/0" --op import --pgid 1.0 --file "$dir/1.0" || return 1

    ceph-objectstore-tool --data-path "$dir/0" --op remove --pgid 1.1 --force || return 1
    # now it will work (1.1 is gone)
    ceph-objectstore-tool --data-path "$dir/0" --op import --pgid 1.0 --file "$dir/1.0" || return 1

    activate_osd "$dir" 0 || return 1

    wait_for_clean || return 1
}


main pg-split-merge "$@"

# Local Variables:
# compile-command: "cd ../.. ; make -j4 && test/osd/pg-split-merge.sh"
# End:
# diff --git a/qa/standalone/osd/repeer-on-acting-back.sh b/qa/standalone/osd/repeer-on-acting-back.sh
# new file mode 100755
# index 000000000..af406ef92
# --- /dev/null
# +++ b/qa/standalone/osd/repeer-on-acting-back.sh
# @@ -0,0 +1,129 @@
#!/usr/bin/env bash
#
# Copyright (C) 2020 ZTE Corporation <contact@zte.com.cn>
#
# Author: xie xingguo <xie.xingguo@zte.com.cn>
# Author: Yan Jun <yan.jun8@zte.com.cn>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library Public License for more details.
#

source "$CEPH_ROOT/qa/standalone/ceph-helpers.sh"

# Entry point: takes the test directory followed by optional TEST_* function
# names. Exports the pool and pg-log settings the test reads, appends the
# cluster options to CEPH_ARGS, then runs every selected test between setup
# and teardown of the directory.
#
# Arguments: $1 - test directory; $2.. - optional test function names
#            (defaults to every TEST_* function currently defined)
# Returns:   0 if all tests pass, 1 on the first failure
function run() {
    local dir=$1
    shift

    export poolname=test
    export testobjects=100
    export loglen=12
    export trim=$((loglen / 2))
    export CEPH_MON="127.0.0.1:7115" # git grep '\<7115\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "
    # so we will not force auth_log_shard to be acting_primary
    CEPH_ARGS+="--osd_force_auth_primary_missing_objects=1000000 "
    # use small pg_log settings, so we always do backfill instead of recovery
    CEPH_ARGS+="--osd_min_pg_log_entries=$loglen --osd_max_pg_log_entries=$loglen --osd_pg_log_trim_min=$trim "

    # $funcs is deliberately expanded unquoted below: it is a
    # whitespace-separated list of function names.
    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    for func in $funcs ; do
        setup "$dir" || return 1
        $func "$dir" || return 1
        teardown "$dir" || return 1
    done
}


# Remap a PG from [1,2,3] to [1,4,5], kill osd.2 and bring it back, and check
# at each step (via `ceph pg query`) whether "2" shows up in the acting set.
# Finally require that some OSD log contains the "requesting pg_temp change"
# message.
#
# Globals:   poolname, testobjects (read; exported by run)
# Arguments: $1 - test directory
# Returns:   0 on success, 1 on failure
function TEST_repeer_on_down_acting_member_coming_back() {
    local dir=$1
    local dummyfile='/etc/fstab'

    local num_osds=6
    local osds
    osds="$(seq 0 $((num_osds - 1)))"
    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    for i in $osds
    do
        run_osd "$dir" "$i" || return 1
    done

    create_pool $poolname 1 1
    ceph osd pool set $poolname size 3
    ceph osd pool set $poolname min_size 2
    local poolid=$(ceph pg dump pools -f json | jq '.pool_stats' | jq '.[].poolid')
    local pgid=$poolid.0

    # enable required feature-bits for upmap
    ceph osd set-require-min-compat-client luminous
    # reset up to [1,2,3]
    ceph osd pg-upmap $pgid 1 2 3 || return 1

    flush_pg_stats || return 1
    wait_for_clean || return 1

    echo "writing initial objects"
    # write a bunch of objects
    for i in $(seq 1 $testobjects)
    do
        rados -p $poolname put existing_$i $dummyfile
    done

    WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean

    # reset up to [1,4,5]
    ceph osd pg-upmap $pgid 1 4 5 || return 1

    # wait for peering to complete
    sleep 2

    # make sure osd.2 belongs to current acting set
    ceph pg $pgid query | jq '.acting' | grep 2 || return 1

    # kill osd.2
    kill_daemons "$dir" KILL osd.2 || return 1
    ceph osd down osd.2

    # again, wait for peering to complete
    sleep 2

    # osd.2 should have been moved out from acting set
    ceph pg $pgid query | jq '.acting' | grep 2 && return 1

    # bring up osd.2
    activate_osd "$dir" 2 || return 1
    wait_for_osd up 2

    # again, wait for peering to complete
    sleep 2

    # primary should be able to re-add osd.2 into acting
    ceph pg $pgid query | jq '.acting' | grep 2 || return 1

    WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean

    # Collect the OSD logs first: with no file operand grep would read stdin
    # and block, so an empty list is treated as a failure instead.
    local -a osd_logs
    mapfile -t osd_logs < <(find "$dir" -name '*osd*log')
    if (( ${#osd_logs[@]} == 0 )) ||
       ! grep -q "Active: got notify from previous acting member.*, requesting pg_temp change" "${osd_logs[@]}"
    then
        echo failure
        return 1
    fi
    echo "success"

    delete_pool $poolname
    kill_daemons "$dir" || return 1
}

main repeer-on-acting-back "$@"

# Local Variables:
# compile-command: "make -j4 && ../qa/run-standalone.sh repeer-on-acting-back.sh"
# End:
diff --git a/qa/standalone/osd/repro_long_log.sh b/qa/standalone/osd/repro_long_log.sh new file mode 100755 index 000000000..fa27d7017 --- /dev/null +++ b/qa/standalone/osd/repro_long_log.sh @@ -0,0 +1,197 @@
#!/usr/bin/env bash
#
# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
# Copyright (C) 2018 Red Hat <contact@redhat.com>
#
# Author: Josh Durgin <jdurgin@redhat.com>
# Author: David Zafman <dzafman@redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Library Public License for more details.
#

source "$CEPH_ROOT/qa/standalone/ceph-helpers.sh"

# Entry point: takes the test directory followed by optional TEST_* function
# names, builds CEPH_ARGS and runs each selected test between setup and
# teardown of the directory.
#
# Returns: 0 if all tests pass, 1 on the first failure
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7100" # git grep '\<7100\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "

    # $funcs is deliberately expanded unquoted: whitespace-separated names.
    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    for func in $funcs ; do
        setup "$dir" || return 1
        $func "$dir" || return 1
        teardown "$dir" || return 1
    done
}

PGID=

# Print the PG's log_size and log_dups_size (from `ceph pg query`) and grep
# them for the expected values.
#
# Arguments: $1 - pgid; $2 - expected log_size;
#            $3 - expected log_dups_size (default 0)
# Returns:   the exit status of the last grep (the dups check) only.
#
# NOTE(review): the result of the log_size grep is discarded, and both greps
# match substrings (an expected "2" also matches "20" or "12"). Tightening
# this would make every caller stricter, so confirm the expected values
# against a real cluster before changing it.
function test_log_size()
{
    local PGID=$1
    local EXPECTED=$2
    local DUPS_EXPECTED=${3:-0}
    ceph tell osd.\* flush_pg_stats
    sleep 3
    ceph pg $PGID query | jq .info.stats.log_size
    ceph pg $PGID query | jq .info.stats.log_size | grep "${EXPECTED}"
    ceph pg $PGID query | jq .info.stats.log_dups_size
    ceph pg $PGID query | jq .info.stats.log_dups_size | grep "${DUPS_EXPECTED}"
}

# Start a mon, a mgr and three OSDs, create the one-PG pool "test", inject
# small pg-log limits and write/remove object "foo" repeatedly so the PG log
# is populated before a test starts.
#
# Globals:   POOL_ID, PGID (written)
# Arguments: $1 - test directory
# Returns:   0 on success, 1 on failure
function setup_log_test() {
    local dir=$1

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    run_osd "$dir" 0 || return 1
    run_osd "$dir" 1 || return 1
    run_osd "$dir" 2 || return 1

    ceph osd pool create test 1 1 || true
    POOL_ID=$(ceph osd dump --format json | jq '.pools[] | select(.pool_name == "test") | .pool')
    PGID="${POOL_ID}.0"

    # With 1 PG setting entries per osd 20 results in a target log of 20
    ceph tell osd.\* injectargs -- --osd_target_pg_log_entries_per_osd 20 || return 1
    ceph tell osd.\* injectargs -- --osd-min-pg-log-entries 20 || return 1
    ceph tell osd.\* injectargs -- --osd-max-pg-log-entries 30 || return 1
    ceph tell osd.\* injectargs -- --osd-pg-log-trim-min 10 || return 1
    ceph tell osd.\* injectargs -- --osd_pg_log_dups_tracked 20 || return 1

    touch "$dir/foo"
    for i in $(seq 1 20)
    do
        rados -p test put foo "$dir/foo" || return 1
    done

    test_log_size $PGID 20 || return 1

    rados -p test rm foo || return 1

    # generate error entries
    for i in $(seq 1 20)
    do
        rados -p test rm foo
    done

    # log should have been trimmed down to min_entries with one extra
    test_log_size $PGID 21 || return 1
}

# One more regular write after the common set-up, then check the log size.
function TEST_repro_long_log1()
{
    local dir=$1

    setup_log_test "$dir" || return 1
    # regular write should trim the log
    rados -p test put foo "$dir/foo" || return 1
    test_log_size $PGID 22 || return 1
}

# Stop the up_primary, run `ceph-objectstore-tool --op trim-pg-log` against
# its store with tiny log limits, restart it and check the reported sizes.
function TEST_repro_long_log2()
{
    local dir=$1

    setup_log_test "$dir" || return 1
    local PRIMARY=$(ceph pg $PGID query | jq '.info.stats.up_primary')
    kill_daemons "$dir" TERM osd.$PRIMARY || return 1
    CEPH_ARGS="--osd-max-pg-log-entries=2 --osd-pg-log-dups-tracked=3 --no-mon-config" ceph-objectstore-tool --data-path "$dir/$PRIMARY" --pgid $PGID --op trim-pg-log || return 1
    activate_osd "$dir" $PRIMARY || return 1
    wait_for_clean || return 1
    test_log_size $PGID 21 18 || return 1
}

# With trim-max set to 4 and dups tracking set to 0, remove "foo" repeatedly
# and check the log size after every removal.
function TEST_trim_max_entries()
{
    local dir=$1
    local expected

    setup_log_test "$dir" || return 1

    ceph tell osd.\* injectargs -- --osd_target_pg_log_entries_per_osd 2 || return 1
    ceph tell osd.\* injectargs -- --osd-min-pg-log-entries 2
    ceph tell osd.\* injectargs -- --osd-pg-log-trim-min 2
    ceph tell osd.\* injectargs -- --osd-pg-log-trim-max 4
    ceph tell osd.\* injectargs -- --osd_pg_log_dups_tracked 0

    # adding log entries, should only trim 4 and add one each time
    for expected in 18 15 12 9 6 3; do
        rados -p test rm foo
        test_log_size $PGID $expected || return 1
    done

    # below trim_min
    for expected in 4 3 4 3; do
        rados -p test rm foo
        test_log_size $PGID $expected || return 1
    done
}

# Same sequence as TEST_trim_max_entries but with 20 dups tracked; each step
# also passes an expected dups size to test_log_size.
function TEST_trim_max_entries_with_dups()
{
    local dir=$1
    local step

    setup_log_test "$dir" || return 1

    ceph tell osd.\* injectargs -- --osd_target_pg_log_entries_per_osd 2 || return 1
    ceph tell osd.\* injectargs -- --osd-min-pg-log-entries 2
    ceph tell osd.\* injectargs -- --osd-pg-log-trim-min 2
    ceph tell osd.\* injectargs -- --osd-pg-log-trim-max 4
    ceph tell osd.\* injectargs -- --osd_pg_log_dups_tracked 20 || return 1

    # adding log entries, should only trim 4 and add one each time
    # dups should be trimmed to 1
    # each step is "<log_size>:<dups_size>"
    for step in 18:2 15:6 12:10 9:14 6:18 3:20; do
        rados -p test rm foo
        test_log_size $PGID "${step%%:*}" "${step##*:}" || return 1
    done

    # below trim_min
    for step in 4:20 3:20 4:20 3:20; do
        rados -p test rm foo
        test_log_size $PGID "${step%%:*}" "${step##*:}" || return 1
    done
}

main repro-long-log "$@"

# Local Variables:
# compile-command: "cd ../.. ; make -j4 && ../qa/run-standalone.sh repro_long_log.sh"
# End:
diff --git a/qa/standalone/scrub/osd-mapper.sh b/qa/standalone/scrub/osd-mapper.sh new file mode 100755 index 000000000..ed18f94f1 --- /dev/null +++ b/qa/standalone/scrub/osd-mapper.sh @@ -0,0 +1,182 @@
#!/usr/bin/env bash
# -*- mode:text; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
# vim: ts=8 sw=2 smarttab
#
# test the handling of a corrupted SnapMapper DB by Scrub

source "$CEPH_ROOT/qa/standalone/ceph-helpers.sh"
source "$CEPH_ROOT/qa/standalone/scrub/scrub-helpers.sh"

# Entry point: takes the test directory followed by optional TEST_* function
# names, builds CEPH_ARGS and runs each selected test between setup and
# teardown of the directory.
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7144" # git grep '\<7144\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "

    export -n CEPH_CLI_TEST_DUP_COMMAND
    # $funcs is deliberately expanded unquoted: whitespace-separated names.
    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    for func in $funcs ; do
        setup "$dir" || return 1
        $func "$dir" || return 1
        teardown "$dir" || return 1
    done
}

# one clone & multiple snaps (according to the number of parameters)
#
# Writes a random value to the object, then creates one pool snapshot per
# remaining argument. Shell tracing is switched off for the duration and
# restored on every exit path, including failures.
#
# Arguments: $1 - pool; $2 - object name; $3.. - snapshot names
# Returns:   0 on success, 1 if the put or any mksnap fails
function make_a_clone()
{
    #turn off '-x' (but remember previous state)
    local saved_echo_flag=${-//[^x]/}
    set +x
    local pool=$1
    local obj=$2
    local rc=0
    local snap

    if echo $RANDOM | rados -p "$pool" put "$obj" -; then
        shift 2
        for snap in "$@"; do
            rados -p "$pool" mksnap "$snap" || { rc=1; break; }
        done
    else
        rc=1
    fi

    if [[ -n "$saved_echo_flag" ]]; then set -x; fi
    return $rc
}

# Create an object with several clones, deep-scrub its PG, then stop the OSDs
# and truncate one SNA_ key in every OSD's key-value store. After restarting,
# a deep scrub must log "ERR" on the primary; a second deep scrub must not
# add further "ERR" lines, and the number of SNA_ keys must be back to what
# it was before the corruption.
#
# Arguments: $1 - test directory
# Returns:   0 on success, 1 on failure
function TEST_truncated_sna_record() {
    local dir=$1
    local -A cluster_conf=(
        ['osds_num']="3"
        ['pgs_in_pool']="4"
        ['pool_name']="test"
    )
    # pattern of the SNA_ keys counted before and after the corruption
    local sna_pattern='SNA_[0-9]_000000000000000[0-9]_000000000000000'
    local sdn kvdir KY NKY tmp_fn1 orig_osd_args

    local extr_dbg=3
    (( extr_dbg > 1 )) && echo "Dir: $dir"
    standard_scrub_cluster "$dir" cluster_conf
    # quoted so the shell cannot glob-expand osd.* against files in the cwd
    ceph tell 'osd.*' config set osd_stats_update_period_not_scrubbing "1"
    ceph tell 'osd.*' config set osd_stats_update_period_scrubbing "1"

    local osdn=${cluster_conf['osds_num']}
    local poolid=${cluster_conf['pool_id']}
    local poolname=${cluster_conf['pool_name']}
    local objname="objxxx"

    # create an object and clone it
    make_a_clone $poolname $objname snap01 snap02 || return 1
    make_a_clone $poolname $objname snap13 || return 1
    make_a_clone $poolname $objname snap24 snap25 || return 1
    echo $RANDOM | rados -p $poolname put $objname - || return 1

    #identify the PG and the primary OSD
    local pgid=$(ceph --format=json-pretty osd map $poolname $objname | jq -r '.pgid')
    local osd=$(ceph --format=json-pretty osd map $poolname $objname | jq -r '.up[0]')
    echo "pgid is $pgid (primary: osd.$osd)"
    # turn on the publishing of test data in the 'scrubber' section of 'pg query' output
    set_query_debug $pgid

    # verify the existence of these clones
    (( extr_dbg >= 1 )) && rados --format json-pretty -p $poolname listsnaps $objname

    # scrub the PG
    ceph pg $pgid deep_scrub || return 1

    # we aren't just waiting for the scrub to terminate, but also for the
    # logs to be published
    sleep 3
    ceph pg dump pgs
    until grep -a -q -- "event: --^^^^---- ScrubFinished" "$dir/osd.$osd.log" ; do
        sleep 0.2
    done

    ceph pg dump pgs
    ceph osd set noscrub || return 1
    ceph osd set nodeep-scrub || return 1
    sleep 5
    # no "ERR" line may be present yet. (`grep -q -v` would succeed as soon
    # as any single line lacks "ERR", which checks nothing.)
    ! grep -a -q "ERR" "$dir/osd.$osd.log" || return 1

    # kill the OSDs
    kill_daemons "$dir" TERM osd || return 1

    (( extr_dbg >= 2 )) && ceph-kvstore-tool bluestore-kv "$dir/0" dump "p"
    (( extr_dbg >= 2 )) && ceph-kvstore-tool bluestore-kv "$dir/2" dump "p" | grep -a SNA_
    # debug aid only: this test never creates the file, so look at it only
    # if it happens to exist
    (( extr_dbg >= 2 )) && [[ -r /tmp/oo2.dump ]] && grep -a SNA_ /tmp/oo2.dump
    (( extr_dbg >= 2 )) && ceph-kvstore-tool bluestore-kv "$dir/2" dump p 2> /dev/null
    local num_sna_b4=$(ceph-kvstore-tool bluestore-kv "$dir/$osd" dump p 2> /dev/null \
        | grep -a -c -e "$sna_pattern")

    for ((sdn = 0; sdn < osdn; sdn++))
    do
        kvdir=$dir/$sdn
        echo "corrupting the SnapMapper DB of osd.$sdn (db: $kvdir)"
        (( extr_dbg >= 3 )) && ceph-kvstore-tool bluestore-kv "$kvdir" dump "p"

        # truncate the 'mapping' (SNA_) entry corresponding to the snap13 clone
        KY=$(ceph-kvstore-tool bluestore-kv "$kvdir" dump p 2> /dev/null \
            | grep -a -e 'SNA_[0-9]_0000000000000003_000000000000000' \
            | awk '{print $2;}')
        (( extr_dbg >= 1 )) && echo "SNA key: $KY" | cat -v

        tmp_fn1=$(mktemp -p /tmp --suffix="_the_val")
        (( extr_dbg >= 1 )) && echo "Value dumped in: $tmp_fn1"
        ceph-kvstore-tool bluestore-kv "$kvdir" get p "$KY" out "$tmp_fn1" 2> /dev/null
        (( extr_dbg >= 2 )) && od -xc "$tmp_fn1"

        # re-insert the saved value under the key minus its last 30 characters
        NKY=${KY:0:-30}
        ceph-kvstore-tool bluestore-kv "$kvdir" rm "p" "$KY" 2> /dev/null
        ceph-kvstore-tool bluestore-kv "$kvdir" set "p" "$NKY" in "$tmp_fn1" 2> /dev/null

        # the value file is kept for inspection whenever debugging is on
        (( extr_dbg >= 1 )) || rm "$tmp_fn1"
    done

    orig_osd_args=" ${cluster_conf['osd_args']}"
    # unquoted echo collapses runs of whitespace in the argument string
    orig_osd_args=" $(echo $orig_osd_args)"
    (( extr_dbg >= 2 )) && echo "Copied OSD args: /$orig_osd_args/ /${orig_osd_args:1}/"
    for ((sdn = 0; sdn < osdn; sdn++))
    do
        CEPH_ARGS="$CEPH_ARGS $orig_osd_args" activate_osd "$dir" $sdn
    done
    sleep 1

    for ((sdn = 0; sdn < osdn; sdn++))
    do
        timeout 60 ceph tell osd.$sdn version
    done
    rados --format json-pretty -p $poolname listsnaps $objname

    # when scrubbing now - we expect the scrub to emit a cluster log ERR message regarding SnapMapper internal inconsistency
    ceph osd unset nodeep-scrub || return 1
    ceph osd unset noscrub || return 1

    # what is the primary now?
    local cur_prim=$(ceph --format=json-pretty osd map $poolname $objname | jq -r '.up[0]')
    ceph pg dump pgs
    sleep 2
    ceph pg $pgid deep_scrub || return 1
    sleep 5
    ceph pg dump pgs
    (( extr_dbg >= 1 )) && grep -a "ERR" "$dir/osd.$cur_prim.log"
    grep -a -q "ERR" "$dir/osd.$cur_prim.log" || return 1

    # but did we fix the snap issue? let's try scrubbing again

    local prev_err_cnt=$(grep -a -c "ERR" "$dir/osd.$cur_prim.log")
    echo "prev count: $prev_err_cnt"

    # scrub again. No errors expected this time
    ceph pg $pgid deep_scrub || return 1
    sleep 5
    ceph pg dump pgs
    (( extr_dbg >= 1 )) && grep -a "ERR" "$dir/osd.$cur_prim.log"
    local current_err_cnt=$(grep -a -c "ERR" "$dir/osd.$cur_prim.log")
    (( extr_dbg >= 1 )) && echo "current count: $current_err_cnt"
    (( current_err_cnt == prev_err_cnt )) || return 1
    kill_daemons "$dir" TERM osd || return 1
    kvdir=$dir/$cur_prim
    (( extr_dbg >= 2 )) && ceph-kvstore-tool bluestore-kv "$kvdir" dump p 2> /dev/null \
        | grep -a -e 'SNA_[0-9]_' | awk '{print $2;}'
    local num_sna_full=$(ceph-kvstore-tool bluestore-kv "$kvdir" dump p 2> /dev/null \
        | grep -a -c -e "$sna_pattern")
    (( num_sna_full == num_sna_b4 )) || return 1
    return 0
}


main osd-mapper "$@"
diff --git a/qa/standalone/scrub/osd-recovery-scrub.sh b/qa/standalone/scrub/osd-recovery-scrub.sh new file mode 100755 index 000000000..9541852c7 --- /dev/null +++ b/qa/standalone/scrub/osd-recovery-scrub.sh @@ -0,0 +1,352 @@
#! /usr/bin/env bash
#
# Copyright (C) 2017 Red Hat <contact@redhat.com>
#
# Author: David Zafman <dzafman@redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Library Public License for more details.
#
source "$CEPH_ROOT/qa/standalone/ceph-helpers.sh"

# Entry point: takes the test directory followed by optional TEST_* function
# names and builds CEPH_ARGS. Unlike the other standalone scripts, each test
# here calls setup/teardown itself.
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7124" # git grep '\<7124\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "

    export -n CEPH_CLI_TEST_DUP_COMMAND
    # $funcs is deliberately expanded unquoted: whitespace-separated names.
    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    for func in $funcs ; do
        $func "$dir" || return 1
    done
}

# Simple test for "not scheduling scrubs due to active recovery"
# OSD::sched_scrub() called on all OSDs during ticks
#
# Starts 4 OSDs with osd_scrub_during_recovery=false, raises the pool size to
# trigger recovery and then requires the message above in every OSD log.
#
# Arguments: $1 - test directory
# Returns:   0 on success, 1 on failure
function TEST_recovery_scrub_1() {
    local dir=$1
    local poolname=test

    TESTDATA="testdata.$$"
    OSDS=4
    PGS=1
    OBJECTS=100
    ERRORS=0

    setup "$dir" || return 1
    run_mon "$dir" a --osd_pool_default_size=1 --mon_allow_pool_size_one=true \
                   --osd_scrub_interval_randomize_ratio=0.0 || return 1
    run_mgr "$dir" x || return 1
    for osd in $(seq 0 $((OSDS - 1)))
    do
        run_osd "$dir" $osd --osd_scrub_during_recovery=false || return 1
    done

    # Create a pool with $PGS pgs
    create_pool $poolname $PGS $PGS
    wait_for_clean || return 1
    poolid=$(ceph osd dump | grep "^pool.*[']test[']" | awk '{ print $2 }')

    ceph pg dump pgs

    dd if=/dev/urandom of=$TESTDATA bs=1M count=50
    for i in $(seq 1 $OBJECTS)
    do
        rados -p $poolname put obj${i} $TESTDATA
    done
    rm -f $TESTDATA

    ceph osd pool set $poolname size 4

    # Wait for recovery to start
    set -o pipefail
    count=0
    while true
    do
        if ceph --format json pg dump pgs |
            jq '.pg_stats | [.[] | .state | contains("recovering")]' | grep -q true
        then
            break
        fi
        sleep 2
        if test "$count" -eq "10"
        then
            echo "Recovery never started"
            # pipefail is a shell-wide option: do not leak it into later tests
            set +o pipefail
            return 1
        fi
        count=$((count + 1))
    done
    set +o pipefail
    ceph pg dump pgs

    sleep 10
    # Work around for http://tracker.ceph.com/issues/38195
    kill_daemons "$dir" #|| return 1

    declare -a err_strings
    err_strings[0]="not scheduling scrubs due to active recovery"

    for osd in $(seq 0 $((OSDS - 1)))
    do
        grep "not scheduling scrubs" "$dir/osd.${osd}.log"
    done
    for err_string in "${err_strings[@]}"
    do
        found=false
        count=0
        for osd in $(seq 0 $((OSDS - 1)))
        do
            if grep -q "$err_string" "$dir/osd.${osd}.log"
            then
                found=true
                count=$((count + 1))
            fi
        done
        if [ "$found" = "false" ]; then
            echo "Missing log message '$err_string'"
            ERRORS=$((ERRORS + 1))
        fi
        # the message must be present in the log of every OSD
        [ $count -eq $OSDS ] || return 1
    done

    teardown "$dir" || return 1

    if [ $ERRORS != "0" ];
    then
        echo "TEST FAILED WITH $ERRORS ERRORS"
        return 1
    fi

    echo "TEST PASSED"
    return 0
}

##
# a modified version of wait_for_scrub(), which terminates if the Primary
# of the to-be-scrubbed PG changes
#
# Given the *last_scrub*, wait for scrub to happen on **pgid**. It
# will fail if scrub does not complete within $TIMEOUT seconds. The
# repair is complete whenever the **get_last_scrub_stamp** function
# reports a timestamp different from the one given in argument.
#
# @param pgid the id of the PG
# @param the primary OSD when started
# @param last_scrub timestamp of the last scrub for *pgid*
# @param sname name of the stamp to poll (default: last_scrub_stamp)
# @return 0 on success (or when the primary changed), 1 on error
#
function wait_for_scrub_mod() {
    local pgid=$1
    local orig_primary=$2
    local last_scrub="$3"
    local sname=${4:-last_scrub_stamp}
    local i current_primary

    for ((i=0; i < $TIMEOUT; i++)); do
        sleep 0.2
        if test "$(get_last_scrub_stamp $pgid $sname)" '>' "$last_scrub" ; then
            return 0
        fi
        sleep 1
        # are we still the primary?
        current_primary=$(ceph pg $pgid query | jq '.acting[0]')
        # quoted so that an empty query result is compared as an empty string
        if [ "$orig_primary" != "$current_primary" ]; then
            echo $orig_primary no longer primary for $pgid
            return 0
        fi
    done
    return 1
}

##
# A modified version of pg_scrub()
#
# Run scrub on **pgid** and wait until it completes. The pg_scrub
# function will fail if repair does not complete within $TIMEOUT
# seconds. The pg_scrub is complete whenever the
# **get_last_scrub_stamp** function reports a timestamp different from
# the one stored before starting the scrub, or whenever the Primary
# changes.
#
# @param pgid the id of the PG
# @return 0 on success, 1 on error, 2 if the PG state contained
#         "recovering" right after the scrub was requested
#
function pg_scrub_mod() {
    local pgid=$1
    local last_scrub=$(get_last_scrub_stamp $pgid)
    # locate the primary
    local my_primary=$(ceph pg $pgid query | jq '.acting[0]')
    local recovery=false
    ceph pg scrub $pgid
    #ceph --format json pg dump pgs | jq ".pg_stats | .[] | select(.pgid == \"$pgid\") | .state"
    if ceph --format json pg dump pgs | jq ".pg_stats | .[] | select(.pgid == \"$pgid\") | .state" | grep -q recovering
    then
        recovery=true
    fi
    wait_for_scrub_mod $pgid "$my_primary" "$last_scrub" || return 1
    if test $recovery = "true"
    then
        return 2
    fi
}

# Same as wait_background() except that it checks for exit code 2 and bumps recov_scrub_count
#
# Globals:   recov_scrub_count (incremented), pids, return_code (written)
# Arguments: $1 - NAME of the variable holding the list of PIDs to wait for
# Returns:   0 if every process exited with 0 or 2, 1 otherwise
function wait_background_check() {
    # We extract the PIDS from the variable name
    pids=${!1}

    return_code=0
    for pid in $pids; do
        wait $pid
        retcode=$?
        if test $retcode -eq 2
        then
            recov_scrub_count=$((recov_scrub_count + 1))
        elif test $retcode -ne 0
        then
            # If one process failed then return 1
            return_code=1
        fi
    done

    # We empty the variable reporting that all process ended
    printf -v "$1" '%s' ''

    return $return_code
}

# osd_scrub_during_recovery=true make sure scrub happens
#
# Starts 8 OSDs with scrubbing allowed during recovery, waits until at least
# two PG states mention "recovering", scrubs every PG in the background and
# requires that at least one scrub ran on a recovering PG and that the
# "not scheduling scrubs due to active recovery" message is in no OSD log.
#
# Arguments: $1 - test directory
# Returns:   0 on success, non-zero on failure
function TEST_recovery_scrub_2() {
    local dir=$1
    local poolname=test

    TESTDATA="testdata.$$"
    OSDS=8
    PGS=32
    OBJECTS=40

    setup "$dir" || return 1
    run_mon "$dir" a --osd_pool_default_size=1 --mon_allow_pool_size_one=true \
                   --osd_scrub_interval_randomize_ratio=0.0 || return 1
    run_mgr "$dir" x || return 1
    for osd in $(seq 0 $((OSDS - 1)))
    do
        run_osd "$dir" $osd --osd_scrub_during_recovery=true --osd_recovery_sleep=10 || return 1
    done

    # Create a pool with $PGS pgs
    create_pool $poolname $PGS $PGS
    wait_for_clean || return 1
    poolid=$(ceph osd dump | grep "^pool.*[']test[']" | awk '{ print $2 }')

    dd if=/dev/urandom of=$TESTDATA bs=1M count=50
    for i in $(seq 1 $OBJECTS)
    do
        rados -p $poolname put obj${i} $TESTDATA
    done
    rm -f $TESTDATA

    ceph osd pool set $poolname size 3

    ceph pg dump pgs

    # Wait for recovery to start
    count=0
    while true
    do
        #ceph --format json pg dump pgs | jq '.pg_stats | [.[].state]'
        if test $(ceph --format json pg dump pgs |
                  jq '.pg_stats | [.[].state]' | grep -c recovering) -ge 2
        then
            break
        fi
        sleep 2
        if test "$count" -eq "10"
        then
            echo "Not enough recovery started simultaneously"
            return 1
        fi
        count=$((count + 1))
    done
    ceph pg dump pgs

    pids=""
    recov_scrub_count=0
    for pg in $(seq 0 $((PGS - 1)))
    do
        # PG ids are <poolid>.<hex index>
        run_in_background pids pg_scrub_mod $poolid.$(printf "%x" $pg)
    done
    wait_background_check pids
    return_code=$?
    if [ $return_code -ne 0 ]; then return $return_code; fi

    ERRORS=0
    if test $recov_scrub_count -eq 0
    then
        echo "No scrubs occurred while PG recovering"
        ERRORS=$((ERRORS + 1))
    fi

    # NOTE(review): name_prefix is not set anywhere in this script, so unless
    # a sourced helper defines it the pattern matches every *.pid file under
    # $dir and kill -0 probes all of those daemons - confirm the intent.
    pidfile=$(find $dir 2>/dev/null | grep $name_prefix'[^/]*\.pid')
    pid=$(cat $pidfile)
    if ! kill -0 $pid
    then
        echo "OSD crash occurred"
        #tail -100 $dir/osd.0.log
        ERRORS=$((ERRORS + 1))
    fi

    # Work around for http://tracker.ceph.com/issues/38195
    kill_daemons "$dir" #|| return 1

    declare -a err_strings
    err_strings[0]="not scheduling scrubs due to active recovery"

    for osd in $(seq 0 $((OSDS - 1)))
    do
        grep "not scheduling scrubs" "$dir/osd.${osd}.log"
    done
    for err_string in "${err_strings[@]}"
    do
        found=false
        for osd in $(seq 0 $((OSDS - 1)))
        do
            if grep -q "$err_string" "$dir/osd.${osd}.log"
            then
                found=true
            fi
        done
        if [ "$found" = "true" ]; then
            echo "Found log message not expected '$err_string'"
            ERRORS=$((ERRORS + 1))
        fi
    done

    teardown "$dir" || return 1

    if [ $ERRORS != "0" ];
    then
        echo "TEST FAILED WITH $ERRORS ERRORS"
        return 1
    fi

    echo "TEST PASSED"
    return 0
}

main osd-recovery-scrub "$@"

# Local Variables:
# compile-command: "cd build ; make -j4 && \
#    ../qa/run-standalone.sh osd-recovery-scrub.sh"
# End:
diff --git a/qa/standalone/scrub/osd-scrub-dump.sh b/qa/standalone/scrub/osd-scrub-dump.sh new file mode 100755 index 000000000..f21ec7801 --- /dev/null +++ b/qa/standalone/scrub/osd-scrub-dump.sh @@ -0,0 +1,180 @@
#!/usr/bin/env bash
#
# Copyright (C) 2019 Red Hat <contact@redhat.com>
#
# Author: David Zafman <dzafman@redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Library Public License for more details.
#

source "$CEPH_ROOT/qa/standalone/ceph-helpers.sh"

MAX_SCRUBS=4
SCRUB_SLEEP=3
POOL_SIZE=3

# Entry point: takes the test directory followed by optional TEST_* function
# names, builds CEPH_ARGS from the constants above and runs each selected
# test between setup and teardown of the directory.
function run() {
    local dir=$1
    shift
    local CHUNK_MAX=5

    export CEPH_MON="127.0.0.1:7184" # git grep '\<7184\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "
    CEPH_ARGS+="--osd_max_scrubs=$MAX_SCRUBS "
    CEPH_ARGS+="--osd_shallow_scrub_chunk_max=$CHUNK_MAX "
    CEPH_ARGS+="--osd_scrub_sleep=$SCRUB_SLEEP "
    CEPH_ARGS+="--osd_pool_default_size=$POOL_SIZE "
    # Set scheduler to "wpq" until there's a reliable way to query scrub states
    # with "--osd-scrub-sleep" set to 0. The "mclock_scheduler" overrides the
    # scrub sleep to 0 and as a result the checks in the test fail.
    CEPH_ARGS+="--osd_op_queue=wpq "

    export -n CEPH_CLI_TEST_DUP_COMMAND
    # $funcs is deliberately expanded unquoted: whitespace-separated names.
    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    for func in $funcs ; do
        setup "$dir" || return 1
        $func "$dir" || return 1
        teardown "$dir" || return 1
    done
}

# Request a scrub of every PG while noscrub is set, release the flag and then
# poll dump_scrub_reservations on each OSD: no OSD may ever report more than
# MAX_SCRUBS reservations, and the running total must reach
# PGS * POOLS * POOL_SIZE before scrubbing stops.
#
# Globals:   MAX_SCRUBS, SCRUB_SLEEP, POOL_SIZE (read)
# Arguments: $1 - test directory
# Returns:   0 on success, 1 on failure
function TEST_recover_unexpected() {
    local dir=$1
    shift
    local OSDS=6
    local PGS=16
    local POOLS=3
    local OBJS=1000

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    for o in $(seq 0 $((OSDS - 1)))
    do
        run_osd "$dir" $o || return 1
    done

    for i in $(seq 1 $POOLS)
    do
        create_pool test$i $PGS $PGS
    done

    wait_for_clean || return 1

    dd if=/dev/urandom of=datafile bs=4k count=2
    for i in $(seq 1 $POOLS)
    do
        for j in $(seq 1 $OBJS)
        do
            rados -p test$i put obj$j datafile
        done
    done
    rm datafile

    ceph osd set noscrub
    ceph osd set nodeep-scrub

    # jq -r prints the pgids without the surrounding JSON quotes
    for pg in $(ceph pg dump pgs --format=json-pretty | jq -r '.pg_stats[].pgid')
    do
        ceph tell $pg scrub
    done

    ceph pg dump pgs

    max=$(CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_scrub_reservations | jq '.osd_max_scrubs')
    # quoted so that an empty answer is reported instead of silently passing
    if [ "$max" != "$MAX_SCRUBS" ]; then
        echo "ERROR: Incorrect osd_max_scrubs from dump_scrub_reservations"
        return 1
    fi

    ceph osd unset noscrub

    ok=false
    for i in $(seq 0 300)
    do
        ceph pg dump pgs
        if ceph pg dump pgs | grep '+scrubbing'; then
            ok=true
            break
        fi
        sleep 1
    done
    if test $ok = "false"; then
        echo "ERROR: Test set-up failed no scrubbing"
        return 1
    fi

    local total=0
    local zerocount=0
    local maxzerocount=3
    while true
    do
        pass=0
        for o in $(seq 0 $((OSDS - 1)))
        do
            CEPH_ARGS='' ceph daemon $(get_asok_path osd.$o) dump_scrub_reservations
            scrubs=$(CEPH_ARGS='' ceph daemon $(get_asok_path osd.$o) dump_scrub_reservations | jq '.scrubs_local + .scrubs_remote')
            # an empty answer counts as zero reservations
            scrubs=${scrubs:-0}
            if [ $scrubs -gt $MAX_SCRUBS ]; then
                echo "ERROR: More than $MAX_SCRUBS currently reserved"
                return 1
            fi
            pass=$((pass + scrubs))
        done
        # stop after more than $maxzerocount passes without any reservation
        if [ $pass = "0" ]; then
            zerocount=$((zerocount + 1))
        fi
        if [ $zerocount -gt $maxzerocount ]; then
            break
        fi
        total=$((total + pass))
        if [ $total -gt 0 ]; then
            # already saw some reservations, so wait longer to avoid excessive over-counting.
            # Note the loop itself takes about 2-3 seconds
            sleep $((SCRUB_SLEEP - 2))
        else
            sleep 0.5
        fi
    done

    # Check that there are no more scrubs
    for i in $(seq 0 5)
    do
        if ceph pg dump pgs | grep '+scrubbing'; then
            echo "ERROR: Extra scrubs after test completion...not expected"
            return 1
        fi
        sleep $SCRUB_SLEEP
    done

    echo $total total reservations seen

    # Sort of arbitrary number based on PGS * POOLS * POOL_SIZE as the number of total scrub
    # reservations that must occur. However, the loop above might see the same reservation more
    # than once.
    actual_reservations=$((PGS * POOLS * POOL_SIZE))
    if [ $total -lt $actual_reservations ]; then
        echo "ERROR: Unexpectedly low amount of scrub reservations seen during test"
        return 1
    fi

    return 0
}


main osd-scrub-dump "$@"

# Local Variables:
# compile-command: "cd build ; make check && \
#    ../qa/run-standalone.sh osd-scrub-dump.sh"
# End:
diff --git a/qa/standalone/scrub/osd-scrub-repair.sh b/qa/standalone/scrub/osd-scrub-repair.sh new file mode 100755 index 000000000..13b30360c --- /dev/null +++ b/qa/standalone/scrub/osd-scrub-repair.sh @@ -0,0 +1,6255 @@
#!/usr/bin/env bash
#
# Copyright (C) 2014 Red Hat <contact@redhat.com>
#
# Author: Loic Dachary <loic@dachary.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# +set -x +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +if [ `uname` = FreeBSD ]; then + # erasure coding overwrites are only tested on Bluestore + # erasure coding on filestore is unsafe + # http://docs.ceph.com/en/latest/rados/operations/erasure-code/#erasure-coding-with-overwrites + use_ec_overwrite=false +else + use_ec_overwrite=true +fi + +# Test development and debugging +# Set to "yes" in order to ignore diff errors and save results to update test +getjson="no" + +# Filter out mtime and local_mtime dates, version, prior_version and last_reqid (client) from any object_info. +jqfilter='def walk(f): + . as $in + | if type == "object" then + reduce keys[] as $key + ( {}; . + { ($key): ($in[$key] | walk(f)) } ) | f + elif type == "array" then map( walk(f) ) | f + else f + end; +walk(if type == "object" then del(.mtime) else . end) +| walk(if type == "object" then del(.local_mtime) else . end) +| walk(if type == "object" then del(.last_reqid) else . end) +| walk(if type == "object" then del(.version) else . end) +| walk(if type == "object" then del(.prior_version) else . 
# Python one-liner (kept as a string) that reads JSON from stdin and prints it
# back with sorted keys and an indent of 2.
sortkeys='import json; import sys ; JSON=sys.stdin.read() ; ud = json.loads(JSON) ; print(json.dumps(ud, sort_keys=True, indent=2))'

#
# Suite entry point: export the CEPH_MON / CEPH_ARGS settings shared by all
# tests, then run each requested test between setup and teardown.
#
# Arguments: $1   - test directory
#            $2.. - optional names of test functions to run; when none are
#                   given, every defined function named TEST_* is run
# Returns:   1 as soon as setup, a test or teardown fails
#
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7107" # git grep '\<7107\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "
    CEPH_ARGS+="--osd-skip-data-digest=false "

    export -n CEPH_CLI_TEST_DUP_COMMAND
    local func
    # funcs is a whitespace-separated list of names; it is split on purpose below
    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    for func in $funcs ; do
        setup "$dir" || return 1
        "$func" "$dir" || return 1
        teardown "$dir" || return 1
    done
}

#
# Set or clear the noscrub/nodeep-scrub flags, then store the fixed payload
# ABCDEF in $dir/ORIGINAL and upload it to the pool.
#
# Arguments: $1 - test directory
#            $2 - pool name
#            $3 - object name (default SOMETHING)
#            $4 - "noscrub" (default) sets both flags; any other value unsets them
# Returns:   1 if a ceph/rados command or the file write fails
#
function add_something() {
    local dir=$1
    local poolname=$2
    local obj=${3:-SOMETHING}
    local scrub=${4:-noscrub}

    if [ "$scrub" = "noscrub" ]; then
        ceph osd set noscrub || return 1
        ceph osd set nodeep-scrub || return 1
    else
        ceph osd unset noscrub || return 1
        ceph osd unset nodeep-scrub || return 1
    fi

    local payload=ABCDEF
    echo "$payload" > "$dir/ORIGINAL" || return 1
    rados --pool "$poolname" put "$obj" "$dir/ORIGINAL" || return 1
}

#
# Corrupt one copy of a replicated pool
#
function TEST_corrupt_and_repair_replicated() {
    local dir=$1
    local poolname=rbd

    run_mon "$dir" a --osd_pool_default_size=2 || return 1
    run_mgr "$dir" x || return 1
    run_osd "$dir" 0 || return 1
    run_osd "$dir" 1 || return 1
    create_rbd_pool || return 1
    wait_for_clean || return 1

    add_something "$dir" "$poolname" || return 1
    # First damage a non-primary copy, then the primary one
    corrupt_and_repair_one "$dir" "$poolname" "$(get_not_primary "$poolname" SOMETHING)" || return 1
    # Reproduces http://tracker.ceph.com/issues/8914
    corrupt_and_repair_one "$dir" "$poolname" "$(get_primary "$poolname" SOMETHING)" || return 1
}
#
# Skip non-repair scrub correctly during recovery
#
function TEST_skip_non_repair_during_recovery() {
    local dir=$1
    local poolname=rbd
    local osd_id
    # Both OSDs are started with the same recovery-related options
    local -a recovery_opts=(
        --osd_scrub_during_recovery=false
        --osd_repair_during_recovery=true
        --osd_debug_pretend_recovery_active=true
    )

    run_mon "$dir" a --osd_pool_default_size=2 || return 1
    run_mgr "$dir" x || return 1
    for osd_id in 0 1; do
        run_osd "$dir" "$osd_id" "${recovery_opts[@]}" || return 1
    done
    create_rbd_pool || return 1
    wait_for_clean || return 1

    add_something "$dir" "$poolname" || return 1
    scrub_and_not_schedule "$dir" "$poolname" "$(get_not_primary "$poolname" SOMETHING)" || return 1
}

#
# Request a plain (non-repair) scrub of the PG holding SOMETHING and check,
# over three one-second polls, that its last-scrub stamp does not advance;
# then check that the object is still readable and matches ORIGINAL.
#
# Arguments: $1 - test directory (holds ORIGINAL, receives COPY)
#            $2 - pool name
#            $3 - id of the OSD inspected with objectstore_tool
# Returns:   1 if the stamp advanced or any verification step failed
#
function scrub_and_not_schedule() {
    local dir=$1
    local poolname=$2
    local osd=$3
    local i pg last_scrub

    #
    # 1) start a non-repair scrub
    #
    pg=$(get_pg "$poolname" SOMETHING) || return 1
    last_scrub=$(get_last_scrub_stamp "$pg")
    ceph pg scrub "$pg"

    #
    # 2) Assure the scrub is not scheduled
    #
    for ((i = 0; i < 3; i++)); do
        # string comparison of the two stamps, as printed by get_last_scrub_stamp
        if test "$(get_last_scrub_stamp "$pg")" '>' "$last_scrub" ; then
            return 1
        fi
        sleep 1
    done

    #
    # 3) Access to the file must be OK
    #
    objectstore_tool "$dir" "$osd" SOMETHING list-attrs || return 1
    rados --pool "$poolname" get SOMETHING "$dir/COPY" || return 1
    diff "$dir/ORIGINAL" "$dir/COPY" || return 1
}
#
# Expects object SOMETHING to exist already in the pool, with its content
# saved in $dir/ORIGINAL. This function does not add it. Steps:
# 1) remove the corresponding file from a designated OSD
# 2) repair the PG
# 3) check that the file has been restored in the designated OSD
#
# Arguments: $1 - test directory (holds ORIGINAL, receives COPY)
#            $2 - pool name
#            $3 - id of the OSD whose copy is removed
# Returns:   1 if removal, PG lookup or any verification step fails
#
function corrupt_and_repair_one() {
    local dir=$1
    local poolname=$2
    local osd=$3
    local pg

    #
    # 1) remove the corresponding file from the OSD
    #
    objectstore_tool "$dir" "$osd" SOMETHING remove || return 1
    #
    # 2) repair the PG
    #
    pg=$(get_pg "$poolname" SOMETHING) || return 1
    # the status of repair itself is not checked; step 3 decides the outcome
    repair "$pg"
    #
    # 3) The file must be back
    #
    objectstore_tool "$dir" "$osd" SOMETHING list-attrs || return 1
    rados --pool "$poolname" get SOMETHING "$dir/COPY" || return 1
    diff "$dir/ORIGINAL" "$dir/COPY" || return 1
}
#
# Start a 3-OSD cluster with osd-scrub-auto-repair enabled and 5-second scrub
# intervals, remove one shard of an object stored in a k=2 m=1 EC pool, and
# check that the shard is back after the next scrub.
#
# Arguments: $1 - test directory
#            $2 - allow_overwrites value forwarded to create_ec_pool
# Returns:   1 on the first failing step
#
function auto_repair_erasure_coded() {
    local dir=$1
    local allow_overwrites=$2
    local poolname=ecpool
    local id pgid

    # Launch a cluster with 5 seconds scrub interval
    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    # NOTE(review): keep this a plain string named ceph_osd_args. The
    # "Restarted osd get $ceph_osd_args passed" notes below suggest that
    # objectstore_tool reads the variable by name - confirm in ceph-helpers.sh.
    local ceph_osd_args="--osd-scrub-auto-repair=true \
            --osd-deep-scrub-interval=5 \
            --osd-scrub-max-interval=5 \
            --osd-scrub-min-interval=5 \
            --osd-scrub-interval-randomize-ratio=0"
    for id in 0 1 2; do
        # $ceph_osd_args is left unquoted so it splits into separate options
        run_osd "$dir" "$id" $ceph_osd_args || return 1
    done
    create_rbd_pool || return 1
    wait_for_clean || return 1

    # Create an EC pool
    create_ec_pool "$poolname" "$allow_overwrites" k=2 m=1 || return 1

    # Put an object
    local payload=ABCDEF
    echo "$payload" > "$dir/ORIGINAL"
    rados --pool "$poolname" put SOMETHING "$dir/ORIGINAL" || return 1

    # Remove the object from one shard physically
    # Restarted osd get $ceph_osd_args passed
    objectstore_tool "$dir" "$(get_not_primary "$poolname" SOMETHING)" SOMETHING remove || return 1
    # Wait for auto repair; a scrub that never happens fails the test, as in
    # the other auto-repair tests of this file
    pgid=$(get_pg "$poolname" SOMETHING)
    wait_for_scrub "$pgid" "$(get_last_scrub_stamp "$pgid")" || return 1
    wait_for_clean || return 1
    # Verify - the file should be back
    # Restarted osd get $ceph_osd_args passed
    objectstore_tool "$dir" "$(get_not_primary "$poolname" SOMETHING)" SOMETHING list-attrs || return 1
    rados --pool "$poolname" get SOMETHING "$dir/COPY" || return 1
    diff "$dir/ORIGINAL" "$dir/COPY" || return 1
}

# Auto-repair scenario on an EC pool created with allow_overwrites=false
function TEST_auto_repair_erasure_coded_appends() {
    auto_repair_erasure_coded "$1" false
}

# Same scenario with allow_overwrites=true; does nothing (and succeeds) unless
# use_ec_overwrite is "true"
function TEST_auto_repair_erasure_coded_overwrites() {
    if [ "$use_ec_overwrite" = "true" ]; then
        auto_repair_erasure_coded "$1" true
    fi
}
#
# Poll the PG state (40 attempts, 0.3 s apart) until it no longer contains
# "scrubbing". PG stats are flushed on every fifth poll.
#
# Arguments: $1 - osd number (accepted for the callers' sake; not used)
#            $2 - pg id
# Returns:   0 once the scrub is over, 1 if the PG is still scrubbing after
#            the last poll
#
function wait_end_of_scrub() { # osd# pg
    local pgid=$2
    local i
    local st=""

    for ((i = 0; i < 40; i++)); do
        st=$(ceph pg "$pgid" query --format json | jq '.state')
        echo "wait-scrub-end state now: " "$st"
        [[ $st == *scrubbing* ]] || break
        if (( i % 5 == 4 )); then
            flush_pg_stats
        fi
        sleep 0.3
    done

    # still scrubbing here means the loop timed out
    [[ $st != *scrubbing* ]]
}


#
# Remove one replica of an object, start a periodic-style deep scrub on the
# primary through initiate_and_fetch_state, and verify that the replica is
# restored by auto-repair.
#
# Arguments: $1 - test directory
# Returns:   1 on the first failing step; otherwise the status of the final
#            grep for scrub_finish in the primary's log
#
function TEST_auto_repair_bluestore_tag() {
    local dir=$1
    local poolname=testpool
    local id pgid primary r

    # Launch a 3-OSD cluster with auto-repair on and scrub randomization off
    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    # Set scheduler to "wpq" until there's a reliable way to query scrub states
    # with "--osd-scrub-sleep" set to 0. The "mclock_scheduler" overrides the
    # scrub sleep to 0 and as a result the checks in the test fail.
    # NOTE(review): keep this a plain string named ceph_osd_args. The
    # "Restarted osd get $ceph_osd_args passed" notes below suggest that
    # objectstore_tool reads the variable by name - confirm in ceph-helpers.sh.
    local ceph_osd_args="--osd-scrub-auto-repair=true \
            --osd_deep_scrub_randomize_ratio=0 \
            --osd-scrub-interval-randomize-ratio=0 \
            --osd-op-queue=wpq"
    for id in 0 1 2; do
        # $ceph_osd_args is left unquoted so it splits into separate options
        run_osd "$dir" "$id" $ceph_osd_args || return 1
    done

    create_pool "$poolname" 1 1 || return 1
    ceph osd pool set "$poolname" size 2
    wait_for_clean || return 1

    # Put an object
    local payload=ABCDEF
    echo "$payload" > "$dir/ORIGINAL"
    rados --pool "$poolname" put SOMETHING "$dir/ORIGINAL" || return 1

    # Remove the object from one shard physically
    # Restarted osd get $ceph_osd_args passed
    objectstore_tool "$dir" "$(get_not_primary "$poolname" SOMETHING)" SOMETHING remove || return 1

    pgid=$(get_pg "$poolname" SOMETHING)
    primary=$(get_primary "$poolname" SOMETHING)
    echo "Affected PG " "$pgid" " w/ primary " "$primary"
    # "3.0" is the osd_scrub_sleep value initiate_and_fetch_state applies to
    # the primary
    initiate_and_fetch_state "$primary" "$pgid" "3.0"
    r=$?
    echo "initiate_and_fetch_state ret: " "$r"
    # Reset the scrub sleep on the OSD it was raised on. The second argument
    # must be the primary's id; "$1" here is the test directory.
    set_config "osd" "$primary" "osd_scrub_sleep" "0"
    if [ "$r" -ne 0 ]; then
        return 1
    fi

    wait_end_of_scrub "$primary" "$pgid" || return 1
    ceph pg dump pgs

    # Verify - the file should be back
    # Restarted osd get $ceph_osd_args passed
    objectstore_tool "$dir" "$(get_not_primary "$poolname" SOMETHING)" SOMETHING list-attrs || return 1
    objectstore_tool "$dir" "$(get_not_primary "$poolname" SOMETHING)" SOMETHING get-bytes "$dir/COPY" || return 1
    diff "$dir/ORIGINAL" "$dir/COPY" || return 1
    grep scrub_finish "$dir/osd.${primary}.log"
}
#
# Remove one replica of an object, request a regular scrub and verify that
# auto-repair restores the replica and that exactly one repaired object is
# reported for the PG.
#
# Arguments: $1 - test directory
# Returns:   1 on the first failing step or when num_objects_repaired is not 1
#
function TEST_auto_repair_bluestore_scrub() {
    local dir=$1
    local poolname=testpool
    local id pgid primary last_scrub_stamp count

    # Launch a 3-OSD cluster with auto-repair on and scrub randomization off
    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    # NOTE(review): keep this a plain string named ceph_osd_args. The
    # "Restarted osd get $ceph_osd_args passed" notes below suggest that
    # objectstore_tool reads the variable by name - confirm in ceph-helpers.sh.
    local ceph_osd_args="--osd-scrub-auto-repair=true \
            --osd_deep_scrub_randomize_ratio=0 \
            --osd-scrub-interval-randomize-ratio=0 \
            --osd-scrub-backoff-ratio=0"
    for id in 0 1 2; do
        # $ceph_osd_args is left unquoted so it splits into separate options
        run_osd "$dir" "$id" $ceph_osd_args || return 1
    done

    create_pool "$poolname" 1 1 || return 1
    ceph osd pool set "$poolname" size 2
    wait_for_clean || return 1

    # Put an object
    local payload=ABCDEF
    echo "$payload" > "$dir/ORIGINAL"
    rados --pool "$poolname" put SOMETHING "$dir/ORIGINAL" || return 1

    # Remove the object from one shard physically
    # Restarted osd get $ceph_osd_args passed
    objectstore_tool "$dir" "$(get_not_primary "$poolname" SOMETHING)" SOMETHING remove || return 1

    pgid=$(get_pg "$poolname" SOMETHING)
    primary=$(get_primary "$poolname" SOMETHING)
    last_scrub_stamp=$(get_last_scrub_stamp "$pgid")
    ceph tell "$pgid" scrub

    # Wait for scrub -> auto repair
    wait_for_scrub "$pgid" "$last_scrub_stamp" || return 1
    ceph pg dump pgs
    # Actually this causes 2 scrubs, so we better wait a little longer
    sleep 5
    wait_for_clean || return 1
    ceph pg dump pgs
    # Verify - the file should be back
    # Restarted osd get $ceph_osd_args passed
    objectstore_tool "$dir" "$(get_not_primary "$poolname" SOMETHING)" SOMETHING list-attrs || return 1
    rados --pool "$poolname" get SOMETHING "$dir/COPY" || return 1
    diff "$dir/ORIGINAL" "$dir/COPY" || return 1
    # shown for the test log only; its status is not checked
    grep scrub_finish "$dir/osd.${primary}.log"

    # This should have caused 1 object to be repaired
    count=$(ceph pg "$pgid" query | jq '.info.stats.stat_sum.num_objects_repaired')
    test "$count" = "1" || return 1
}
dir=$1 + local poolname=testpool + + # Launch a cluster with 5 seconds scrub interval + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + local ceph_osd_args="--osd-scrub-auto-repair=true \ + --osd_deep_scrub_randomize_ratio=0 \ + --osd-scrub-interval-randomize-ratio=0" + for id in $(seq 0 2) ; do + run_osd $dir $id $ceph_osd_args || return 1 + done + + create_pool $poolname 1 1 || return 1 + ceph osd pool set $poolname size 2 + wait_for_clean || return 1 + + # Put an object + local payload=ABCDEF + echo $payload > $dir/ORIGINAL + for i in $(seq 1 10) + do + rados --pool $poolname put obj$i $dir/ORIGINAL || return 1 + done + + # Remove the object from one shard physically + # Restarted osd get $ceph_osd_args passed + objectstore_tool $dir $(get_not_primary $poolname SOMETHING) obj1 remove || return 1 + # obj2 can't be repaired + objectstore_tool $dir $(get_not_primary $poolname SOMETHING) obj2 remove || return 1 + objectstore_tool $dir $(get_primary $poolname SOMETHING) obj2 rm-attr _ || return 1 + + local pgid=$(get_pg $poolname obj1) + local primary=$(get_primary $poolname obj1) + local last_scrub_stamp="$(get_last_scrub_stamp $pgid)" + ceph tell $pgid deep_scrub + ceph tell $pgid scrub + + # Wait for auto repair + wait_for_scrub $pgid "$last_scrub_stamp" || return 1 + wait_for_clean || return 1 + flush_pg_stats + grep scrub_finish $dir/osd.${primary}.log + grep -q "scrub_finish.*still present after re-scrub" $dir/osd.${primary}.log || return 1 + ceph pg dump pgs + ceph pg dump pgs | grep -q "^${pgid}.*+failed_repair" || return 1 + + # Verify - obj1 should be back + # Restarted osd get $ceph_osd_args passed + objectstore_tool $dir $(get_not_primary $poolname obj1) obj1 list-attrs || return 1 + rados --pool $poolname get obj1 $dir/COPY || return 1 + diff $dir/ORIGINAL $dir/COPY || return 1 + grep scrub_finish $dir/osd.${primary}.log + + # Make it repairable + objectstore_tool $dir $(get_primary $poolname SOMETHING) obj2 remove || return 1 + repair $pgid + 
sleep 2 + + flush_pg_stats + ceph pg dump pgs + ceph pg dump pgs | grep -q -e "^${pgid}.* active+clean " -e "^${pgid}.* active+clean+wait " || return 1 + grep scrub_finish $dir/osd.${primary}.log +} + +function TEST_auto_repair_bluestore_failed_norecov() { + local dir=$1 + local poolname=testpool + + # Launch a cluster with 5 seconds scrub interval + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + local ceph_osd_args="--osd-scrub-auto-repair=true \ + --osd_deep_scrub_randomize_ratio=0 \ + --osd-scrub-interval-randomize-ratio=0" + for id in $(seq 0 2) ; do + run_osd $dir $id $ceph_osd_args || return 1 + done + + create_pool $poolname 1 1 || return 1 + ceph osd pool set $poolname size 2 + wait_for_clean || return 1 + + # Put an object + local payload=ABCDEF + echo $payload > $dir/ORIGINAL + for i in $(seq 1 10) + do + rados --pool $poolname put obj$i $dir/ORIGINAL || return 1 + done + + # Remove the object from one shard physically + # Restarted osd get $ceph_osd_args passed + # obj1 can't be repaired + objectstore_tool $dir $(get_not_primary $poolname SOMETHING) obj1 remove || return 1 + objectstore_tool $dir $(get_primary $poolname SOMETHING) obj1 rm-attr _ || return 1 + # obj2 can't be repaired + objectstore_tool $dir $(get_not_primary $poolname SOMETHING) obj2 remove || return 1 + objectstore_tool $dir $(get_primary $poolname SOMETHING) obj2 rm-attr _ || return 1 + + local pgid=$(get_pg $poolname obj1) + local primary=$(get_primary $poolname obj1) + local last_scrub_stamp="$(get_last_scrub_stamp $pgid)" + ceph tell $pgid deep_scrub + ceph tell $pgid scrub + + # Wait for auto repair + wait_for_scrub $pgid "$last_scrub_stamp" || return 1 + wait_for_clean || return 1 + flush_pg_stats + grep -q "scrub_finish.*present with no repair possible" $dir/osd.${primary}.log || return 1 + ceph pg dump pgs + ceph pg dump pgs | grep -q "^${pgid}.*+failed_repair" || return 1 +} + +function TEST_repair_stats() { + local dir=$1 + local poolname=testpool + local OSDS=2 + 
local OBJS=30 + # This need to be an even number + local REPAIRS=20 + + # Launch a cluster with 5 seconds scrub interval + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + local ceph_osd_args="--osd_deep_scrub_randomize_ratio=0 \ + --osd-scrub-interval-randomize-ratio=0" + for id in $(seq 0 $(expr $OSDS - 1)) ; do + run_osd $dir $id $ceph_osd_args || return 1 + done + + create_pool $poolname 1 1 || return 1 + ceph osd pool set $poolname size 2 + wait_for_clean || return 1 + + # Put an object + local payload=ABCDEF + echo $payload > $dir/ORIGINAL + for i in $(seq 1 $OBJS) + do + rados --pool $poolname put obj$i $dir/ORIGINAL || return 1 + done + + # Remove the object from one shard physically + # Restarted osd get $ceph_osd_args passed + local other=$(get_not_primary $poolname obj1) + local pgid=$(get_pg $poolname obj1) + local primary=$(get_primary $poolname obj1) + + kill_daemons $dir TERM osd.$other >&2 < /dev/null || return 1 + kill_daemons $dir TERM osd.$primary >&2 < /dev/null || return 1 + for i in $(seq 1 $REPAIRS) + do + # Remove from both osd.0 and osd.1 + OSD=$(expr $i % 2) + _objectstore_tool_nodown $dir $OSD obj$i remove || return 1 + done + activate_osd $dir $primary $ceph_osd_args || return 1 + activate_osd $dir $other $ceph_osd_args || return 1 + wait_for_clean || return 1 + + repair $pgid + wait_for_clean || return 1 + ceph pg dump pgs + flush_pg_stats + + # This should have caused 1 object to be repaired + ceph pg $pgid query | jq '.info.stats.stat_sum' + COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired') + test "$COUNT" = "$REPAIRS" || return 1 + + ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $primary )" + COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $primary ).num_shards_repaired") + test "$COUNT" = "$(expr $REPAIRS / 2)" || return 1 + + ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $other )" + COUNT=$(ceph pg 
dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $other ).num_shards_repaired") + test "$COUNT" = "$(expr $REPAIRS / 2)" || return 1 + + ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum" + COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired") + test "$COUNT" = "$REPAIRS" || return 1 +} + +function TEST_repair_stats_ec() { + local dir=$1 + local poolname=testpool + local OSDS=3 + local OBJS=30 + # This need to be an even number + local REPAIRS=26 + local allow_overwrites=false + + # Launch a cluster with 5 seconds scrub interval + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + local ceph_osd_args="--osd_deep_scrub_randomize_ratio=0 \ + --osd-scrub-interval-randomize-ratio=0" + for id in $(seq 0 $(expr $OSDS - 1)) ; do + run_osd $dir $id $ceph_osd_args || return 1 + done + + # Create an EC pool + create_ec_pool $poolname $allow_overwrites k=2 m=1 || return 1 + + # Put an object + local payload=ABCDEF + echo $payload > $dir/ORIGINAL + for i in $(seq 1 $OBJS) + do + rados --pool $poolname put obj$i $dir/ORIGINAL || return 1 + done + + # Remove the object from one shard physically + # Restarted osd get $ceph_osd_args passed + local other=$(get_not_primary $poolname obj1) + local pgid=$(get_pg $poolname obj1) + local primary=$(get_primary $poolname obj1) + + kill_daemons $dir TERM osd.$other >&2 < /dev/null || return 1 + kill_daemons $dir TERM osd.$primary >&2 < /dev/null || return 1 + for i in $(seq 1 $REPAIRS) + do + # Remove from both osd.0 and osd.1 + OSD=$(expr $i % 2) + _objectstore_tool_nodown $dir $OSD obj$i remove || return 1 + done + activate_osd $dir $primary $ceph_osd_args || return 1 + activate_osd $dir $other $ceph_osd_args || return 1 + wait_for_clean || return 1 + + repair $pgid + wait_for_clean || return 1 + ceph pg dump pgs + flush_pg_stats + + # This should have caused 1 object to be repaired + ceph pg $pgid query | jq '.info.stats.stat_sum' + COUNT=$(ceph pg $pgid query 
#
# Run corrupt_and_repair_erasure_coded on a 4-OSD cluster with a k=2 m=2
# erasure-coded pool.
#
# Arguments: $1 - test directory
#            $2 - allow_overwrites value forwarded to create_ec_pool
# Returns:   1 on the first failing step
#
function corrupt_and_repair_jerasure() {
    local dir=$1
    local allow_overwrites=$2
    local poolname=ecpool
    local id

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    for id in 0 1 2 3; do
        run_osd "$dir" "$id" || return 1
    done
    create_rbd_pool || return 1
    wait_for_clean || return 1

    create_ec_pool "$poolname" "$allow_overwrites" k=2 m=2 || return 1
    corrupt_and_repair_erasure_coded "$dir" "$poolname" || return 1
}

# k=2 m=2 scenario on a pool created with allow_overwrites=false
function TEST_corrupt_and_repair_jerasure_appends() {
    corrupt_and_repair_jerasure "$1" false
}

# Same with allow_overwrites=true; does nothing unless use_ec_overwrite is "true"
function TEST_corrupt_and_repair_jerasure_overwrites() {
    if [ "$use_ec_overwrite" = "true" ]; then
        corrupt_and_repair_jerasure "$1" true
    fi
}

#
# Run corrupt_and_repair_erasure_coded on a 10-OSD cluster with a
# k=4 m=2 l=3 pool using the lrc plugin.
#
# Arguments: $1 - test directory
#            $2 - allow_overwrites value forwarded to create_ec_pool
# Returns:   1 on the first failing step
#
function corrupt_and_repair_lrc() {
    local dir=$1
    local allow_overwrites=$2
    local poolname=ecpool
    local id

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    for ((id = 0; id <= 9; id++)); do
        run_osd "$dir" "$id" || return 1
    done
    create_rbd_pool || return 1
    wait_for_clean || return 1

    create_ec_pool "$poolname" "$allow_overwrites" k=4 m=2 l=3 plugin=lrc || return 1
    corrupt_and_repair_erasure_coded "$dir" "$poolname" || return 1
}

# lrc scenario on a pool created with allow_overwrites=false
function TEST_corrupt_and_repair_lrc_appends() {
    corrupt_and_repair_lrc "$1" false
}

# Same with allow_overwrites=true; does nothing unless use_ec_overwrite is "true"
function TEST_corrupt_and_repair_lrc_overwrites() {
    if [ "$use_ec_overwrite" = "true" ]; then
        corrupt_and_repair_lrc "$1" true
    fi
}
# Unfound-object scenario on a pool created with allow_overwrites=false
function TEST_unfound_erasure_coded_appends() {
    unfound_erasure_coded "$1" false
}

# Same with allow_overwrites=true; does nothing unless use_ec_overwrite is "true"
function TEST_unfound_erasure_coded_overwrites() {
    if [ "$use_ec_overwrite" = "true" ]; then
        unfound_erasure_coded "$1" true
    fi
}

#
# list_missing for EC pool
#
# Stores MOBJ0 and MOBJ1 in a k=2 m=1 pool on a 3-OSD cluster, removes two
# shards of each while the OSDs are stopped, repairs the PG and waits until
# "ceph pg <pg> list_unfound" reports both objects.
#
# Arguments: $1 - test directory
#            $2 - allow_overwrites value forwarded to create_ec_pool
# Returns:   0 once both objects are listed, 1 on any failing step or when
#            they are still not listed after 60 polls
#
function list_missing_erasure_coded() {
    local dir=$1
    local allow_overwrites=$2
    local poolname=ecpool
    local id i matches pg

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    for id in 0 1 2; do
        run_osd "$dir" "$id" || return 1
    done
    create_rbd_pool || return 1
    wait_for_clean || return 1

    create_ec_pool "$poolname" "$allow_overwrites" k=2 m=1 || return 1

    # Put an object; its first two listed shards are removed below
    # (NOTE(review): presumably osds0[0] is the primary - confirm get_osds order)
    add_something "$dir" "$poolname" MOBJ0 || return 1
    local -a osds0=($(get_osds "$poolname" MOBJ0))

    # Put another object; its last two listed shards are removed below
    add_something "$dir" "$poolname" MOBJ1 || return 1
    local -a osds1=($(get_osds "$poolname" MOBJ1))

    # Stop all osd daemons
    for id in 0 1 2; do
        kill_daemons "$dir" TERM "osd.$id" >&2 < /dev/null || return 1
    done

    for id in "${osds0[0]}" "${osds0[1]}"; do
        ceph-objectstore-tool --data-path "$dir/$id" \
            MOBJ0 remove || return 1
    done
    for id in "${osds1[1]}" "${osds1[2]}"; do
        ceph-objectstore-tool --data-path "$dir/$id" \
            MOBJ1 remove || return 1
    done

    for id in 0 1 2; do
        activate_osd "$dir" "$id" >&2 || return 1
    done
    create_rbd_pool || return 1
    wait_for_clean || return 1

    # Get the PG - both objects should be in the same PG
    pg=$(get_pg "$poolname" MOBJ0)

    # Repair the PG, which triggers the recovering,
    # and should mark the object as unfound
    repair "$pg"

    # Poll up to 60 times, one second apart, until both objects are reported;
    # without the pause the loop would give up almost immediately
    for ((i = 0; i < 60; i++)); do
        matches=$(ceph pg "$pg" list_unfound | grep -c -E "MOBJ0|MOBJ1")
        if [ "$matches" -eq 2 ]; then
            return 0
        fi
        sleep 1
    done
    return 1
}

# list_missing scenario on a pool created with allow_overwrites=false
function TEST_list_missing_erasure_coded_appends() {
    list_missing_erasure_coded "$1" false
}

# Same with allow_overwrites=true; does nothing unless use_ec_overwrite is "true"
function TEST_list_missing_erasure_coded_overwrites() {
    if [ "$use_ec_overwrite" = "true" ]; then
        list_missing_erasure_coded "$1" true
    fi
}
osd.0 and osd.1 + local osd=$(expr $i % 2) + + case $i in + 1) + # Size (deep scrub data_digest too) + local payload=UVWXYZZZ + echo $payload > $dir/CORRUPT + objectstore_tool $dir $osd $objname set-bytes $dir/CORRUPT || return 1 + ;; + + 2) + # digest (deep scrub only) + local payload=UVWXYZ + echo $payload > $dir/CORRUPT + objectstore_tool $dir $osd $objname set-bytes $dir/CORRUPT || return 1 + ;; + + 3) + # missing + objectstore_tool $dir $osd $objname remove || return 1 + ;; + + 4) + # Modify omap value (deep scrub only) + objectstore_tool $dir $osd $objname set-omap key-$objname $dir/CORRUPT || return 1 + ;; + + 5) + # Delete omap key (deep scrub only) + objectstore_tool $dir $osd $objname rm-omap key-$objname || return 1 + ;; + + 6) + # Add extra omap key (deep scrub only) + echo extra > $dir/extra-val + objectstore_tool $dir $osd $objname set-omap key2-$objname $dir/extra-val || return 1 + rm $dir/extra-val + ;; + + 7) + # Modify omap header (deep scrub only) + echo -n newheader > $dir/hdr + objectstore_tool $dir $osd $objname set-omaphdr $dir/hdr || return 1 + rm $dir/hdr + ;; + + 8) + rados --pool $poolname setxattr $objname key1-$objname val1-$objname || return 1 + rados --pool $poolname setxattr $objname key2-$objname val2-$objname || return 1 + + # Break xattrs + echo -n bad-val > $dir/bad-val + objectstore_tool $dir $osd $objname set-attr _key1-$objname $dir/bad-val || return 1 + objectstore_tool $dir $osd $objname rm-attr _key2-$objname || return 1 + echo -n val3-$objname > $dir/newval + objectstore_tool $dir $osd $objname set-attr _key3-$objname $dir/newval || return 1 + rm $dir/bad-val $dir/newval + ;; + + 9) + objectstore_tool $dir $osd $objname get-attr _ > $dir/robj9-oi + echo -n D > $dir/change + rados --pool $poolname put $objname $dir/change + objectstore_tool $dir $osd $objname set-attr _ $dir/robj9-oi + rm $dir/oi $dir/change + ;; + + # ROBJ10 must be handled after digests are re-computed by a deep scrub below + # ROBJ11 must be handled with 
config change before deep scrub + # ROBJ12 must be handled with config change before scrubs + # ROBJ13 must be handled before scrubs + + 14) + echo -n bad-val > $dir/bad-val + objectstore_tool $dir 0 $objname set-attr _ $dir/bad-val || return 1 + objectstore_tool $dir 1 $objname rm-attr _ || return 1 + rm $dir/bad-val + ;; + + 15) + objectstore_tool $dir $osd $objname rm-attr _ || return 1 + ;; + + 16) + objectstore_tool $dir 0 $objname rm-attr snapset || return 1 + echo -n bad-val > $dir/bad-val + objectstore_tool $dir 1 $objname set-attr snapset $dir/bad-val || return 1 + ;; + + 17) + # Deep-scrub only (all replicas are diffent than the object info + local payload=ROBJ17 + echo $payload > $dir/new.ROBJ17 + objectstore_tool $dir 0 $objname set-bytes $dir/new.ROBJ17 || return 1 + objectstore_tool $dir 1 $objname set-bytes $dir/new.ROBJ17 || return 1 + ;; + + 18) + # Deep-scrub only (all replicas are diffent than the object info + local payload=ROBJ18 + echo $payload > $dir/new.ROBJ18 + objectstore_tool $dir 0 $objname set-bytes $dir/new.ROBJ18 || return 1 + objectstore_tool $dir 1 $objname set-bytes $dir/new.ROBJ18 || return 1 + # Make one replica have a different object info, so a full repair must happen too + objectstore_tool $dir $osd $objname corrupt-info || return 1 + ;; + + 19) + # Set osd-max-object-size smaller than this object's size + + esac + done + + local pg=$(get_pg $poolname ROBJ0) + + ceph tell osd.\* injectargs -- --osd-max-object-size=1048576 + + inject_eio rep data $poolname ROBJ11 $dir 0 || return 1 # shard 0 of [1, 0], osd.1 + inject_eio rep mdata $poolname ROBJ12 $dir 1 || return 1 # shard 1 of [1, 0], osd.0 + inject_eio rep mdata $poolname ROBJ13 $dir 1 || return 1 # shard 1 of [1, 0], osd.0 + inject_eio rep data $poolname ROBJ13 $dir 0 || return 1 # shard 0 of [1, 0], osd.1 + + pg_scrub $pg + + ERRORS=0 + declare -a s_err_strings + err_strings[0]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:30259878:::ROBJ15:head : 
candidate had a missing info key" + err_strings[1]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:33aca486:::ROBJ18:head : object info inconsistent " + err_strings[2]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:5c7b2c47:::ROBJ16:head : candidate had a corrupt snapset" + err_strings[3]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:5c7b2c47:::ROBJ16:head : candidate had a missing snapset key" + err_strings[4]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:5c7b2c47:::ROBJ16:head : failed to pick suitable object info" + err_strings[5]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:86586531:::ROBJ8:head : attr value mismatch '_key1-ROBJ8', attr name mismatch '_key3-ROBJ8', attr name mismatch '_key2-ROBJ8'" + err_strings[6]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:bc819597:::ROBJ12:head : candidate had a stat error" + err_strings[7]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:c0c86b1d:::ROBJ14:head : candidate had a missing info key" + err_strings[8]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:c0c86b1d:::ROBJ14:head : candidate had a corrupt info" + err_strings[9]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:c0c86b1d:::ROBJ14:head : failed to pick suitable object info" + err_strings[10]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:ce3f1d6a:::ROBJ1:head : candidate size 9 info size 7 mismatch" + err_strings[11]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:ce3f1d6a:::ROBJ1:head : size 9 != size 7 from auth oi 3:ce3f1d6a:::ROBJ1:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od f5fba2c6 alloc_hint [[]0 0 0[]][)], size 9 != size 7 from shard 0" + err_strings[12]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:d60617f9:::ROBJ13:head : candidate had a stat error" + 
err_strings[13]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 3:f2a5b2a4:::ROBJ3:head : missing" + err_strings[14]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:ffdb2004:::ROBJ9:head : candidate size 1 info size 7 mismatch" + err_strings[15]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:ffdb2004:::ROBJ9:head : object info inconsistent " + err_strings[16]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 3:c0c86b1d:::ROBJ14:head : no '_' attr" + err_strings[17]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 3:5c7b2c47:::ROBJ16:head : can't decode 'snapset' attr .* no longer understand old encoding version 3 < 97: Malformed input" + err_strings[18]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub : stat mismatch, got 19/19 objects, 0/0 clones, 18/19 dirty, 18/19 omap, 0/0 pinned, 0/0 hit_set_archive, 0/0 whiteouts, 1049713/1049720 bytes, 0/0 manifest objects, 0/0 hit_set_archive bytes." + err_strings[19]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 1 missing, 8 inconsistent objects" + err_strings[20]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 18 errors" + err_strings[21]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:123a5f55:::ROBJ19:head : size 1049600 > 1048576 is too large" + + for err_string in "${err_strings[@]}" + do + if ! grep -q "$err_string" $dir/osd.${primary}.log + then + echo "Missing log message '$err_string'" + ERRORS=$(expr $ERRORS + 1) + fi + done + + rados list-inconsistent-pg $poolname > $dir/json || return 1 + # Check pg count + test $(jq '. 
| length' $dir/json) = "1" || return 1 + # Check pgid + test $(jq -r '.[0]' $dir/json) = $pg || return 1 + + rados list-inconsistent-obj $pg > $dir/json || return 1 + # Get epoch for repair-get requests + epoch=$(jq .epoch $dir/json) + + jq "$jqfilter" << EOF | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/checkcsjson +{ + "inconsistents": [ + { + "shards": [ + { + "size": 7, + "errors": [], + "osd": 0, + "primary": false + }, + { + "object_info": { + "oid": { + "oid": "ROBJ1", + "key": "", + "snapid": -2, + "hash": 1454963827, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'58", + "prior_version": "21'3", + "last_reqid": "osd.1.0:57", + "user_version": 3, + "size": 7, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xf5fba2c6", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "size": 9, + "errors": [ + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ1", + "key": "", + "snapid": -2, + "hash": 1454963827, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'58", + "prior_version": "21'3", + "last_reqid": "osd.1.0:57", + "user_version": 3, + "size": 7, + "mtime": "2018-04-05 14:33:19.804040", + "local_mtime": "2018-04-05 14:33:19.804839", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xf5fba2c6", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "errors": [ + "size_mismatch" + ], + 
"object": { + "version": 3, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ1" + } + }, + { + "shards": [ + { + "errors": [ + "stat_error" + ], + "osd": 0, + "primary": false + }, + { + "size": 7, + "errors": [], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ12", + "key": "", + "snapid": -2, + "hash": 3920199997, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'56", + "prior_version": "43'36", + "last_reqid": "osd.1.0:55", + "user_version": 36, + "size": 7, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x067f306a", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "stat_error" + ], + "errors": [], + "object": { + "version": 36, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ12" + } + }, + { + "shards": [ + { + "errors": [ + "stat_error" + ], + "osd": 0, + "primary": false + }, + { + "size": 7, + "errors": [], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ13", + "key": "", + "snapid": -2, + "hash": 2682806379, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'59", + "prior_version": "45'39", + "last_reqid": "osd.1.0:58", + "user_version": 39, + "size": 7, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x6441854d", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "stat_error" + ], + "errors": [], + "object": { + "version": 39, + "snap": "head", + "locator": 
"", + "nspace": "", + "name": "ROBJ13" + } + }, + { + "shards": [ + { + "object_info": "bad-val", + "size": 7, + "errors": [ + "info_corrupted" + ], + "osd": 0, + "primary": false + }, + { + "size": 7, + "errors": [ + "info_missing" + ], + "osd": 1, + "primary": true + } + ], + "union_shard_errors": [ + "info_missing", + "info_corrupted" + ], + "errors": [], + "object": { + "version": 0, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ14" + } + }, + { + "shards": [ + { + "object_info": { + "oid": { + "oid": "ROBJ15", + "key": "", + "snapid": -2, + "hash": 504996876, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'49", + "prior_version": "49'45", + "last_reqid": "osd.1.0:48", + "user_version": 45, + "size": 7, + "mtime": "2018-04-05 14:33:29.498969", + "local_mtime": "2018-04-05 14:33:29.499890", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x2d2a4d6e", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "size": 7, + "errors": [], + "osd": 0, + "primary": false + }, + { + "size": 7, + "errors": [ + "info_missing" + ], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ15", + "key": "", + "snapid": -2, + "hash": 504996876, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'49", + "prior_version": "49'45", + "last_reqid": "osd.1.0:48", + "user_version": 45, + "size": 7, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x2d2a4d6e", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + 
"info_missing" + ], + "errors": [], + "object": { + "version": 45, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ15" + } + }, + { + "errors": [], + "object": { + "locator": "", + "name": "ROBJ16", + "nspace": "", + "snap": "head", + "version": 0 + }, + "shards": [ + { + "errors": [ + "snapset_missing" + ], + "osd": 0, + "primary": false, + "size": 7 + }, + { + "errors": [ + "snapset_corrupted" + ], + "osd": 1, + "primary": true, + "snapset": "bad-val", + "size": 7 + } + ], + "union_shard_errors": [ + "snapset_missing", + "snapset_corrupted" + ] + }, + { + "errors": [ + "object_info_inconsistency" + ], + "object": { + "locator": "", + "name": "ROBJ18", + "nspace": "", + "snap": "head" + }, + "selected_object_info": { + "alloc_hint_flags": 255, + "data_digest": "0x2ddbf8f5", + "expected_object_size": 0, + "expected_write_size": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "lost": 0, + "manifest": { + "type": 0 + }, + "oid": { + "hash": 1629828556, + "key": "", + "max": 0, + "namespace": "", + "oid": "ROBJ18", + "pool": 3, + "snapid": -2 + }, + "omap_digest": "0xddc3680f", + "size": 7, + "truncate_seq": 0, + "truncate_size": 0, + "user_version": 54, + "watchers": {} + }, + "shards": [ + { + "errors": [], + "object_info": { + "alloc_hint_flags": 0, + "data_digest": "0x2ddbf8f5", + "expected_object_size": 0, + "expected_write_size": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "lost": 0, + "manifest": { + "type": 0 + }, + "oid": { + "hash": 1629828556, + "key": "", + "max": 0, + "namespace": "", + "oid": "ROBJ18", + "pool": 3, + "snapid": -2 + }, + "omap_digest": "0xddc3680f", + "size": 7, + "truncate_seq": 0, + "truncate_size": 0, + "user_version": 54, + "watchers": {} + }, + "osd": 0, + "primary": false, + "size": 7 + }, + { + "errors": [], + "object_info": { + "alloc_hint_flags": 255, + "data_digest": "0x2ddbf8f5", + "expected_object_size": 0, + "expected_write_size": 0, + "flags": [ 
+ "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "lost": 0, + "manifest": { + "type": 0 + }, + "oid": { + "hash": 1629828556, + "key": "", + "max": 0, + "namespace": "", + "oid": "ROBJ18", + "pool": 3, + "snapid": -2 + }, + "omap_digest": "0xddc3680f", + "size": 7, + "truncate_seq": 0, + "truncate_size": 0, + "user_version": 54, + "watchers": {} + }, + "osd": 1, + "primary": true, + "size": 7 + } + ], + "union_shard_errors": [] + }, + { + "object": { + "name": "ROBJ19", + "nspace": "", + "locator": "", + "snap": "head", + "version": 58 + }, + "errors": [ + "size_too_large" + ], + "union_shard_errors": [], + "selected_object_info": { + "oid": { + "oid": "ROBJ19", + "key": "", + "snapid": -2, + "hash": 2868534344, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "63'59", + "prior_version": "63'58", + "last_reqid": "osd.1.0:58", + "user_version": 58, + "size": 1049600, + "mtime": "2019-08-09T23:33:58.340709+0000", + "local_mtime": "2019-08-09T23:33:58.345676+0000", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x3dde0ef3", + "omap_digest": "0xbffddd28", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "shards": [ + { + "osd": 0, + "primary": false, + "errors": [], + "size": 1049600 + }, + { + "osd": 1, + "primary": true, + "errors": [], + "size": 1049600 + } + ] + }, + { + "shards": [ + { + "size": 7, + "errors": [], + "osd": 0, + "primary": false + }, + { + "errors": [ + "missing" + ], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ3", + "key": "", + "snapid": -2, + "hash": 625845583, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'61", + "prior_version": "25'9", + "last_reqid": "osd.1.0:60", + "user_version": 9, + "size": 7, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + 
"dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x00b35dfd", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "missing" + ], + "errors": [], + "object": { + "version": 9, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ3" + } + }, + { + "shards": [ + { + "attrs": [ + { + "Base64": false, + "value": "bad-val", + "name": "key1-ROBJ8" + }, + { + "Base64": false, + "value": "val2-ROBJ8", + "name": "key2-ROBJ8" + } + ], + "size": 7, + "errors": [], + "osd": 0, + "primary": false + }, + { + "attrs": [ + { + "Base64": false, + "value": "val1-ROBJ8", + "name": "key1-ROBJ8" + }, + { + "Base64": false, + "value": "val3-ROBJ8", + "name": "key3-ROBJ8" + } + ], + "size": 7, + "errors": [], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ8", + "key": "", + "snapid": -2, + "hash": 2359695969, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "79'66", + "prior_version": "79'65", + "last_reqid": "client.4554.0:1", + "user_version": 79, + "size": 7, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xd6be81dc", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [], + "errors": [ + "attr_value_mismatch", + "attr_name_mismatch" + ], + "object": { + "version": 66, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ8" + } + }, + { + "shards": [ + { + "object_info": { + "oid": { + "oid": "ROBJ9", + "key": "", + "snapid": -2, + "hash": 537189375, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "95'67", + 
"prior_version": "51'64", + "last_reqid": "client.4649.0:1", + "user_version": 80, + "size": 1, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2b63260d", + "omap_digest": "0x2eecc539", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "size": 1, + "errors": [], + "osd": 0, + "primary": false + }, + { + "object_info": { + "oid": { + "oid": "ROBJ9", + "key": "", + "snapid": -2, + "hash": 537189375, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'64", + "prior_version": "37'27", + "last_reqid": "osd.1.0:63", + "user_version": 27, + "size": 7, + "mtime": "2018-04-05 14:33:25.352485", + "local_mtime": "2018-04-05 14:33:25.353746", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x2eecc539", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "size": 1, + "errors": [ + "obj_size_info_mismatch" + ], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ9", + "key": "", + "snapid": -2, + "hash": 537189375, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "95'67", + "prior_version": "51'64", + "last_reqid": "client.4649.0:1", + "user_version": 80, + "size": 1, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2b63260d", + "omap_digest": "0x2eecc539", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "obj_size_info_mismatch" + ], + 
"errors": [ + "object_info_inconsistency" + ], + "object": { + "version": 67, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ9" + } + } + ], + "epoch": 0 +} +EOF + + jq "$jqfilter" $dir/json | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/csjson + multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1 + if test $getjson = "yes" + then + jq '.' $dir/json > save1.json + fi + + if test "$LOCALRUN" = "yes" && which jsonschema > /dev/null; + then + jsonschema -i $dir/json $CEPH_ROOT/doc/rados/command/list-inconsistent-obj.json || return 1 + fi + + objname=ROBJ9 + # Change data and size again because digest was recomputed + echo -n ZZZ > $dir/change + rados --pool $poolname put $objname $dir/change + # Set one to an even older value + objectstore_tool $dir 0 $objname set-attr _ $dir/robj9-oi + rm $dir/oi $dir/change + + objname=ROBJ10 + objectstore_tool $dir 1 $objname get-attr _ > $dir/oi + rados --pool $poolname setomapval $objname key2-$objname val2-$objname + objectstore_tool $dir 0 $objname set-attr _ $dir/oi + objectstore_tool $dir 1 $objname set-attr _ $dir/oi + rm $dir/oi + + inject_eio rep data $poolname ROBJ11 $dir 0 || return 1 # shard 0 of [1, 0], osd.1 + inject_eio rep mdata $poolname ROBJ12 $dir 1 || return 1 # shard 1 of [1, 0], osd.0 + inject_eio rep mdata $poolname ROBJ13 $dir 1 || return 1 # shard 1 of [1, 0], osd.0 + inject_eio rep data $poolname ROBJ13 $dir 0 || return 1 # shard 0 of [1, 0], osd.1 + + # ROBJ19 won't error this time + ceph tell osd.\* injectargs -- --osd-max-object-size=134217728 + + pg_deep_scrub $pg + + err_strings=() + err_strings[0]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:30259878:::ROBJ15:head : candidate had a missing info key" + err_strings[1]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:33aca486:::ROBJ18:head : data_digest 0xbd89c912 != data_digest 0x2ddbf8f5 from auth oi 3:33aca486:::ROBJ18:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* 
dirty|omap|data_digest|omap_digest s 7 uv 54 dd 2ddbf8f5 od ddc3680f alloc_hint [[]0 0 255[]][)], object info inconsistent " + err_strings[2]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:33aca486:::ROBJ18:head : data_digest 0xbd89c912 != data_digest 0x2ddbf8f5 from auth oi 3:33aca486:::ROBJ18:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 54 dd 2ddbf8f5 od ddc3680f alloc_hint [[]0 0 255[]][)]" + err_strings[3]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:33aca486:::ROBJ18:head : failed to pick suitable auth object" + err_strings[4]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:5c7b2c47:::ROBJ16:head : candidate had a corrupt snapset" + err_strings[5]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:5c7b2c47:::ROBJ16:head : candidate had a missing snapset key" + err_strings[6]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:5c7b2c47:::ROBJ16:head : failed to pick suitable object info" + err_strings[7]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:86586531:::ROBJ8:head : attr value mismatch '_key1-ROBJ8', attr name mismatch '_key3-ROBJ8', attr name mismatch '_key2-ROBJ8'" + err_strings[8]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:87abbf36:::ROBJ11:head : candidate had a read error" + err_strings[9]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:8aa5320e:::ROBJ17:head : data_digest 0x5af0c3ef != data_digest 0x2ddbf8f5 from auth oi 3:8aa5320e:::ROBJ17:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 51 dd 2ddbf8f5 od e9572720 alloc_hint [[]0 0 0[]][)]" + err_strings[10]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:8aa5320e:::ROBJ17:head : data_digest 0x5af0c3ef != data_digest 0x2ddbf8f5 from auth oi 3:8aa5320e:::ROBJ17:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 51 dd 2ddbf8f5 od e9572720 alloc_hint [[]0 
0 0[]][)]" + err_strings[11]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:8aa5320e:::ROBJ17:head : failed to pick suitable auth object" + err_strings[12]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:8b55fa4b:::ROBJ7:head : omap_digest 0xefced57a != omap_digest 0x6a73cc07 from shard 1" + err_strings[13]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:8b55fa4b:::ROBJ7:head : omap_digest 0x6a73cc07 != omap_digest 0xefced57a from auth oi 3:8b55fa4b:::ROBJ7:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 21 dd 2ddbf8f5 od efced57a alloc_hint [[]0 0 0[]][)]" + err_strings[14]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:a53c12e8:::ROBJ6:head : omap_digest 0x689ee887 != omap_digest 0x179c919f from shard 1, omap_digest 0x689ee887 != omap_digest 0x179c919f from auth oi 3:a53c12e8:::ROBJ6:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 18 dd 2ddbf8f5 od 179c919f alloc_hint [[]0 0 0[]][)]" + err_strings[15]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:b1f19cbd:::ROBJ10:head : omap_digest 0xa8dd5adc != omap_digest 0xc2025a24 from auth oi 3:b1f19cbd:::ROBJ10:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 30 dd 2ddbf8f5 od c2025a24 alloc_hint [[]0 0 0[]][)]" + err_strings[16]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:b1f19cbd:::ROBJ10:head : omap_digest 0xa8dd5adc != omap_digest 0xc2025a24 from auth oi 3:b1f19cbd:::ROBJ10:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 30 dd 2ddbf8f5 od c2025a24 alloc_hint [[]0 0 0[]][)]" + err_strings[17]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:b1f19cbd:::ROBJ10:head : failed to pick suitable auth object" + err_strings[18]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:bc819597:::ROBJ12:head : candidate had a stat error" + err_strings[19]="log_channel[(]cluster[)] 
log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:c0c86b1d:::ROBJ14:head : candidate had a missing info key" + err_strings[20]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:c0c86b1d:::ROBJ14:head : candidate had a corrupt info" + err_strings[21]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:c0c86b1d:::ROBJ14:head : failed to pick suitable object info" + err_strings[22]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:ce3f1d6a:::ROBJ1:head : candidate size 9 info size 7 mismatch" + err_strings[23]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:ce3f1d6a:::ROBJ1:head : data_digest 0x2d4a11c2 != data_digest 0x2ddbf8f5 from shard 0, data_digest 0x2d4a11c2 != data_digest 0x2ddbf8f5 from auth oi 3:ce3f1d6a:::ROBJ1:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od f5fba2c6 alloc_hint [[]0 0 0[]][)], size 9 != size 7 from auth oi 3:ce3f1d6a:::ROBJ1:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od f5fba2c6 alloc_hint [[]0 0 0[]][)], size 9 != size 7 from shard 0" + err_strings[24]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:d60617f9:::ROBJ13:head : candidate had a read error" + err_strings[25]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:d60617f9:::ROBJ13:head : candidate had a stat error" + err_strings[26]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:d60617f9:::ROBJ13:head : failed to pick suitable object info" + err_strings[27]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:e97ce31e:::ROBJ2:head : data_digest 0x578a4830 != data_digest 0x2ddbf8f5 from shard 1, data_digest 0x578a4830 != data_digest 0x2ddbf8f5 from auth oi 3:e97ce31e:::ROBJ2:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 6 dd 2ddbf8f5 od f8e11918 alloc_hint [[]0 0 0[]][)]" + err_strings[28]="log_channel[(]cluster[)] log [[]ERR[]] : 
[0-9]*[.]0 shard 1 3:f2a5b2a4:::ROBJ3:head : missing" + err_strings[29]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:f4981d31:::ROBJ4:head : omap_digest 0xd7178dfe != omap_digest 0xe2d46ea4 from shard 1, omap_digest 0xd7178dfe != omap_digest 0xe2d46ea4 from auth oi 3:f4981d31:::ROBJ4:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 12 dd 2ddbf8f5 od e2d46ea4 alloc_hint [[]0 0 0[]][)]" + err_strings[30]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:f4bfd4d1:::ROBJ5:head : omap_digest 0x1a862a41 != omap_digest 0x6cac8f6 from shard 1" + err_strings[31]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:f4bfd4d1:::ROBJ5:head : omap_digest 0x6cac8f6 != omap_digest 0x1a862a41 from auth oi 3:f4bfd4d1:::ROBJ5:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 15 dd 2ddbf8f5 od 1a862a41 alloc_hint [[]0 0 0[]][)]" + err_strings[32]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:ffdb2004:::ROBJ9:head : candidate size 3 info size 7 mismatch" + err_strings[33]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:ffdb2004:::ROBJ9:head : object info inconsistent " + err_strings[34]="log_channel[(]cluster[)] log [[]ERR[]] : deep-scrub [0-9]*[.]0 3:c0c86b1d:::ROBJ14:head : no '_' attr" + err_strings[35]="log_channel[(]cluster[)] log [[]ERR[]] : deep-scrub [0-9]*[.]0 3:5c7b2c47:::ROBJ16:head : can't decode 'snapset' attr .* no longer understand old encoding version 3 < 97: Malformed input" + err_strings[36]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 deep-scrub : stat mismatch, got 19/19 objects, 0/0 clones, 18/19 dirty, 18/19 omap, 0/0 pinned, 0/0 hit_set_archive, 0/0 whiteouts, 1049715/1049716 bytes, 0/0 manifest objects, 0/0 hit_set_archive bytes." 
+ err_strings[37]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 deep-scrub 1 missing, 11 inconsistent objects" + err_strings[38]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 deep-scrub 35 errors" + + for err_string in "${err_strings[@]}" + do + if ! grep -q "$err_string" $dir/osd.${primary}.log + then + echo "Missing log message '$err_string'" + ERRORS=$(expr $ERRORS + 1) + fi + done + + rados list-inconsistent-pg $poolname > $dir/json || return 1 + # Check pg count + test $(jq '. | length' $dir/json) = "1" || return 1 + # Check pgid + test $(jq -r '.[0]' $dir/json) = $pg || return 1 + + rados list-inconsistent-obj $pg > $dir/json || return 1 + # Get epoch for repair-get requests + epoch=$(jq .epoch $dir/json) + + jq "$jqfilter" << EOF | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/checkcsjson +{ + "inconsistents": [ + { + "shards": [ + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xf5fba2c6", + "size": 7, + "errors": [], + "osd": 0, + "primary": false + }, + { + "object_info": { + "oid": { + "oid": "ROBJ1", + "key": "", + "snapid": -2, + "hash": 1454963827, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'58", + "prior_version": "21'3", + "last_reqid": "osd.1.0:57", + "user_version": 3, + "size": 7, + "mtime": "2018-04-05 14:33:19.804040", + "local_mtime": "2018-04-05 14:33:19.804839", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xf5fba2c6", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "data_digest": "0x2d4a11c2", + "omap_digest": "0xf5fba2c6", + "size": 9, + "errors": [ + "data_digest_mismatch_info", + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ1", + "key": "", + "snapid": -2, + "hash": 
1454963827, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'58", + "prior_version": "21'3", + "last_reqid": "osd.1.0:57", + "user_version": 3, + "size": 7, + "mtime": "2018-04-05 14:33:19.804040", + "local_mtime": "2018-04-05 14:33:19.804839", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xf5fba2c6", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "data_digest_mismatch_info", + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "errors": [ + "data_digest_mismatch", + "size_mismatch" + ], + "object": { + "version": 3, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ1" + } + }, + { + "shards": [ + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xa8dd5adc", + "size": 7, + "errors": [ + "omap_digest_mismatch_info" + ], + "osd": 0, + "primary": false + }, + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xa8dd5adc", + "size": 7, + "errors": [ + "omap_digest_mismatch_info" + ], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "alloc_hint_flags": 0, + "data_digest": "0x2ddbf8f5", + "expected_object_size": 0, + "expected_write_size": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "lost": 0, + "manifest": { + "type": 0 + }, + "oid": { + "hash": 3174666125, + "key": "", + "max": 0, + "namespace": "", + "oid": "ROBJ10", + "pool": 3, + "snapid": -2 + }, + "omap_digest": "0xc2025a24", + "size": 7, + "truncate_seq": 0, + "truncate_size": 0, + "user_version": 30, + "watchers": {} + }, + "union_shard_errors": [ + "omap_digest_mismatch_info" + ], + "errors": [], + "object": { + "version": 30, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ10" + } + }, + { + "shards": [ + { + "data_digest": "0x2ddbf8f5", + "omap_digest": 
"0xa03cef03", + "size": 7, + "errors": [], + "osd": 0, + "primary": false + }, + { + "size": 7, + "errors": [ + "read_error" + ], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ11", + "key": "", + "snapid": -2, + "hash": 1828574689, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'52", + "prior_version": "41'33", + "last_reqid": "osd.1.0:51", + "user_version": 33, + "size": 7, + "mtime": "2018-04-05 14:33:26.761286", + "local_mtime": "2018-04-05 14:33:26.762368", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xa03cef03", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "read_error" + ], + "errors": [], + "object": { + "version": 33, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ11" + } + }, + { + "shards": [ + { + "errors": [ + "stat_error" + ], + "osd": 0, + "primary": false + }, + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x067f306a", + "size": 7, + "errors": [], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ12", + "key": "", + "snapid": -2, + "hash": 3920199997, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'56", + "prior_version": "43'36", + "last_reqid": "osd.1.0:55", + "user_version": 36, + "size": 7, + "mtime": "2018-04-05 14:33:27.460958", + "local_mtime": "2018-04-05 14:33:27.462109", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x067f306a", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "stat_error" + ], + "errors": 
[], + "object": { + "version": 36, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ12" + } + }, + { + "shards": [ + { + "errors": [ + "stat_error" + ], + "osd": 0, + "primary": false + }, + { + "size": 7, + "errors": [ + "read_error" + ], + "osd": 1, + "primary": true + } + ], + "union_shard_errors": [ + "stat_error", + "read_error" + ], + "errors": [], + "object": { + "version": 0, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ13" + } + }, + { + "shards": [ + { + "object_info": "bad-val", + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x4f14f849", + "size": 7, + "errors": [ + "info_corrupted" + ], + "osd": 0, + "primary": false + }, + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x4f14f849", + "size": 7, + "errors": [ + "info_missing" + ], + "osd": 1, + "primary": true + } + ], + "union_shard_errors": [ + "info_missing", + "info_corrupted" + ], + "errors": [], + "object": { + "version": 0, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ14" + } + }, + { + "shards": [ + { + "object_info": { + "oid": { + "oid": "ROBJ15", + "key": "", + "snapid": -2, + "hash": 504996876, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'49", + "prior_version": "49'45", + "last_reqid": "osd.1.0:48", + "user_version": 45, + "size": 7, + "mtime": "2018-04-05 14:33:29.498969", + "local_mtime": "2018-04-05 14:33:29.499890", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x2d2a4d6e", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x2d2a4d6e", + "size": 7, + "errors": [], + "osd": 0, + "primary": false + }, + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x2d2a4d6e", + "size": 7, + "errors": [ + "info_missing" + ], + "osd": 1, + "primary": true 
+ } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ15", + "key": "", + "snapid": -2, + "hash": 504996876, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'49", + "prior_version": "49'45", + "last_reqid": "osd.1.0:48", + "user_version": 45, + "size": 7, + "mtime": "2018-04-05 14:33:29.498969", + "local_mtime": "2018-04-05 14:33:29.499890", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x2d2a4d6e", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "info_missing" + ], + "errors": [], + "object": { + "version": 45, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ15" + } + }, + { + "errors": [], + "object": { + "locator": "", + "name": "ROBJ16", + "nspace": "", + "snap": "head", + "version": 0 + }, + "shards": [ + { + "data_digest": "0x2ddbf8f5", + "errors": [ + "snapset_missing" + ], + "omap_digest": "0x8b699207", + "osd": 0, + "primary": false, + "size": 7 + }, + { + "snapset": "bad-val", + "data_digest": "0x2ddbf8f5", + "errors": [ + "snapset_corrupted" + ], + "omap_digest": "0x8b699207", + "osd": 1, + "primary": true, + "size": 7 + } + ], + "union_shard_errors": [ + "snapset_missing", + "snapset_corrupted" + ] + }, + { + "errors": [], + "object": { + "locator": "", + "name": "ROBJ17", + "nspace": "", + "snap": "head" + }, + "selected_object_info": { + "alloc_hint_flags": 0, + "data_digest": "0x2ddbf8f5", + "expected_object_size": 0, + "expected_write_size": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "lost": 0, + "manifest": { + "type": 0 + }, + "oid": { + "hash": 1884071249, + "key": "", + "max": 0, + "namespace": "", + "oid": "ROBJ17", + "pool": 3, + "snapid": -2 + }, + "omap_digest": "0xe9572720", + "size": 7, + "truncate_seq": 0, + 
"truncate_size": 0, + "user_version": 51, + "watchers": {} + }, + "shards": [ + { + "data_digest": "0x5af0c3ef", + "errors": [ + "data_digest_mismatch_info" + ], + "omap_digest": "0xe9572720", + "osd": 0, + "primary": false, + "size": 7 + }, + { + "data_digest": "0x5af0c3ef", + "errors": [ + "data_digest_mismatch_info" + ], + "omap_digest": "0xe9572720", + "osd": 1, + "primary": true, + "size": 7 + } + ], + "union_shard_errors": [ + "data_digest_mismatch_info" + ] + }, + { + "errors": [ + "object_info_inconsistency" + ], + "object": { + "locator": "", + "name": "ROBJ18", + "nspace": "", + "snap": "head" + }, + "selected_object_info": { + "alloc_hint_flags": 255, + "data_digest": "0x2ddbf8f5", + "expected_object_size": 0, + "expected_write_size": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "lost": 0, + "manifest": { + "type": 0 + }, + "oid": { + "hash": 1629828556, + "key": "", + "max": 0, + "namespace": "", + "oid": "ROBJ18", + "pool": 3, + "snapid": -2 + }, + "omap_digest": "0xddc3680f", + "size": 7, + "truncate_seq": 0, + "truncate_size": 0, + "user_version": 54, + "watchers": {} + }, + "shards": [ + { + "data_digest": "0xbd89c912", + "errors": [ + "data_digest_mismatch_info" + ], + "object_info": { + "alloc_hint_flags": 0, + "data_digest": "0x2ddbf8f5", + "expected_object_size": 0, + "expected_write_size": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "lost": 0, + "manifest": { + "type": 0 + }, + "oid": { + "hash": 1629828556, + "key": "", + "max": 0, + "namespace": "", + "oid": "ROBJ18", + "pool": 3, + "snapid": -2 + }, + "omap_digest": "0xddc3680f", + "size": 7, + "truncate_seq": 0, + "truncate_size": 0, + "user_version": 54, + "watchers": {} + }, + "omap_digest": "0xddc3680f", + "osd": 0, + "primary": false, + "size": 7 + }, + { + "data_digest": "0xbd89c912", + "errors": [ + "data_digest_mismatch_info" + ], + "object_info": { + "alloc_hint_flags": 255, + "data_digest": "0x2ddbf8f5", + 
"expected_object_size": 0, + "expected_write_size": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "lost": 0, + "manifest": { + "type": 0 + }, + "oid": { + "hash": 1629828556, + "key": "", + "max": 0, + "namespace": "", + "oid": "ROBJ18", + "pool": 3, + "snapid": -2 + }, + "omap_digest": "0xddc3680f", + "size": 7, + "truncate_seq": 0, + "truncate_size": 0, + "user_version": 54, + "watchers": {} + }, + "omap_digest": "0xddc3680f", + "osd": 1, + "primary": true, + "size": 7 + } + ], + "union_shard_errors": [ + "data_digest_mismatch_info" + ] + }, + { + "shards": [ + { + "data_digest": "0x578a4830", + "omap_digest": "0xf8e11918", + "size": 7, + "errors": [ + "data_digest_mismatch_info" + ], + "osd": 0, + "primary": false + }, + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xf8e11918", + "size": 7, + "errors": [], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ2", + "key": "", + "snapid": -2, + "hash": 2026323607, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'60", + "prior_version": "23'6", + "last_reqid": "osd.1.0:59", + "user_version": 6, + "size": 7, + "mtime": "2018-04-05 14:33:20.498756", + "local_mtime": "2018-04-05 14:33:20.499704", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xf8e11918", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "data_digest_mismatch_info" + ], + "errors": [ + "data_digest_mismatch" + ], + "object": { + "version": 6, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ2" + } + }, + { + "shards": [ + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x00b35dfd", + "size": 7, + "errors": [], + "osd": 0, + "primary": false + }, + { + "errors": [ + "missing" + ], + "osd": 1, + 
"primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ3", + "key": "", + "snapid": -2, + "hash": 625845583, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'61", + "prior_version": "25'9", + "last_reqid": "osd.1.0:60", + "user_version": 9, + "size": 7, + "mtime": "2018-04-05 14:33:21.189382", + "local_mtime": "2018-04-05 14:33:21.190446", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x00b35dfd", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "missing" + ], + "errors": [], + "object": { + "version": 9, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ3" + } + }, + { + "shards": [ + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xd7178dfe", + "size": 7, + "errors": [ + "omap_digest_mismatch_info" + ], + "osd": 0, + "primary": false + }, + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xe2d46ea4", + "size": 7, + "errors": [], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ4", + "key": "", + "snapid": -2, + "hash": 2360875311, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'62", + "prior_version": "27'12", + "last_reqid": "osd.1.0:61", + "user_version": 12, + "size": 7, + "mtime": "2018-04-05 14:33:21.862313", + "local_mtime": "2018-04-05 14:33:21.863261", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xe2d46ea4", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "omap_digest_mismatch_info" + ], + "errors": [ + "omap_digest_mismatch" + ], + "object": { + 
"version": 12, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ4" + } + }, + { + "shards": [ + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x1a862a41", + "size": 7, + "errors": [], + "osd": 0, + "primary": false + }, + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x06cac8f6", + "size": 7, + "errors": [ + "omap_digest_mismatch_info" + ], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ5", + "key": "", + "snapid": -2, + "hash": 2334915887, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'63", + "prior_version": "29'15", + "last_reqid": "osd.1.0:62", + "user_version": 15, + "size": 7, + "mtime": "2018-04-05 14:33:22.589300", + "local_mtime": "2018-04-05 14:33:22.590376", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x1a862a41", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "omap_digest_mismatch_info" + ], + "errors": [ + "omap_digest_mismatch" + ], + "object": { + "version": 15, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ5" + } + }, + { + "shards": [ + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x689ee887", + "size": 7, + "errors": [ + "omap_digest_mismatch_info" + ], + "osd": 0, + "primary": false + }, + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x179c919f", + "size": 7, + "errors": [], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ6", + "key": "", + "snapid": -2, + "hash": 390610085, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'54", + "prior_version": "31'18", + "last_reqid": "osd.1.0:53", + "user_version": 18, + "size": 7, + "mtime": "2018-04-05 14:33:23.289188", + "local_mtime": "2018-04-05 14:33:23.290130", + "lost": 0, + 
"flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x179c919f", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "omap_digest_mismatch_info" + ], + "errors": [ + "omap_digest_mismatch" + ], + "object": { + "version": 18, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ6" + } + }, + { + "shards": [ + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xefced57a", + "size": 7, + "errors": [], + "osd": 0, + "primary": false + }, + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x6a73cc07", + "size": 7, + "errors": [ + "omap_digest_mismatch_info" + ], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ7", + "key": "", + "snapid": -2, + "hash": 3529485009, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'53", + "prior_version": "33'21", + "last_reqid": "osd.1.0:52", + "user_version": 21, + "size": 7, + "mtime": "2018-04-05 14:33:23.979658", + "local_mtime": "2018-04-05 14:33:23.980731", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xefced57a", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "omap_digest_mismatch_info" + ], + "errors": [ + "omap_digest_mismatch" + ], + "object": { + "version": 21, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ7" + } + }, + { + "shards": [ + { + "attrs": [ + { + "Base64": false, + "value": "bad-val", + "name": "key1-ROBJ8" + }, + { + "Base64": false, + "value": "val2-ROBJ8", + "name": "key2-ROBJ8" + } + ], + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xd6be81dc", + 
"size": 7, + "errors": [], + "osd": 0, + "primary": false + }, + { + "attrs": [ + { + "Base64": false, + "value": "val1-ROBJ8", + "name": "key1-ROBJ8" + }, + { + "Base64": false, + "value": "val3-ROBJ8", + "name": "key3-ROBJ8" + } + ], + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xd6be81dc", + "size": 7, + "errors": [], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ8", + "key": "", + "snapid": -2, + "hash": 2359695969, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "79'66", + "prior_version": "79'65", + "last_reqid": "client.4554.0:1", + "user_version": 79, + "size": 7, + "mtime": "2018-04-05 14:34:05.598688", + "local_mtime": "2018-04-05 14:34:05.599698", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xd6be81dc", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [], + "errors": [ + "attr_value_mismatch", + "attr_name_mismatch" + ], + "object": { + "version": 66, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ8" + } + }, + { + "shards": [ + { + "object_info": { + "oid": { + "oid": "ROBJ9", + "key": "", + "snapid": -2, + "hash": 537189375, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'64", + "prior_version": "37'27", + "last_reqid": "osd.1.0:63", + "user_version": 27, + "size": 7, + "mtime": "2018-04-05 14:33:25.352485", + "local_mtime": "2018-04-05 14:33:25.353746", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x2eecc539", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "data_digest": "0x1f26fb26", + 
"omap_digest": "0x2eecc539", + "size": 3, + "errors": [ + "obj_size_info_mismatch" + ], + "osd": 0, + "primary": false + }, + { + "object_info": { + "oid": { + "oid": "ROBJ9", + "key": "", + "snapid": -2, + "hash": 537189375, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "119'68", + "prior_version": "51'64", + "last_reqid": "client.4834.0:1", + "user_version": 81, + "size": 3, + "mtime": "2018-04-05 14:35:01.500659", + "local_mtime": "2018-04-05 14:35:01.502117", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x1f26fb26", + "omap_digest": "0x2eecc539", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "data_digest": "0x1f26fb26", + "omap_digest": "0x2eecc539", + "size": 3, + "errors": [], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ9", + "key": "", + "snapid": -2, + "hash": 537189375, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "119'68", + "prior_version": "51'64", + "last_reqid": "client.4834.0:1", + "user_version": 81, + "size": 3, + "mtime": "2018-04-05 14:35:01.500659", + "local_mtime": "2018-04-05 14:35:01.502117", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x1f26fb26", + "omap_digest": "0x2eecc539", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "obj_size_info_mismatch" + ], + "errors": [ + "object_info_inconsistency" + ], + "object": { + "version": 68, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ9" + } + } + ], + "epoch": 0 +} +EOF + + jq "$jqfilter" $dir/json | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/csjson + multidiff $dir/checkcsjson 
$dir/csjson || test $getjson = "yes" || return 1 + if test $getjson = "yes" + then + jq '.' $dir/json > save2.json + fi + + if test "$LOCALRUN" = "yes" && which jsonschema > /dev/null; + then + jsonschema -i $dir/json $CEPH_ROOT/doc/rados/command/list-inconsistent-obj.json || return 1 + fi + + repair $pg + wait_for_clean + + # This hangs if the repair doesn't work + timeout 30 rados -p $poolname get ROBJ17 $dir/robj17.out || return 1 + timeout 30 rados -p $poolname get ROBJ18 $dir/robj18.out || return 1 + # Even though we couldn't repair all of the introduced errors, we can fix ROBJ17 + diff -q $dir/new.ROBJ17 $dir/robj17.out || return 1 + rm -f $dir/new.ROBJ17 $dir/robj17.out || return 1 + diff -q $dir/new.ROBJ18 $dir/robj18.out || return 1 + rm -f $dir/new.ROBJ18 $dir/robj18.out || return 1 + + if [ $ERRORS != "0" ]; + then + echo "TEST FAILED WITH $ERRORS ERRORS" + return 1 + fi + + ceph osd pool rm $poolname $poolname --yes-i-really-really-mean-it +} + + +# +# Test scrub errors for an erasure coded pool +# +function corrupt_scrub_erasure() { + local dir=$1 + local allow_overwrites=$2 + local poolname=ecpool + local total_objs=7 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for id in $(seq 0 2) ; do + run_osd $dir $id || return 1 + done + create_rbd_pool || return 1 + create_pool foo 1 + + create_ec_pool $poolname $allow_overwrites k=2 m=1 stripe_unit=2K --force || return 1 + wait_for_clean || return 1 + + for i in $(seq 1 $total_objs) ; do + objname=EOBJ${i} + add_something $dir $poolname $objname || return 1 + + local osd=$(expr $i % 2) + + case $i in + 1) + # Size (deep scrub data_digest too) + local payload=UVWXYZZZ + echo $payload > $dir/CORRUPT + objectstore_tool $dir $osd $objname set-bytes $dir/CORRUPT || return 1 + ;; + + 2) + # Corrupt EC shard + dd if=/dev/urandom of=$dir/CORRUPT bs=2048 count=1 + objectstore_tool $dir $osd $objname set-bytes $dir/CORRUPT || return 1 + ;; + + 3) + # missing + objectstore_tool $dir $osd $objname remove 
|| return 1 + ;; + + 4) + rados --pool $poolname setxattr $objname key1-$objname val1-$objname || return 1 + rados --pool $poolname setxattr $objname key2-$objname val2-$objname || return 1 + + # Break xattrs + echo -n bad-val > $dir/bad-val + objectstore_tool $dir $osd $objname set-attr _key1-$objname $dir/bad-val || return 1 + objectstore_tool $dir $osd $objname rm-attr _key2-$objname || return 1 + echo -n val3-$objname > $dir/newval + objectstore_tool $dir $osd $objname set-attr _key3-$objname $dir/newval || return 1 + rm $dir/bad-val $dir/newval + ;; + + 5) + # Corrupt EC shard + dd if=/dev/urandom of=$dir/CORRUPT bs=2048 count=2 + objectstore_tool $dir $osd $objname set-bytes $dir/CORRUPT || return 1 + ;; + + 6) + objectstore_tool $dir 0 $objname rm-attr hinfo_key || return 1 + echo -n bad-val > $dir/bad-val + objectstore_tool $dir 1 $objname set-attr hinfo_key $dir/bad-val || return 1 + ;; + + 7) + local payload=MAKETHISDIFFERENTFROMOTHEROBJECTS + echo $payload > $dir/DIFFERENT + rados --pool $poolname put $objname $dir/DIFFERENT || return 1 + + # Get hinfo_key from EOBJ1 + objectstore_tool $dir 0 EOBJ1 get-attr hinfo_key > $dir/hinfo + objectstore_tool $dir 0 $objname set-attr hinfo_key $dir/hinfo || return 1 + rm -f $dir/hinfo + ;; + + esac + done + + local pg=$(get_pg $poolname EOBJ0) + + pg_scrub $pg + + rados list-inconsistent-pg $poolname > $dir/json || return 1 + # Check pg count + test $(jq '. 
| length' $dir/json) = "1" || return 1 + # Check pgid + test $(jq -r '.[0]' $dir/json) = $pg || return 1 + + rados list-inconsistent-obj $pg > $dir/json || return 1 + # Get epoch for repair-get requests + epoch=$(jq .epoch $dir/json) + + jq "$jqfilter" << EOF | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/checkcsjson +{ + "inconsistents": [ + { + "shards": [ + { + "size": 2048, + "errors": [], + "shard": 2, + "osd": 0, + "primary": false + }, + { + "object_info": { + "oid": { + "oid": "EOBJ1", + "key": "", + "snapid": -2, + "hash": 560836233, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "27'1", + "prior_version": "0'0", + "last_reqid": "client.4184.0:1", + "user_version": 1, + "size": 7, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "size": 9, + "shard": 0, + "errors": [ + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "osd": 1, + "primary": true + }, + { + "size": 2048, + "shard": 1, + "errors": [], + "osd": 2, + "primary": false + } + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ1", + "key": "", + "snapid": -2, + "hash": 560836233, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "27'1", + "prior_version": "0'0", + "last_reqid": "client.4184.0:1", + "user_version": 1, + "size": 7, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "errors": [ + "size_mismatch" 
+ ], + "object": { + "version": 1, + "snap": "head", + "locator": "", + "nspace": "", + "name": "EOBJ1" + } + }, + { + "shards": [ + { + "size": 2048, + "errors": [], + "shard": 2, + "osd": 0, + "primary": false + }, + { + "shard": 0, + "errors": [ + "missing" + ], + "osd": 1, + "primary": true + }, + { + "size": 2048, + "shard": 1, + "errors": [], + "osd": 2, + "primary": false + } + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ3", + "key": "", + "snapid": -2, + "hash": 3125668237, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "39'3", + "prior_version": "0'0", + "last_reqid": "client.4252.0:1", + "user_version": 3, + "size": 7, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "missing" + ], + "errors": [], + "object": { + "version": 3, + "snap": "head", + "locator": "", + "nspace": "", + "name": "EOBJ3" + } + }, + { + "shards": [ + { + "attrs": [ + { + "Base64": false, + "value": "bad-val", + "name": "key1-EOBJ4" + }, + { + "Base64": false, + "value": "val2-EOBJ4", + "name": "key2-EOBJ4" + } + ], + "size": 2048, + "errors": [], + "shard": 2, + "osd": 0, + "primary": false + }, + { + "osd": 1, + "primary": true, + "shard": 0, + "errors": [], + "size": 2048, + "attrs": [ + { + "Base64": false, + "value": "val1-EOBJ4", + "name": "key1-EOBJ4" + }, + { + "Base64": false, + "value": "val2-EOBJ4", + "name": "key2-EOBJ4" + } + ] + }, + { + "osd": 2, + "primary": false, + "shard": 1, + "errors": [], + "size": 2048, + "attrs": [ + { + "Base64": false, + "value": "val1-EOBJ4", + "name": "key1-EOBJ4" + }, + { + "Base64": false, + "value": "val3-EOBJ4", + "name": "key3-EOBJ4" + } + ] + } + ], + "selected_object_info": { + "oid": { + "oid": 
"EOBJ4", + "key": "", + "snapid": -2, + "hash": 1618759290, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "45'6", + "prior_version": "45'5", + "last_reqid": "client.4294.0:1", + "user_version": 6, + "size": 7, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [], + "errors": [ + "attr_value_mismatch", + "attr_name_mismatch" + ], + "object": { + "version": 6, + "snap": "head", + "locator": "", + "nspace": "", + "name": "EOBJ4" + } + }, + { + "shards": [ + { + "size": 2048, + "errors": [], + "shard": 2, + "osd": 0, + "primary": false + }, + { + "object_info": { + "oid": { + "oid": "EOBJ5", + "key": "", + "snapid": -2, + "hash": 2918945441, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "59'7", + "prior_version": "0'0", + "last_reqid": "client.4382.0:1", + "user_version": 7, + "size": 7, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "size": 4096, + "shard": 0, + "errors": [ + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "osd": 1, + "primary": true + }, + { + "size": 2048, + "shard": 1, + "errors": [], + "osd": 2, + "primary": false + } + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ5", + "key": "", + "snapid": -2, + "hash": 2918945441, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "59'7", + "prior_version": "0'0", + "last_reqid": "client.4382.0:1", + "user_version": 7, + "size": 7, + "mtime": "", + "local_mtime": 
"", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "errors": [ + "size_mismatch" + ], + "object": { + "version": 7, + "snap": "head", + "locator": "", + "nspace": "", + "name": "EOBJ5" + } + }, + { + "errors": [], + "object": { + "locator": "", + "name": "EOBJ6", + "nspace": "", + "snap": "head", + "version": 8 + }, + "selected_object_info": { + "oid": { + "oid": "EOBJ6", + "key": "", + "snapid": -2, + "hash": 3050890866, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "65'8", + "prior_version": "0'0", + "last_reqid": "client.4418.0:1", + "user_version": 8, + "size": 7, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "shards": [ + { + "errors": [ + "hinfo_missing" + ], + "osd": 0, + "primary": false, + "shard": 2, + "size": 2048 + }, + { + "errors": [ + "hinfo_corrupted" + ], + "osd": 1, + "primary": true, + "shard": 0, + "hashinfo": "bad-val", + "size": 2048 + }, + { + "errors": [], + "osd": 2, + "primary": false, + "shard": 1, + "size": 2048, + "hashinfo": { + "cumulative_shard_hashes": [ + { + "hash": 80717615, + "shard": 0 + }, + { + "hash": 1534491824, + "shard": 1 + }, + { + "hash": 80717615, + "shard": 2 + } + ], + "total_chunk_size": 2048 + } + } + ], + "union_shard_errors": [ + "hinfo_missing", + "hinfo_corrupted" + ] + }, + { + "errors": [ + "hinfo_inconsistency" + ], + "object": { + "locator": "", + "name": "EOBJ7", + 
"nspace": "", + "snap": "head", + "version": 10 + }, + "selected_object_info": { + "oid": { + "oid": "EOBJ7", + "key": "", + "snapid": -2, + "hash": 3258066308, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "75'10", + "prior_version": "75'9", + "last_reqid": "client.4482.0:1", + "user_version": 10, + "size": 34, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x136e4e27", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "shards": [ + { + "hashinfo": { + "cumulative_shard_hashes": [ + { + "hash": 80717615, + "shard": 0 + }, + { + "hash": 1534491824, + "shard": 1 + }, + { + "hash": 80717615, + "shard": 2 + } + ], + "total_chunk_size": 2048 + }, + "errors": [], + "osd": 0, + "primary": false, + "shard": 2, + "size": 2048 + }, + { + "hashinfo": { + "cumulative_shard_hashes": [ + { + "hash": 1534350760, + "shard": 0 + }, + { + "hash": 1534491824, + "shard": 1 + }, + { + "hash": 1534350760, + "shard": 2 + } + ], + "total_chunk_size": 2048 + }, + "errors": [], + "osd": 1, + "primary": true, + "shard": 0, + "size": 2048 + }, + { + "hashinfo": { + "cumulative_shard_hashes": [ + { + "hash": 1534350760, + "shard": 0 + }, + { + "hash": 1534491824, + "shard": 1 + }, + { + "hash": 1534350760, + "shard": 2 + } + ], + "total_chunk_size": 2048 + }, + "errors": [], + "osd": 2, + "primary": false, + "shard": 1, + "size": 2048 + } + ], + "union_shard_errors": [] + } + ], + "epoch": 0 +} +EOF + + jq "$jqfilter" $dir/json | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/csjson + multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1 + if test $getjson = "yes" + then + jq '.' 
$dir/json > save3.json + fi + + if test "$LOCALRUN" = "yes" && which jsonschema > /dev/null; + then + jsonschema -i $dir/json $CEPH_ROOT/doc/rados/command/list-inconsistent-obj.json || return 1 + fi + + pg_deep_scrub $pg + + rados list-inconsistent-pg $poolname > $dir/json || return 1 + # Check pg count + test $(jq '. | length' $dir/json) = "1" || return 1 + # Check pgid + test $(jq -r '.[0]' $dir/json) = $pg || return 1 + + rados list-inconsistent-obj $pg > $dir/json || return 1 + # Get epoch for repair-get requests + epoch=$(jq .epoch $dir/json) + + if [ "$allow_overwrites" = "true" ] + then + jq "$jqfilter" << EOF | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/checkcsjson +{ + "inconsistents": [ + { + "shards": [ + { + "data_digest": "0x00000000", + "omap_digest": "0xffffffff", + "size": 2048, + "errors": [], + "shard": 2, + "osd": 0, + "primary": false + }, + { + "object_info": { + "oid": { + "oid": "EOBJ1", + "key": "", + "snapid": -2, + "hash": 560836233, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "27'1", + "prior_version": "0'0", + "last_reqid": "client.4184.0:1", + "user_version": 1, + "size": 7, + "mtime": "2018-04-05 14:31:33.837147", + "local_mtime": "2018-04-05 14:31:33.840763", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "size": 9, + "shard": 0, + "errors": [ + "read_error", + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "osd": 1, + "primary": true + }, + { + "data_digest": "0x00000000", + "omap_digest": "0xffffffff", + "size": 2048, + "shard": 1, + "errors": [], + "osd": 2, + "primary": false + } + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ1", + "key": "", + "snapid": -2, + "hash": 560836233, + "max": 0, + "pool": 3, + "namespace": "" + }, + 
"version": "27'1", + "prior_version": "0'0", + "last_reqid": "client.4184.0:1", + "user_version": 1, + "size": 7, + "mtime": "2018-04-05 14:31:33.837147", + "local_mtime": "2018-04-05 14:31:33.840763", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "read_error", + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "errors": [ + "size_mismatch" + ], + "object": { + "version": 1, + "snap": "head", + "locator": "", + "nspace": "", + "name": "EOBJ1" + } + }, + { + "shards": [ + { + "data_digest": "0x00000000", + "omap_digest": "0xffffffff", + "size": 2048, + "errors": [], + "shard": 2, + "osd": 0, + "primary": false + }, + { + "shard": 0, + "errors": [ + "missing" + ], + "osd": 1, + "primary": true + }, + { + "data_digest": "0x00000000", + "omap_digest": "0xffffffff", + "size": 2048, + "shard": 1, + "errors": [], + "osd": 2, + "primary": false + } + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ3", + "key": "", + "snapid": -2, + "hash": 3125668237, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "39'3", + "prior_version": "0'0", + "last_reqid": "client.4252.0:1", + "user_version": 3, + "size": 7, + "mtime": "2018-04-05 14:31:46.841145", + "local_mtime": "2018-04-05 14:31:46.844996", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "missing" + ], + "errors": [], + "object": { + "version": 3, + "snap": "head", + "locator": "", + "nspace": "", + "name": "EOBJ3" + } + }, + { + "shards": [ + { + 
"attrs": [ + { + "Base64": false, + "value": "bad-val", + "name": "key1-EOBJ4" + }, + { + "Base64": false, + "value": "val2-EOBJ4", + "name": "key2-EOBJ4" + } + ], + "data_digest": "0x00000000", + "omap_digest": "0xffffffff", + "size": 2048, + "errors": [], + "shard": 2, + "osd": 0, + "primary": false + }, + { + "attrs": [ + { + "Base64": false, + "value": "val1-EOBJ4", + "name": "key1-EOBJ4" + }, + { + "Base64": false, + "value": "val2-EOBJ4", + "name": "key2-EOBJ4" + } + ], + "data_digest": "0x00000000", + "omap_digest": "0xffffffff", + "size": 2048, + "errors": [], + "shard": 0, + "osd": 1, + "primary": true + }, + { + "attrs": [ + { + "Base64": false, + "value": "val1-EOBJ4", + "name": "key1-EOBJ4" + }, + { + "Base64": false, + "value": "val3-EOBJ4", + "name": "key3-EOBJ4" + } + ], + "data_digest": "0x00000000", + "omap_digest": "0xffffffff", + "size": 2048, + "errors": [], + "shard": 1, + "osd": 2, + "primary": false + } + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ4", + "key": "", + "snapid": -2, + "hash": 1618759290, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "45'6", + "prior_version": "45'5", + "last_reqid": "client.4294.0:1", + "user_version": 6, + "size": 7, + "mtime": "2018-04-05 14:31:54.663622", + "local_mtime": "2018-04-05 14:31:54.664527", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [], + "errors": [ + "attr_value_mismatch", + "attr_name_mismatch" + ], + "object": { + "version": 6, + "snap": "head", + "locator": "", + "nspace": "", + "name": "EOBJ4" + } + }, + { + "shards": [ + { + "data_digest": "0x00000000", + "omap_digest": "0xffffffff", + "size": 2048, + "errors": [], + "shard": 2, + "osd": 0, + "primary": false + }, + { + "object_info": { + 
"oid": { + "oid": "EOBJ5", + "key": "", + "snapid": -2, + "hash": 2918945441, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "59'7", + "prior_version": "0'0", + "last_reqid": "client.4382.0:1", + "user_version": 7, + "size": 7, + "mtime": "2018-04-05 14:32:12.929161", + "local_mtime": "2018-04-05 14:32:12.934707", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "size": 4096, + "errors": [ + "read_error", + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "shard": 0, + "osd": 1, + "primary": true + }, + { + "data_digest": "0x00000000", + "omap_digest": "0xffffffff", + "size": 2048, + "errors": [], + "shard": 1, + "osd": 2, + "primary": false + } + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ5", + "key": "", + "snapid": -2, + "hash": 2918945441, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "59'7", + "prior_version": "0'0", + "last_reqid": "client.4382.0:1", + "user_version": 7, + "size": 7, + "mtime": "2018-04-05 14:32:12.929161", + "local_mtime": "2018-04-05 14:32:12.934707", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "read_error", + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "errors": [ + "size_mismatch" + ], + "object": { + "version": 7, + "snap": "head", + "locator": "", + "nspace": "", + "name": "EOBJ5" + } + }, + { + "object": { + "name": "EOBJ6", + "nspace": "", + "locator": "", + "snap": "head", + "version": 8 + }, + "errors": [], + "union_shard_errors": [ + 
"read_error", + "hinfo_missing", + "hinfo_corrupted" + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ6", + "key": "", + "snapid": -2, + "hash": 3050890866, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "65'8", + "prior_version": "0'0", + "last_reqid": "client.4418.0:1", + "user_version": 8, + "size": 7, + "mtime": "2018-04-05 14:32:20.634116", + "local_mtime": "2018-04-05 14:32:20.637999", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "shards": [ + { + "osd": 0, + "primary": false, + "shard": 2, + "errors": [ + "read_error", + "hinfo_missing" + ], + "size": 2048 + }, + { + "osd": 1, + "primary": true, + "shard": 0, + "errors": [ + "read_error", + "hinfo_corrupted" + ], + "size": 2048, + "hashinfo": "bad-val" + }, + { + "osd": 2, + "primary": false, + "shard": 1, + "errors": [], + "size": 2048, + "omap_digest": "0xffffffff", + "data_digest": "0x00000000", + "hashinfo": { + "cumulative_shard_hashes": [ + { + "hash": 80717615, + "shard": 0 + }, + { + "hash": 1534491824, + "shard": 1 + }, + { + "hash": 80717615, + "shard": 2 + } + ], + "total_chunk_size": 2048 + } + } + ] + }, + { + "object": { + "name": "EOBJ7", + "nspace": "", + "locator": "", + "snap": "head", + "version": 10 + }, + "errors": [ + "hinfo_inconsistency" + ], + "union_shard_errors": [], + "selected_object_info": { + "oid": { + "oid": "EOBJ7", + "key": "", + "snapid": -2, + "hash": 3258066308, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "75'10", + "prior_version": "75'9", + "last_reqid": "client.4482.0:1", + "user_version": 10, + "size": 34, + "mtime": "2018-04-05 14:32:33.058782", + "local_mtime": "2018-04-05 14:32:33.059679", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + 
"truncate_size": 0, + "data_digest": "0x136e4e27", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "shards": [ + { + "osd": 0, + "primary": false, + "shard": 2, + "errors": [], + "size": 2048, + "omap_digest": "0xffffffff", + "data_digest": "0x00000000", + "hashinfo": { + "cumulative_shard_hashes": [ + { + "hash": 80717615, + "shard": 0 + }, + { + "hash": 1534491824, + "shard": 1 + }, + { + "hash": 80717615, + "shard": 2 + } + ], + "total_chunk_size": 2048 + } + }, + { + "osd": 1, + "primary": true, + "shard": 0, + "errors": [], + "size": 2048, + "omap_digest": "0xffffffff", + "data_digest": "0x00000000", + "hashinfo": { + "cumulative_shard_hashes": [ + { + "hash": 1534350760, + "shard": 0 + }, + { + "hash": 1534491824, + "shard": 1 + }, + { + "hash": 1534350760, + "shard": 2 + } + ], + "total_chunk_size": 2048 + } + }, + { + "osd": 2, + "primary": false, + "shard": 1, + "errors": [], + "size": 2048, + "omap_digest": "0xffffffff", + "data_digest": "0x00000000", + "hashinfo": { + "cumulative_shard_hashes": [ + { + "hash": 1534350760, + "shard": 0 + }, + { + "hash": 1534491824, + "shard": 1 + }, + { + "hash": 1534350760, + "shard": 2 + } + ], + "total_chunk_size": 2048 + } + } + ] + } + ], + "epoch": 0 +} +EOF + + else + + jq "$jqfilter" << EOF | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/checkcsjson +{ + "inconsistents": [ + { + "shards": [ + { + "data_digest": "0x04cfa72f", + "omap_digest": "0xffffffff", + "size": 2048, + "errors": [], + "shard": 2, + "osd": 0, + "primary": false + }, + { + "object_info": { + "oid": { + "oid": "EOBJ1", + "key": "", + "snapid": -2, + "hash": 560836233, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "27'1", + "prior_version": "0'0", + "last_reqid": "client.4192.0:1", + "user_version": 1, + "size": 7, + "mtime": "2018-04-05 14:30:10.688009", + "local_mtime": "2018-04-05 14:30:10.691774", + 
"lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "size": 9, + "shard": 0, + "errors": [ + "read_error", + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "osd": 1, + "primary": true + }, + { + "data_digest": "0x04cfa72f", + "omap_digest": "0xffffffff", + "size": 2048, + "shard": 1, + "errors": [], + "osd": 2, + "primary": false + } + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ1", + "key": "", + "snapid": -2, + "hash": 560836233, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "27'1", + "prior_version": "0'0", + "last_reqid": "client.4192.0:1", + "user_version": 1, + "size": 7, + "mtime": "2018-04-05 14:30:10.688009", + "local_mtime": "2018-04-05 14:30:10.691774", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "read_error", + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "errors": [ + "size_mismatch" + ], + "object": { + "version": 1, + "snap": "head", + "locator": "", + "nspace": "", + "name": "EOBJ1" + } + }, + { + "shards": [ + { + "size": 2048, + "errors": [ + "ec_hash_error" + ], + "shard": 2, + "osd": 0, + "primary": false + }, + { + "data_digest": "0x04cfa72f", + "omap_digest": "0xffffffff", + "size": 2048, + "errors": [], + "shard": 0, + "osd": 1, + "primary": true + }, + { + "data_digest": "0x04cfa72f", + "omap_digest": "0xffffffff", + "size": 2048, + "errors": [], + "shard": 1, + "osd": 2, + "primary": false + } + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ2", + "key": "", + 
"snapid": -2, + "hash": 562812377, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "33'2", + "prior_version": "0'0", + "last_reqid": "client.4224.0:1", + "user_version": 2, + "size": 7, + "mtime": "2018-04-05 14:30:14.152945", + "local_mtime": "2018-04-05 14:30:14.154014", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "ec_hash_error" + ], + "errors": [], + "object": { + "version": 2, + "snap": "head", + "locator": "", + "nspace": "", + "name": "EOBJ2" + } + }, + { + "shards": [ + { + "data_digest": "0x04cfa72f", + "omap_digest": "0xffffffff", + "size": 2048, + "errors": [], + "shard": 2, + "osd": 0, + "primary": false + }, + { + "osd": 1, + "primary": true, + "shard": 0, + "errors": [ + "missing" + ] + }, + { + "data_digest": "0x04cfa72f", + "omap_digest": "0xffffffff", + "size": 2048, + "shard": 1, + "errors": [], + "osd": 2, + "primary": false + } + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ3", + "key": "", + "snapid": -2, + "hash": 3125668237, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "39'3", + "prior_version": "0'0", + "last_reqid": "client.4258.0:1", + "user_version": 3, + "size": 7, + "mtime": "2018-04-05 14:30:18.875544", + "local_mtime": "2018-04-05 14:30:18.880153", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "missing" + ], + "errors": [], + "object": { + "version": 3, + "snap": "head", + "locator": "", + "nspace": "", + "name": "EOBJ3" + } + }, + { + 
"shards": [ + { + "attrs": [ + { + "Base64": false, + "value": "bad-val", + "name": "key1-EOBJ4" + }, + { + "Base64": false, + "value": "val2-EOBJ4", + "name": "key2-EOBJ4" + } + ], + "data_digest": "0x04cfa72f", + "omap_digest": "0xffffffff", + "size": 2048, + "errors": [], + "shard": 2, + "osd": 0, + "primary": false + }, + { + "osd": 1, + "primary": true, + "shard": 0, + "errors": [], + "size": 2048, + "omap_digest": "0xffffffff", + "data_digest": "0x04cfa72f", + "attrs": [ + { + "Base64": false, + "value": "val1-EOBJ4", + "name": "key1-EOBJ4" + }, + { + "Base64": false, + "value": "val2-EOBJ4", + "name": "key2-EOBJ4" + } + ] + }, + { + "osd": 2, + "primary": false, + "shard": 1, + "errors": [], + "size": 2048, + "omap_digest": "0xffffffff", + "data_digest": "0x04cfa72f", + "attrs": [ + { + "Base64": false, + "value": "val1-EOBJ4", + "name": "key1-EOBJ4" + }, + { + "Base64": false, + "value": "val3-EOBJ4", + "name": "key3-EOBJ4" + } + ] + } + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ4", + "key": "", + "snapid": -2, + "hash": 1618759290, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "45'6", + "prior_version": "45'5", + "last_reqid": "client.4296.0:1", + "user_version": 6, + "size": 7, + "mtime": "2018-04-05 14:30:22.271983", + "local_mtime": "2018-04-05 14:30:22.272840", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [], + "errors": [ + "attr_value_mismatch", + "attr_name_mismatch" + ], + "object": { + "version": 6, + "snap": "head", + "locator": "", + "nspace": "", + "name": "EOBJ4" + } + }, + { + "shards": [ + { + "data_digest": "0x04cfa72f", + "omap_digest": "0xffffffff", + "size": 2048, + "errors": [], + "shard": 2, + "osd": 0, + "primary": false + }, + { + 
"object_info": { + "oid": { + "oid": "EOBJ5", + "key": "", + "snapid": -2, + "hash": 2918945441, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "59'7", + "prior_version": "0'0", + "last_reqid": "client.4384.0:1", + "user_version": 7, + "size": 7, + "mtime": "2018-04-05 14:30:35.162395", + "local_mtime": "2018-04-05 14:30:35.166390", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "size": 4096, + "shard": 0, + "errors": [ + "read_error", + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "osd": 1, + "primary": true + }, + { + "data_digest": "0x04cfa72f", + "omap_digest": "0xffffffff", + "size": 2048, + "shard": 1, + "errors": [], + "osd": 2, + "primary": false + } + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ5", + "key": "", + "snapid": -2, + "hash": 2918945441, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "59'7", + "prior_version": "0'0", + "last_reqid": "client.4384.0:1", + "user_version": 7, + "size": 7, + "mtime": "2018-04-05 14:30:35.162395", + "local_mtime": "2018-04-05 14:30:35.166390", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "read_error", + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "errors": [ + "size_mismatch" + ], + "object": { + "version": 7, + "snap": "head", + "locator": "", + "nspace": "", + "name": "EOBJ5" + } + }, + { + "object": { + "name": "EOBJ6", + "nspace": "", + "locator": "", + "snap": "head", + "version": 8 + }, + "errors": [], + 
"union_shard_errors": [ + "read_error", + "hinfo_missing", + "hinfo_corrupted" + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ6", + "key": "", + "snapid": -2, + "hash": 3050890866, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "65'8", + "prior_version": "0'0", + "last_reqid": "client.4420.0:1", + "user_version": 8, + "size": 7, + "mtime": "2018-04-05 14:30:40.914673", + "local_mtime": "2018-04-05 14:30:40.917705", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "shards": [ + { + "osd": 0, + "primary": false, + "shard": 2, + "errors": [ + "read_error", + "hinfo_missing" + ], + "size": 2048 + }, + { + "osd": 1, + "primary": true, + "shard": 0, + "errors": [ + "read_error", + "hinfo_corrupted" + ], + "size": 2048, + "hashinfo": "bad-val" + }, + { + "osd": 2, + "primary": false, + "shard": 1, + "errors": [], + "size": 2048, + "omap_digest": "0xffffffff", + "data_digest": "0x04cfa72f", + "hashinfo": { + "cumulative_shard_hashes": [ + { + "hash": 80717615, + "shard": 0 + }, + { + "hash": 1534491824, + "shard": 1 + }, + { + "hash": 80717615, + "shard": 2 + } + ], + "total_chunk_size": 2048 + } + } + ] + }, + { + "object": { + "name": "EOBJ7", + "nspace": "", + "locator": "", + "snap": "head", + "version": 10 + }, + "errors": [ + "hinfo_inconsistency" + ], + "union_shard_errors": [ + "ec_hash_error" + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ7", + "key": "", + "snapid": -2, + "hash": 3258066308, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "75'10", + "prior_version": "75'9", + "last_reqid": "client.4486.0:1", + "user_version": 10, + "size": 34, + "mtime": "2018-04-05 14:30:50.995009", + "local_mtime": "2018-04-05 14:30:50.996112", + "lost": 0, + "flags": [ + "dirty", 
+ "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x136e4e27", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "shards": [ + { + "osd": 0, + "primary": false, + "shard": 2, + "errors": [ + "ec_hash_error" + ], + "size": 2048, + "hashinfo": { + "cumulative_shard_hashes": [ + { + "hash": 80717615, + "shard": 0 + }, + { + "hash": 1534491824, + "shard": 1 + }, + { + "hash": 80717615, + "shard": 2 + } + ], + "total_chunk_size": 2048 + } + }, + { + "osd": 1, + "primary": true, + "shard": 0, + "errors": [], + "size": 2048, + "omap_digest": "0xffffffff", + "data_digest": "0x5b7455a8", + "hashinfo": { + "cumulative_shard_hashes": [ + { + "hash": 1534350760, + "shard": 0 + }, + { + "hash": 1534491824, + "shard": 1 + }, + { + "hash": 1534350760, + "shard": 2 + } + ], + "total_chunk_size": 2048 + } + }, + { + "osd": 2, + "primary": false, + "shard": 1, + "errors": [], + "size": 2048, + "omap_digest": "0xffffffff", + "data_digest": "0x5b7455a8", + "hashinfo": { + "cumulative_shard_hashes": [ + { + "hash": 1534350760, + "shard": 0 + }, + { + "hash": 1534491824, + "shard": 1 + }, + { + "hash": 1534350760, + "shard": 2 + } + ], + "total_chunk_size": 2048 + } + } + ] + } + ], + "epoch": 0 +} +EOF + + fi + + jq "$jqfilter" $dir/json | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/csjson + multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1 + if test $getjson = "yes" + then + if [ "$allow_overwrites" = "true" ] + then + num=4 + else + num=5 + fi + jq '.' 
$dir/json > save${num}.json + fi + + if test "$LOCALRUN" = "yes" && which jsonschema > /dev/null; + then + jsonschema -i $dir/json $CEPH_ROOT/doc/rados/command/list-inconsistent-obj.json || return 1 + fi + + ceph osd pool rm $poolname $poolname --yes-i-really-really-mean-it +} + +function TEST_corrupt_scrub_erasure_appends() { + corrupt_scrub_erasure $1 false +} + +function TEST_corrupt_scrub_erasure_overwrites() { + if [ "$use_ec_overwrite" = "true" ]; then + corrupt_scrub_erasure $1 true + fi +} + +# +# Test to make sure that a periodic scrub won't cause deep-scrub info to be lost +# +function TEST_periodic_scrub_replicated() { + local dir=$1 + local poolname=psr_pool + local objname=POBJ + + run_mon $dir a --osd_pool_default_size=2 || return 1 + run_mgr $dir x || return 1 + local ceph_osd_args="--osd-scrub-interval-randomize-ratio=0 --osd-deep-scrub-randomize-ratio=0 " + ceph_osd_args+="--osd_scrub_backoff_ratio=0" + run_osd $dir 0 $ceph_osd_args || return 1 + run_osd $dir 1 $ceph_osd_args || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + + create_pool $poolname 1 1 || return 1 + wait_for_clean || return 1 + + local osd=0 + add_something $dir $poolname $objname scrub || return 1 + local primary=$(get_primary $poolname $objname) + local pg=$(get_pg $poolname $objname) + + # Add deep-scrub only error + local payload=UVWXYZ + echo $payload > $dir/CORRUPT + # Uses $ceph_osd_args for osd restart + objectstore_tool $dir $osd $objname set-bytes $dir/CORRUPT || return 1 + + # No scrub information available, so expect failure + set -o pipefail + ! rados list-inconsistent-obj $pg | jq '.' || return 1 + set +o pipefail + + pg_deep_scrub $pg || return 1 + + # Make sure bad object found + rados list-inconsistent-obj $pg | jq '.' 
| grep -q $objname || return 1 + + flush_pg_stats + local last_scrub=$(get_last_scrub_stamp $pg) + # Fake a schedule scrub + ceph tell $pg scrub || return 1 + # Wait for schedule regular scrub + wait_for_scrub $pg "$last_scrub" + + # It needed to be upgraded + grep -q "Deep scrub errors, upgrading scrub to deep-scrub" $dir/osd.${primary}.log || return 1 + + # Bad object still known + rados list-inconsistent-obj $pg | jq '.' | grep -q $objname || return 1 + + # Can't upgrade with this set + ceph osd set nodeep-scrub + # Let map change propagate to OSDs + ceph tell osd.0 get_latest_osdmap + flush_pg_stats + sleep 5 + + # Fake a schedule scrub + ceph tell $pg scrub || return 1 + # Wait for schedule regular scrub + # to notice scrub and skip it + local found=false + for i in $(seq 14 -1 0) + do + sleep 1 + ! grep -q "Regular scrub skipped due to deep-scrub errors and nodeep-scrub set" $dir/osd.${primary}.log || { found=true ; break; } + echo Time left: $i seconds + done + test $found = "true" || return 1 + + # Bad object still known + rados list-inconsistent-obj $pg | jq '.' | grep -q $objname || return 1 + + flush_pg_stats + # Request a regular scrub and it will be done + pg_scrub $pg + grep -q "Regular scrub request, deep-scrub details will be lost" $dir/osd.${primary}.log || return 1 + + # deep-scrub error is no longer present + rados list-inconsistent-obj $pg | jq '.' 
| grep -qv $objname || return 1 +} + +function TEST_scrub_warning() { + local dir=$1 + local poolname=psr_pool + local objname=POBJ + local scrubs=5 + local deep_scrubs=5 + local i1_day=86400 + local i7_days=$(calc $i1_day \* 7) + local i14_days=$(calc $i1_day \* 14) + local overdue=0.5 + local conf_overdue_seconds=$(calc $i7_days + $i1_day + \( $i7_days \* $overdue \) ) + local pool_overdue_seconds=$(calc $i14_days + $i1_day + \( $i14_days \* $overdue \) ) + + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x --mon_warn_pg_not_scrubbed_ratio=${overdue} --mon_warn_pg_not_deep_scrubbed_ratio=${overdue} || return 1 + run_osd $dir 0 $ceph_osd_args --osd_scrub_backoff_ratio=0 || return 1 + + for i in $(seq 1 $(expr $scrubs + $deep_scrubs)) + do + create_pool $poolname-$i 1 1 || return 1 + wait_for_clean || return 1 + if [ $i = "1" ]; + then + ceph osd pool set $poolname-$i scrub_max_interval $i14_days + fi + if [ $i = $(expr $scrubs + 1) ]; + then + ceph osd pool set $poolname-$i deep_scrub_interval $i14_days + fi + done + + # Only 1 osd + local primary=0 + + ceph osd set noscrub || return 1 + ceph osd set nodeep-scrub || return 1 + ceph config set global osd_scrub_interval_randomize_ratio 0 + ceph config set global osd_deep_scrub_randomize_ratio 0 + ceph config set global osd_scrub_max_interval ${i7_days} + ceph config set global osd_deep_scrub_interval ${i7_days} + + # Fake schedule scrubs + for i in $(seq 1 $scrubs) + do + if [ $i = "1" ]; + then + overdue_seconds=$pool_overdue_seconds + else + overdue_seconds=$conf_overdue_seconds + fi + ceph tell ${i}.0 scrub $(expr ${overdue_seconds} + ${i}00) || return 1 + done + # Fake schedule deep scrubs + for i in $(seq $(expr $scrubs + 1) $(expr $scrubs + $deep_scrubs)) + do + if [ $i = "$(expr $scrubs + 1)" ]; + then + overdue_seconds=$pool_overdue_seconds + else + overdue_seconds=$conf_overdue_seconds + fi + ceph tell ${i}.0 deep_scrub $(expr ${overdue_seconds} + ${i}00) 
|| return 1 + done + flush_pg_stats + + ceph health + ceph health detail + ceph health | grep -q " pgs not deep-scrubbed in time" || return 1 + ceph health | grep -q " pgs not scrubbed in time" || return 1 + + # note that the 'ceph tell pg deep_scrub' command now also sets the regular scrub + # time-stamp. I.e. - all 'late for deep scrubbing' pgs are also late for + # regular scrubbing. For now, we'll allow both responses. + COUNT=$(ceph health detail | grep "not scrubbed since" | wc -l) + + if (( $COUNT != $scrubs && $COUNT != $(expr $scrubs+$deep_scrubs) )); then + ceph health detail | grep "not scrubbed since" + return 1 + fi + COUNT=$(ceph health detail | grep "not deep-scrubbed since" | wc -l) + if [ "$COUNT" != $deep_scrubs ]; then + ceph health detail | grep "not deep-scrubbed since" + return 1 + fi +} + +# +# Corrupt snapset in replicated pool +# +function TEST_corrupt_snapset_scrub_rep() { + local dir=$1 + local poolname=csr_pool + local total_objs=2 + + run_mon $dir a --osd_pool_default_size=2 || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + + create_pool foo 1 || return 1 + create_pool $poolname 1 1 || return 1 + wait_for_clean || return 1 + + for i in $(seq 1 $total_objs) ; do + objname=ROBJ${i} + add_something $dir $poolname $objname || return 1 + + rados --pool $poolname setomapheader $objname hdr-$objname || return 1 + rados --pool $poolname setomapval $objname key-$objname val-$objname || return 1 + done + + local pg=$(get_pg $poolname ROBJ0) + local primary=$(get_primary $poolname ROBJ0) + + rados -p $poolname mksnap snap1 + echo -n head_of_snapshot_data > $dir/change + + for i in $(seq 1 $total_objs) ; do + objname=ROBJ${i} + + # Alternate corruption between osd.0 and osd.1 + local osd=$(expr $i % 2) + + case $i in + 1) + rados --pool $poolname put $objname $dir/change + objectstore_tool $dir $osd --head $objname clear-snapset corrupt || 
return 1 + ;; + + 2) + rados --pool $poolname put $objname $dir/change + objectstore_tool $dir $osd --head $objname clear-snapset corrupt || return 1 + ;; + + esac + done + rm $dir/change + + pg_scrub $pg + + rados list-inconsistent-pg $poolname > $dir/json || return 1 + # Check pg count + test $(jq '. | length' $dir/json) = "1" || return 1 + # Check pgid + test $(jq -r '.[0]' $dir/json) = $pg || return 1 + + rados list-inconsistent-obj $pg > $dir/json || return 1 + + jq "$jqfilter" << EOF | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/checkcsjson +{ + "epoch": 34, + "inconsistents": [ + { + "object": { + "name": "ROBJ1", + "nspace": "", + "locator": "", + "snap": "head", + "version": 8 + }, + "errors": [ + "snapset_inconsistency" + ], + "union_shard_errors": [], + "selected_object_info": { + "oid": { + "oid": "ROBJ1", + "key": "", + "snapid": -2, + "hash": 1454963827, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "24'8", + "prior_version": "21'3", + "last_reqid": "client.4195.0:1", + "user_version": 8, + "size": 21, + "mtime": "2018-04-05 14:35:43.286117", + "local_mtime": "2018-04-05 14:35:43.288990", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x53acb008", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "shards": [ + { + "osd": 0, + "primary": false, + "errors": [], + "size": 21, + "snapset": { + "clones": [ + { + "overlap": "[]", + "size": 7, + "snap": 1, + "snaps": [ + 1 + ] + } + ], + "seq": 1 + } + }, + { + "osd": 1, + "primary": true, + "errors": [], + "size": 21, + "snapset": { + "clones": [], + "seq": 0 + } + } + ] + }, + { + "object": { + "name": "ROBJ2", + "nspace": "", + "locator": "", + "snap": "head", + "version": 10 + }, + "errors": [ + "snapset_inconsistency" + ], + "union_shard_errors": [], + "selected_object_info": { + 
"oid": { + "oid": "ROBJ2", + "key": "", + "snapid": -2, + "hash": 2026323607, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "28'10", + "prior_version": "23'6", + "last_reqid": "client.4223.0:1", + "user_version": 10, + "size": 21, + "mtime": "2018-04-05 14:35:48.326856", + "local_mtime": "2018-04-05 14:35:48.328097", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x53acb008", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "shards": [ + { + "osd": 0, + "primary": false, + "errors": [], + "size": 21, + "snapset": { + "clones": [], + "seq": 0 + } + }, + { + "osd": 1, + "primary": true, + "errors": [], + "size": 21, + "snapset": { + "clones": [ + { + "overlap": "[]", + "size": 7, + "snap": 1, + "snaps": [ + 1 + ] + } + ], + "seq": 1 + } + } + ] + } + ] +} +EOF + + jq "$jqfilter" $dir/json | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/csjson + multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1 + if test $getjson = "yes" + then + jq '.' 
$dir/json > save6.json + fi + + if test "$LOCALRUN" = "yes" && which jsonschema > /dev/null; + then + jsonschema -i $dir/json $CEPH_ROOT/doc/rados/command/list-inconsistent-obj.json || return 1 + fi + + ERRORS=0 + declare -a err_strings + err_strings[0]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid [0-9]*:.*:::ROBJ1:head : snapset inconsistent" + err_strings[1]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid [0-9]*:.*:::ROBJ2:head : snapset inconsistent" + err_strings[2]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 [0-9]*:.*:::ROBJ1:1 : is an unexpected clone" + err_strings[3]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub : stat mismatch, got 3/4 objects, 1/2 clones, 3/4 dirty, 3/4 omap, 0/0 pinned, 0/0 hit_set_archive, 0/0 whiteouts, 49/56 bytes, 0/0 manifest objects, 0/0 hit_set_archive bytes." + err_strings[4]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 0 missing, 2 inconsistent objects" + err_strings[5]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 4 errors" + + for err_string in "${err_strings[@]}" + do + if ! 
grep -q "$err_string" $dir/osd.${primary}.log + then + echo "Missing log message '$err_string'" + ERRORS=$(expr $ERRORS + 1) + fi + done + + if [ $ERRORS != "0" ]; + then + echo "TEST FAILED WITH $ERRORS ERRORS" + return 1 + fi + + ceph osd pool rm $poolname $poolname --yes-i-really-really-mean-it +} + +function TEST_request_scrub_priority() { + local dir=$1 + local poolname=psr_pool + local objname=POBJ + local OBJECTS=64 + local PGS=8 + + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + local ceph_osd_args="--osd-scrub-interval-randomize-ratio=0 --osd-deep-scrub-randomize-ratio=0 " + ceph_osd_args+="--osd_scrub_backoff_ratio=0" + run_osd $dir 0 $ceph_osd_args || return 1 + + create_pool $poolname $PGS $PGS || return 1 + wait_for_clean || return 1 + + local osd=0 + add_something $dir $poolname $objname noscrub || return 1 + local primary=$(get_primary $poolname $objname) + local pg=$(get_pg $poolname $objname) + poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }') + + local otherpgs + for i in $(seq 0 $(expr $PGS - 1)) + do + opg="${poolid}.${i}" + if [ "$opg" = "$pg" ]; then + continue + fi + otherpgs="${otherpgs}${opg} " + local other_last_scrub=$(get_last_scrub_stamp $pg) + # Fake a schedule scrub + ceph tell $opg scrub $opg || return 1 + done + + sleep 15 + flush_pg_stats + + # Request a regular scrub and it will be done + local last_scrub=$(get_last_scrub_stamp $pg) + ceph pg scrub $pg + + ceph osd unset noscrub || return 1 + ceph osd unset nodeep-scrub || return 1 + + wait_for_scrub $pg "$last_scrub" + + for opg in $otherpgs $pg + do + wait_for_scrub $opg "$other_last_scrub" + done + + # Verify that the requested scrub ran first + grep "log_channel.*scrub ok" $dir/osd.${primary}.log | grep -v purged_snaps | head -1 | sed 's/.*[[]DBG[]]//' | grep -q $pg || return 1 +} + + +main osd-scrub-repair "$@" + +# Local Variables: +# compile-command: "cd build ; make -j4 && \ +# 
../qa/run-standalone.sh osd-scrub-repair.sh" +# End: diff --git a/qa/standalone/scrub/osd-scrub-snaps.sh b/qa/standalone/scrub/osd-scrub-snaps.sh new file mode 100755 index 000000000..c543b48a1 --- /dev/null +++ b/qa/standalone/scrub/osd-scrub-snaps.sh @@ -0,0 +1,1188 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2015 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +# Test development and debugging +# Set to "yes" in order to ignore diff errors and save results to update test +getjson="no" + +jqfilter='.inconsistents' +sortkeys='import json; import sys ; JSON=sys.stdin.read() ; ud = json.loads(JSON) ; print ( json.dumps(ud, sort_keys=True, indent=2) )' + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7121" # git grep '\<7121\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + export -n CEPH_CLI_TEST_DUP_COMMAND + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function create_scenario() { + local dir=$1 + local poolname=$2 + local TESTDATA=$3 + local osd=$4 + + SNAP=1 + rados -p $poolname mksnap snap${SNAP} + dd if=/dev/urandom of=$TESTDATA bs=256 count=${SNAP} + rados -p $poolname put obj1 $TESTDATA + rados -p $poolname put obj5 $TESTDATA + rados -p 
$poolname put obj3 $TESTDATA + for i in `seq 6 14` + do rados -p $poolname put obj${i} $TESTDATA + done + + SNAP=2 + rados -p $poolname mksnap snap${SNAP} + dd if=/dev/urandom of=$TESTDATA bs=256 count=${SNAP} + rados -p $poolname put obj5 $TESTDATA + + SNAP=3 + rados -p $poolname mksnap snap${SNAP} + dd if=/dev/urandom of=$TESTDATA bs=256 count=${SNAP} + rados -p $poolname put obj3 $TESTDATA + + SNAP=4 + rados -p $poolname mksnap snap${SNAP} + dd if=/dev/urandom of=$TESTDATA bs=256 count=${SNAP} + rados -p $poolname put obj5 $TESTDATA + rados -p $poolname put obj2 $TESTDATA + + SNAP=5 + rados -p $poolname mksnap snap${SNAP} + SNAP=6 + rados -p $poolname mksnap snap${SNAP} + dd if=/dev/urandom of=$TESTDATA bs=256 count=${SNAP} + rados -p $poolname put obj5 $TESTDATA + + SNAP=7 + rados -p $poolname mksnap snap${SNAP} + + rados -p $poolname rm obj4 + rados -p $poolname rm obj16 + rados -p $poolname rm obj2 + + kill_daemons $dir TERM osd || return 1 + + # Don't need to use ceph_objectstore_tool() function because osd stopped + + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj1)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" --force remove || return 1 + + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --op list obj5 | grep \"snapid\":2)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" remove || return 1 + + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --op list obj5 | grep \"snapid\":1)" + OBJ5SAVE="$JSON" + # Starts with a snapmap + ceph-kvstore-tool bluestore-kv $dir/${osd} list 2> /dev/null > $dir/drk.log + grep SNA_ $dir/drk.log + grep "^[pm].*SNA_.*[.]1[.]obj5[.][.]$" $dir/drk.log || return 1 + ceph-objectstore-tool --data-path $dir/${osd} --rmtype nosnapmap "$JSON" remove || return 1 + # Check that snapmap is still there + ceph-kvstore-tool bluestore-kv $dir/${osd} list 2> /dev/null > $dir/drk.log + grep SNA_ $dir/drk.log + grep "^[pm].*SNA_.*[.]1[.]obj5[.][.]$" $dir/drk.log || return 1 + rm -f 
$dir/drk.log + + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --op list obj5 | grep \"snapid\":4)" + dd if=/dev/urandom of=$TESTDATA bs=256 count=18 + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" set-bytes $TESTDATA || return 1 + + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj3)" + dd if=/dev/urandom of=$TESTDATA bs=256 count=15 + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" set-bytes $TESTDATA || return 1 + + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --op list obj4 | grep \"snapid\":7)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" remove || return 1 + + # Starts with a snapmap + ceph-kvstore-tool bluestore-kv $dir/${osd} list 2> /dev/null > $dir/drk.log + grep SNA_ $dir/drk.log + grep "^[pm].*SNA_.*[.]7[.]obj16[.][.]$" $dir/drk.log || return 1 + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --op list obj16 | grep \"snapid\":7)" + ceph-objectstore-tool --data-path $dir/${osd} --rmtype snapmap "$JSON" remove || return 1 + # Check that snapmap is now removed + ceph-kvstore-tool bluestore-kv $dir/${osd} list 2> /dev/null > $dir/drk.log + grep SNA_ $dir/drk.log + ! 
grep "^[pm].*SNA_.*[.]7[.]obj16[.][.]$" $dir/drk.log || return 1 + rm -f $dir/drk.log + + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj2)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" rm-attr snapset || return 1 + + # Create a clone which isn't in snapset and doesn't have object info + JSON="$(echo "$OBJ5SAVE" | sed s/snapid\":1/snapid\":7/)" + dd if=/dev/urandom of=$TESTDATA bs=256 count=7 + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" set-bytes $TESTDATA || return 1 + + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj6)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset || return 1 + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj7)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset corrupt || return 1 + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj8)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset seq || return 1 + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj9)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset clone_size || return 1 + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj10)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset clone_overlap || return 1 + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj11)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset clones || return 1 + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj12)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset head || return 1 + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj13)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset snaps || return 1 + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj14)" + ceph-objectstore-tool --data-path 
$dir/${osd} "$JSON" clear-snapset size || return 1 + + echo "garbage" > $dir/bad + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj15)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" set-attr snapset $dir/bad || return 1 + rm -f $dir/bad + return 0 +} + +function TEST_scrub_snaps() { + local dir=$1 + local poolname=test + local OBJS=16 + local OSDS=1 + + TESTDATA="testdata.$$" + + run_mon $dir a --osd_pool_default_size=$OSDS || return 1 + run_mgr $dir x || return 1 + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + # All scrubs done manually. Don't want any unexpected scheduled scrubs. + ceph osd set noscrub || return 1 + ceph osd set nodeep-scrub || return 1 + + # Create a pool with a single pg + create_pool $poolname 1 1 + wait_for_clean || return 1 + poolid=$(ceph osd dump | grep "^pool.*[']test[']" | awk '{ print $2 }') + + dd if=/dev/urandom of=$TESTDATA bs=1032 count=1 + for i in `seq 1 $OBJS` + do + rados -p $poolname put obj${i} $TESTDATA + done + + local primary=$(get_primary $poolname obj1) + + create_scenario $dir $poolname $TESTDATA $primary || return 1 + + rm -f $TESTDATA + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + activate_osd $dir $osd || return 1 + done + ceph tell osd.* config set osd_shallow_scrub_chunk_max 25 + ceph tell osd.* config set osd_shallow_scrub_chunk_min 5 + ceph tell osd.* config set osd_pg_stat_report_interval_max 1 + + + wait_for_clean || return 1 + + ceph tell osd.* config get osd_shallow_scrub_chunk_max + ceph tell osd.* config get osd_shallow_scrub_chunk_min + ceph tell osd.* config get osd_pg_stat_report_interval_max + ceph tell osd.* config get osd_scrub_chunk_max + ceph tell osd.* config get osd_scrub_chunk_min + + local pgid="${poolid}.0" + if ! 
pg_scrub "$pgid" ; then + return 1 + fi + + test "$(grep "_scan_snaps start" $dir/osd.${primary}.log | wc -l)" = "2" || return 1 + + rados list-inconsistent-pg $poolname > $dir/json || return 1 + # Check pg count + test $(jq '. | length' $dir/json) = "1" || return 1 + # Check pgid + test $(jq -r '.[0]' $dir/json) = $pgid || return 1 + + rados list-inconsistent-obj $pgid > $dir/json || return 1 + + # The injected snapshot errors with a single copy pool doesn't + # see object errors because all the issues are detected by + # comparing copies. + jq "$jqfilter" << EOF | python3 -c "$sortkeys" > $dir/checkcsjson +{ + "epoch": 17, + "inconsistents": [] +} +EOF + + jq "$jqfilter" $dir/json | python3 -c "$sortkeys" > $dir/csjson + multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1 + + rados list-inconsistent-snapset $pgid > $dir/json || return 1 + + jq "$jqfilter" << EOF | python3 -c "$sortkeys" > $dir/checkcsjson +{ + "inconsistents": [ + { + "errors": [ + "headless" + ], + "snap": 1, + "locator": "", + "nspace": "", + "name": "obj1" + }, + { + "errors": [ + "size_mismatch" + ], + "snap": 1, + "locator": "", + "nspace": "", + "name": "obj10" + }, + { + "errors": [ + "headless" + ], + "snap": 1, + "locator": "", + "nspace": "", + "name": "obj11" + }, + { + "errors": [ + "size_mismatch" + ], + "snap": 1, + "locator": "", + "nspace": "", + "name": "obj14" + }, + { + "errors": [ + "headless" + ], + "snap": 1, + "locator": "", + "nspace": "", + "name": "obj6" + }, + { + "errors": [ + "headless" + ], + "snap": 1, + "locator": "", + "nspace": "", + "name": "obj7" + }, + { + "errors": [ + "size_mismatch" + ], + "snap": 1, + "locator": "", + "nspace": "", + "name": "obj9" + }, + { + "errors": [ + "headless" + ], + "snap": 4, + "locator": "", + "nspace": "", + "name": "obj2" + }, + { + "errors": [ + "size_mismatch" + ], + "snap": 4, + "locator": "", + "nspace": "", + "name": "obj5" + }, + { + "errors": [ + "headless" + ], + "snap": 7, + "locator": "", + 
"nspace": "", + "name": "obj2" + }, + { + "errors": [ + "info_missing", + "headless" + ], + "snap": 7, + "locator": "", + "nspace": "", + "name": "obj5" + }, + { + "name": "obj10", + "nspace": "", + "locator": "", + "snap": "head", + "snapset": { + "seq": 1, + "clones": [ + { + "snap": 1, + "size": 1032, + "overlap": "????", + "snaps": [ + 1 + ] + } + ] + }, + "errors": [] + }, + { + "extra clones": [ + 1 + ], + "errors": [ + "extra_clones" + ], + "snap": "head", + "locator": "", + "nspace": "", + "name": "obj11", + "snapset": { + "seq": 1, + "clones": [] + } + }, + { + "name": "obj14", + "nspace": "", + "locator": "", + "snap": "head", + "snapset": { + "seq": 1, + "clones": [ + { + "snap": 1, + "size": 1033, + "overlap": "[]", + "snaps": [ + 1 + ] + } + ] + }, + "errors": [] + }, + { + "errors": [ + "snapset_corrupted" + ], + "snap": "head", + "locator": "", + "nspace": "", + "name": "obj15" + }, + { + "extra clones": [ + 7, + 4 + ], + "errors": [ + "snapset_missing", + "extra_clones" + ], + "snap": "head", + "locator": "", + "nspace": "", + "name": "obj2" + }, + { + "errors": [ + "size_mismatch" + ], + "snap": "head", + "locator": "", + "nspace": "", + "name": "obj3", + "snapset": { + "seq": 3, + "clones": [ + { + "snap": 1, + "size": 1032, + "overlap": "[]", + "snaps": [ + 1 + ] + }, + { + "snap": 3, + "size": 256, + "overlap": "[]", + "snaps": [ + 3, + 2 + ] + } + ] + } + }, + { + "missing": [ + 7 + ], + "errors": [ + "clone_missing" + ], + "snap": "head", + "locator": "", + "nspace": "", + "name": "obj4", + "snapset": { + "seq": 7, + "clones": [ + { + "snap": 7, + "size": 1032, + "overlap": "[]", + "snaps": [ + 7, + 6, + 5, + 4, + 3, + 2, + 1 + ] + } + ] + } + }, + { + "missing": [ + 2, + 1 + ], + "extra clones": [ + 7 + ], + "errors": [ + "extra_clones", + "clone_missing" + ], + "snap": "head", + "locator": "", + "nspace": "", + "name": "obj5", + "snapset": { + "seq": 6, + "clones": [ + { + "snap": 1, + "size": 1032, + "overlap": "[]", + "snaps": [ + 1 + ] + 
}, + { + "snap": 2, + "size": 256, + "overlap": "[]", + "snaps": [ + 2 + ] + }, + { + "snap": 4, + "size": 512, + "overlap": "[]", + "snaps": [ + 4, + 3 + ] + }, + { + "snap": 6, + "size": 1024, + "overlap": "[]", + "snaps": [ + 6, + 5 + ] + } + ] + } + }, + { + "extra clones": [ + 1 + ], + "errors": [ + "extra_clones" + ], + "snap": "head", + "locator": "", + "nspace": "", + "name": "obj6", + "snapset": { + "seq": 1, + "clones": [] + } + }, + { + "extra clones": [ + 1 + ], + "errors": [ + "extra_clones" + ], + "snap": "head", + "locator": "", + "nspace": "", + "name": "obj7", + "snapset": { + "seq": 0, + "clones": [] + } + }, + { + "errors": [ + "snapset_error" + ], + "snap": "head", + "locator": "", + "nspace": "", + "name": "obj8", + "snapset": { + "seq": 0, + "clones": [ + { + "snap": 1, + "size": 1032, + "overlap": "[]", + "snaps": [ + 1 + ] + } + ] + } + }, + { + "name": "obj9", + "nspace": "", + "locator": "", + "snap": "head", + "snapset": { + "seq": 1, + "clones": [ + { + "snap": 1, + "size": "????", + "overlap": "[]", + "snaps": [ + 1 + ] + } + ] + }, + "errors": [] + } + ], + "epoch": 20 +} +EOF + + jq "$jqfilter" $dir/json | python3 -c "$sortkeys" > $dir/csjson + multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1 + if test $getjson = "yes" + then + jq '.' 
$dir/json > save1.json + fi + + if test "$LOCALRUN" = "yes" && which jsonschema > /dev/null; + then + jsonschema -i $dir/json $CEPH_ROOT/doc/rados/command/list-inconsistent-snap.json || return 1 + fi + + pidfiles=$(find $dir 2>/dev/null | grep 'osd[^/]*\.pid') + pids="" + for pidfile in ${pidfiles} + do + pids+="$(cat $pidfile) " + done + + ERRORS=0 + + for i in `seq 1 7` + do + rados -p $poolname rmsnap snap$i + done + sleep 5 + local -i loop=0 + while ceph pg dump pgs | grep -q snaptrim; + do + if ceph pg dump pgs | grep -q snaptrim_error; + then + break + fi + sleep 2 + loop+=1 + if (( $loop >= 10 )) ; then + ERRORS=$(expr $ERRORS + 1) + break + fi + done + ceph pg dump pgs + + for pid in $pids + do + if ! kill -0 $pid + then + echo "OSD Crash occurred" + ERRORS=$(expr $ERRORS + 1) + fi + done + + kill_daemons $dir || return 1 + + declare -a err_strings + err_strings[0]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*::obj10:.* : is missing in clone_overlap" + err_strings[1]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*::obj5:7 : no '_' attr" + err_strings[2]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*::obj5:7 : is an unexpected clone" + err_strings[3]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*::obj5:4 : on disk size [(]4608[)] does not match object info size [(]512[)] adjusted for ondisk to [(]512[)]" + err_strings[4]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj5:head : expected clone .*:::obj5:2" + err_strings[5]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj5:head : expected clone .*:::obj5:1" + err_strings[6]="log_channel[(]cluster[)] log [[]INF[]] : scrub [0-9]*[.]0 .*:::obj5:head : 2 missing clone[(]s[)]" + err_strings[7]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj8:head : snaps.seq not set" + err_strings[8]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj7:1 : is an unexpected clone" + 
err_strings[9]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj3:head : on disk size [(]3840[)] does not match object info size [(]768[)] adjusted for ondisk to [(]768[)]" + err_strings[10]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj6:1 : is an unexpected clone" + err_strings[11]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj2:head : no 'snapset' attr" + err_strings[12]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj2:7 : clone ignored due to missing snapset" + err_strings[13]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj2:4 : clone ignored due to missing snapset" + err_strings[14]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj4:head : expected clone .*:::obj4:7" + err_strings[15]="log_channel[(]cluster[)] log [[]INF[]] : scrub [0-9]*[.]0 .*:::obj4:head : 1 missing clone[(]s[)]" + err_strings[16]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj1:1 : is an unexpected clone" + err_strings[17]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj9:1 : is missing in clone_size" + err_strings[18]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj11:1 : is an unexpected clone" + err_strings[19]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj14:1 : size 1032 != clone_size 1033" + err_strings[20]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 20 errors" + err_strings[21]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj15:head : can't decode 'snapset' attr " + err_strings[22]="log_channel[(]cluster[)] log [[]ERR[]] : osd[.][0-9]* found snap mapper error on pg 1.0 oid 1:461f8b5e:::obj16:7 snaps missing in mapper, should be: {1, 2, 3, 4, 5, 6, 7} ...repaired" + + for err_string in "${err_strings[@]}" + do + if ! 
grep "$err_string" $dir/osd.${primary}.log > /dev/null; + then + echo "Missing log message '$err_string'" + ERRORS=$(expr $ERRORS + 1) + fi + done + + if [ $ERRORS != "0" ]; + then + echo "TEST FAILED WITH $ERRORS ERRORS" + return 1 + fi + + echo "TEST PASSED" + return 0 +} + +function _scrub_snaps_multi() { + local dir=$1 + local poolname=test + local OBJS=16 + local OSDS=2 + local which=$2 + + TESTDATA="testdata.$$" + + run_mon $dir a --osd_pool_default_size=$OSDS || return 1 + run_mgr $dir x || return 1 + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + # All scrubs done manually. Don't want any unexpected scheduled scrubs. + ceph osd set noscrub || return 1 + ceph osd set nodeep-scrub || return 1 + + # Create a pool with a single pg + create_pool $poolname 1 1 + wait_for_clean || return 1 + poolid=$(ceph osd dump | grep "^pool.*[']test[']" | awk '{ print $2 }') + + dd if=/dev/urandom of=$TESTDATA bs=1032 count=1 + for i in `seq 1 $OBJS` + do + rados -p $poolname put obj${i} $TESTDATA + done + + local primary=$(get_primary $poolname obj1) + local replica=$(get_not_primary $poolname obj1) + + eval create_scenario $dir $poolname $TESTDATA \$$which || return 1 + + rm -f $TESTDATA + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + activate_osd $dir $osd || return 1 + done + + ceph tell osd.* config set osd_shallow_scrub_chunk_max 3 + ceph tell osd.* config set osd_shallow_scrub_chunk_min 3 + ceph tell osd.* config set osd_scrub_chunk_min 3 + ceph tell osd.* config set osd_pg_stat_report_interval_max 1 + wait_for_clean || return 1 + + local pgid="${poolid}.0" + if ! pg_scrub "$pgid" ; then + return 1 + fi + + test "$(grep "_scan_snaps start" $dir/osd.${primary}.log | wc -l)" -gt "3" || return 1 + test "$(grep "_scan_snaps start" $dir/osd.${replica}.log | wc -l)" -gt "3" || return 1 + + rados list-inconsistent-pg $poolname > $dir/json || return 1 + # Check pg count + test $(jq '. 
| length' $dir/json) = "1" || return 1 + # Check pgid + test $(jq -r '.[0]' $dir/json) = $pgid || return 1 + + rados list-inconsistent-obj $pgid --format=json-pretty + + rados list-inconsistent-snapset $pgid > $dir/json || return 1 + + # Since all of the snapshots on the primary is consistent there are no errors here + if [ $which = "replica" ]; + then + scruberrors="20" + jq "$jqfilter" << EOF | python3 -c "$sortkeys" > $dir/checkcsjson +{ + "epoch": 23, + "inconsistents": [] +} +EOF + +else + scruberrors="30" + jq "$jqfilter" << EOF | python3 -c "$sortkeys" > $dir/checkcsjson +{ + "epoch": 23, + "inconsistents": [ + { + "name": "obj10", + "nspace": "", + "locator": "", + "snap": 1, + "errors": [ + "size_mismatch" + ] + }, + { + "name": "obj11", + "nspace": "", + "locator": "", + "snap": 1, + "errors": [ + "headless" + ] + }, + { + "name": "obj14", + "nspace": "", + "locator": "", + "snap": 1, + "errors": [ + "size_mismatch" + ] + }, + { + "name": "obj6", + "nspace": "", + "locator": "", + "snap": 1, + "errors": [ + "headless" + ] + }, + { + "name": "obj7", + "nspace": "", + "locator": "", + "snap": 1, + "errors": [ + "headless" + ] + }, + { + "name": "obj9", + "nspace": "", + "locator": "", + "snap": 1, + "errors": [ + "size_mismatch" + ] + }, + { + "name": "obj5", + "nspace": "", + "locator": "", + "snap": 7, + "errors": [ + "info_missing", + "headless" + ] + }, + { + "name": "obj10", + "nspace": "", + "locator": "", + "snap": "head", + "snapset": { + "seq": 1, + "clones": [ + { + "snap": 1, + "size": 1032, + "overlap": "????", + "snaps": [ + 1 + ] + } + ] + }, + "errors": [] + }, + { + "name": "obj11", + "nspace": "", + "locator": "", + "snap": "head", + "snapset": { + "seq": 1, + "clones": [] + }, + "errors": [ + "extra_clones" + ], + "extra clones": [ + 1 + ] + }, + { + "name": "obj14", + "nspace": "", + "locator": "", + "snap": "head", + "snapset": { + "seq": 1, + "clones": [ + { + "snap": 1, + "size": 1033, + "overlap": "[]", + "snaps": [ + 1 + ] + } + ] + 
}, + "errors": [] + }, + { + "name": "obj5", + "nspace": "", + "locator": "", + "snap": "head", + "snapset": { + "seq": 6, + "clones": [ + { + "snap": 1, + "size": 1032, + "overlap": "[]", + "snaps": [ + 1 + ] + }, + { + "snap": 2, + "size": 256, + "overlap": "[]", + "snaps": [ + 2 + ] + }, + { + "snap": 4, + "size": 512, + "overlap": "[]", + "snaps": [ + 4, + 3 + ] + }, + { + "snap": 6, + "size": 1024, + "overlap": "[]", + "snaps": [ + 6, + 5 + ] + } + ] + }, + "errors": [ + "extra_clones" + ], + "extra clones": [ + 7 + ] + }, + { + "name": "obj6", + "nspace": "", + "locator": "", + "snap": "head", + "snapset": { + "seq": 1, + "clones": [] + }, + "errors": [ + "extra_clones" + ], + "extra clones": [ + 1 + ] + }, + { + "name": "obj7", + "nspace": "", + "locator": "", + "snap": "head", + "snapset": { + "seq": 0, + "clones": [] + }, + "errors": [ + "extra_clones" + ], + "extra clones": [ + 1 + ] + }, + { + "name": "obj8", + "nspace": "", + "locator": "", + "snap": "head", + "snapset": { + "seq": 0, + "clones": [ + { + "snap": 1, + "size": 1032, + "overlap": "[]", + "snaps": [ + 1 + ] + } + ] + }, + "errors": [ + "snapset_error" + ] + }, + { + "name": "obj9", + "nspace": "", + "locator": "", + "snap": "head", + "snapset": { + "seq": 1, + "clones": [ + { + "snap": 1, + "size": "????", + "overlap": "[]", + "snaps": [ + 1 + ] + } + ] + }, + "errors": [] + } + ] +} +EOF +fi + + jq "$jqfilter" $dir/json | python3 -c "$sortkeys" > $dir/csjson + multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1 + if test $getjson = "yes" + then + jq '.' $dir/json > save1.json + fi + + if test "$LOCALRUN" = "yes" && which jsonschema > /dev/null; + then + jsonschema -i $dir/json $CEPH_ROOT/doc/rados/command/list-inconsistent-snap.json || return 1 + fi + + pidfiles=$(find $dir 2>/dev/null | grep 'osd[^/]*\.pid') + pids="" + for pidfile in ${pidfiles} + do + pids+="$(cat $pidfile) " + done + + ERRORS=0 + + # When removing snapshots with a corrupt replica, it crashes. 
+ # See http://tracker.ceph.com/issues/23875 + if [ $which = "primary" ]; + then + for i in `seq 1 7` + do + rados -p $poolname rmsnap snap$i + done + sleep 5 + local -i loop=0 + while ceph pg dump pgs | grep -q snaptrim; + do + if ceph pg dump pgs | grep -q snaptrim_error; + then + break + fi + sleep 2 + loop+=1 + if (( $loop >= 10 )) ; then + ERRORS=$(expr $ERRORS + 1) + break + fi + done + fi + ceph pg dump pgs + + for pid in $pids + do + if ! kill -0 $pid + then + echo "OSD Crash occurred" + ERRORS=$(expr $ERRORS + 1) + fi + done + + kill_daemons $dir || return 1 + + declare -a err_strings + err_strings[0]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] .*:::obj4:7 : missing" + err_strings[1]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] soid .*:::obj3:head : size 3840 != size 768 from auth oi" + err_strings[2]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] .*:::obj5:1 : missing" + err_strings[3]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] .*:::obj5:2 : missing" + err_strings[4]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] soid .*:::obj5:4 : size 4608 != size 512 from auth oi" + err_strings[5]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid .*:::obj5:7 : failed to pick suitable object info" + err_strings[6]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] .*:::obj1:head : missing" + err_strings[7]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub ${scruberrors} errors" + + for err_string in "${err_strings[@]}" + do + if ! 
grep "$err_string" $dir/osd.${primary}.log > /dev/null; + then + echo "Missing log message '$err_string'" + ERRORS=$(expr $ERRORS + 1) + fi + done + + # Check replica specific messages + declare -a rep_err_strings + osd=$(eval echo \$$which) + rep_err_strings[0]="log_channel[(]cluster[)] log [[]ERR[]] : osd[.][0-9]* found snap mapper error on pg 1.0 oid 1:461f8b5e:::obj16:7 snaps missing in mapper, should be: {1, 2, 3, 4, 5, 6, 7} ...repaired" + for err_string in "${rep_err_strings[@]}" + do + if ! grep "$err_string" $dir/osd.${osd}.log > /dev/null; + then + echo "Missing log message '$err_string'" + ERRORS=$(expr $ERRORS + 1) + fi + done + + if [ $ERRORS != "0" ]; + then + echo "TEST FAILED WITH $ERRORS ERRORS" + return 1 + fi + + echo "TEST PASSED" + return 0 +} + +function TEST_scrub_snaps_replica() { + local dir=$1 + ORIG_ARGS=$CEPH_ARGS + CEPH_ARGS+=" --osd_scrub_chunk_min=3 --osd_scrub_chunk_max=20 --osd_shallow_scrub_chunk_min=3 --osd_shallow_scrub_chunk_max=3 --osd_pg_stat_report_interval_max=1" + _scrub_snaps_multi $dir replica + err=$? + CEPH_ARGS=$ORIG_ARGS + return $err +} + +function TEST_scrub_snaps_primary() { + local dir=$1 + ORIG_ARGS=$CEPH_ARGS + CEPH_ARGS+=" --osd_scrub_chunk_min=3 --osd_scrub_chunk_max=20 --osd_shallow_scrub_chunk_min=3 --osd_shallow_scrub_chunk_max=3 --osd_pg_stat_report_interval_max=1" + _scrub_snaps_multi $dir primary + err=$? 
+ CEPH_ARGS=$ORIG_ARGS + return $err +} + +main osd-scrub-snaps "$@" + +# Local Variables: +# compile-command: "cd build ; make -j4 && \ +# ../qa/run-standalone.sh osd-scrub-snaps.sh" +# End: diff --git a/qa/standalone/scrub/osd-scrub-test.sh b/qa/standalone/scrub/osd-scrub-test.sh new file mode 100755 index 000000000..73f165380 --- /dev/null +++ b/qa/standalone/scrub/osd-scrub-test.sh @@ -0,0 +1,664 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2018 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
#
source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
source $CEPH_ROOT/qa/standalone/scrub/scrub-helpers.sh

# Test driver: exports the monitor address / CEPH_ARGS for this script and
# runs each requested TEST_* function (all of them when none are named)
# between setup() and teardown().
#
# $1: the test directory
# $2...: optional list of TEST_* function names to run
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7138" # git grep '\<7138\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "

    export -n CEPH_CLI_TEST_DUP_COMMAND
    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    for func in $funcs ; do
        setup $dir || return 1
        $func $dir || return 1
        teardown $dir || return 1
    done
}

# Corrupt one replica of an object, deep-scrub, and follow the reported
# num_scrub_errors / 'inconsistent' PG state while the primary is marked
# out, brought back in, and the PG is repaired.
#
# $1: the test directory
function TEST_scrub_test() {
    local dir=$1
    local poolname=test
    local OSDS=3
    local objects=15

    TESTDATA="testdata.$$"

    run_mon $dir a --osd_pool_default_size=3 || return 1
    run_mgr $dir x || return 1
    for osd in $(seq 0 $(expr $OSDS - 1))
    do
      run_osd $dir $osd || return 1
    done

    # Create a pool with a single pg
    create_pool $poolname 1 1
    wait_for_clean || return 1
    poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }')

    dd if=/dev/urandom of=$TESTDATA bs=1032 count=1
    for i in `seq 1 $objects`
    do
        rados -p $poolname put obj${i} $TESTDATA
    done
    rm -f $TESTDATA

    local primary=$(get_primary $poolname obj1)
    local otherosd=$(get_not_primary $poolname obj1)
    if [ "$otherosd" = "2" ];
    then
      local anotherosd="0"
    else
      local anotherosd="2"
    fi

    # overwrite the object's data on one OSD only, creating an inconsistency
    objectstore_tool $dir $anotherosd obj1 set-bytes /etc/fstab

    local pgid="${poolid}.0"
    pg_deep_scrub "$pgid" || return 1

    ceph pg dump pgs | grep ^${pgid} | grep -q -- +inconsistent || return 1
    test "$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_scrub_errors')" = "2" || return 1

    ceph osd out $primary
    wait_for_clean || return 1

    pg_deep_scrub "$pgid" || return 1

    test "$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_scrub_errors')" = "2" || return 1
    test "$(ceph pg $pgid query | jq '.peer_info[0].stats.stat_sum.num_scrub_errors')" = "2" || return 1
    ceph pg dump pgs | grep ^${pgid} | grep -q -- +inconsistent || return 1

    ceph osd in $primary
    wait_for_clean || return 1

    repair "$pgid" || return 1
    wait_for_clean || return 1

    # This sets up the test after we've repaired with previous primary has old value
    test "$(ceph pg $pgid query | jq '.peer_info[0].stats.stat_sum.num_scrub_errors')" = "2" || return 1
    ceph pg dump pgs | grep ^${pgid} | grep -vq -- +inconsistent || return 1

    ceph osd out $primary
    wait_for_clean || return 1

    test "$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_scrub_errors')" = "0" || return 1
    test "$(ceph pg $pgid query | jq '.peer_info[0].stats.stat_sum.num_scrub_errors')" = "0" || return 1
    test "$(ceph pg $pgid query | jq '.peer_info[1].stats.stat_sum.num_scrub_errors')" = "0" || return 1
    ceph pg dump pgs | grep ^${pgid} | grep -vq -- +inconsistent || return 1
}

# Grab year-month-day
DATESED="s/\([0-9]*-[0-9]*-[0-9]*\).*/\1/"
DATEFORMAT="%Y-%m-%d"

# Compare the dates (year-month-day only) of the first entry reported by the
# OSD's 'dump_scrubs' admin-socket command with "now + <offset>".
#
# $1: id of the primary OSD to query
# $2: offset (as understood by 'date -d') expected for 'sched_time'
# $3: offset (as understood by 'date -d') expected for 'deadline'
#
# Sets the globals DS, SCHED_TIME and DEADLINE. Returns 1 on a mismatch.
function check_dump_scrubs() {
    local primary=$1
    local sched_time_check="$2"
    local deadline_check="$3"

    DS="$(CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${primary}) dump_scrubs)"
    # 'jq -r' prints the raw string, i.e. without the JSON double-quotes
    SCHED_TIME=$(echo "$DS" | jq -r '.[0].sched_time')
    test "$(echo "$SCHED_TIME" | sed "$DATESED")" = "$(date +${DATEFORMAT} -d "now + $sched_time_check")" || return 1
    DEADLINE=$(echo "$DS" | jq -r '.[0].deadline')
    test "$(echo "$DEADLINE" | sed "$DATESED")" = "$(date +${DATEFORMAT} -d "now + $deadline_check")" || return 1
}

# Verify that changes to the scrub min/max intervals - first through the OSD
# configuration, then through the pool settings - show up in 'dump_scrubs'.
#
# $1: the test directory
function TEST_interval_changes() {
    # declared explicitly; do not rely on run()'s dynamically-scoped 'dir'
    local dir=$1
    local poolname=test
    local OSDS=2
    local objects=10
    # Don't assume how internal defaults are set
    local day=$(( 24 * 60 * 60 ))
    local week=$(( day * 7 ))
    local min_interval=$day
    local max_interval=$week
    local WAIT_FOR_UPDATE=15

    TESTDATA="testdata.$$"

    # This min scrub interval results in 30 seconds backoff time
    run_mon $dir a --osd_pool_default_size=$OSDS || return 1
    run_mgr $dir x || return 1
    for osd in $(seq 0 $(expr $OSDS - 1))
    do
      run_osd $dir $osd --osd_scrub_min_interval=$min_interval --osd_scrub_max_interval=$max_interval --osd_scrub_interval_randomize_ratio=0 || return 1
    done

    # Create a pool with a single pg
    create_pool $poolname 1 1
    wait_for_clean || return 1
    local poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }')

    dd if=/dev/urandom of=$TESTDATA bs=1032 count=1
    for i in `seq 1 $objects`
    do
        rados -p $poolname put obj${i} $TESTDATA
    done
    rm -f $TESTDATA

    local primary=$(get_primary $poolname obj1)

    # Check initial settings from above (min 1 day, max 1 week)
    check_dump_scrubs $primary "1 day" "1 week" || return 1

    # Change global osd_scrub_min_interval to 2 days
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${primary}) config set osd_scrub_min_interval $(( day * 2 ))
    sleep $WAIT_FOR_UPDATE
    check_dump_scrubs $primary "2 days" "1 week" || return 1

    # Change global osd_scrub_max_interval to 2 weeks
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${primary}) config set osd_scrub_max_interval $(( week * 2 ))
    sleep $WAIT_FOR_UPDATE
    check_dump_scrubs $primary "2 days" "2 week" || return 1

    # Change pool osd_scrub_min_interval to 3 days
    ceph osd pool set $poolname scrub_min_interval $(( day * 3 ))
    sleep $WAIT_FOR_UPDATE
    check_dump_scrubs $primary "3 days" "2 week" || return 1

    # Change pool osd_scrub_max_interval to 3 weeks
    ceph osd pool set $poolname scrub_max_interval $(( week * 3 ))
    sleep $WAIT_FOR_UPDATE
    check_dump_scrubs $primary "3 days" "3 week" || return 1
}

# Start the OSDs with a permitted scrub week-day window that does not include
# today, request a scrub, and check (via the primary's log) that the scrubber
# goes to sleep; then open the window and check that the scrub completes, but
# not sooner than 10 seconds later.
#
# $1: the test directory
function TEST_scrub_extended_sleep() {
    local dir=$1
    local poolname=test
    local OSDS=3
    local objects=15

    TESTDATA="testdata.$$"

    DAY=$(date +%w)
    # Handle wrap
    if [ "$DAY" -ge "4" ];
    then
      DAY="0"
    fi
    # Start after 2 days in case we are near midnight
    DAY_START=$(expr $DAY + 2)
    DAY_END=$(expr $DAY + 3)

    run_mon $dir a --osd_pool_default_size=3 || return 1
    run_mgr $dir x || return 1
    for osd in $(seq 0 $(expr $OSDS - 1))
    do
      run_osd $dir $osd --osd_scrub_sleep=0 \
                        --osd_scrub_extended_sleep=20 \
                        --bluestore_cache_autotune=false \
                        --osd_deep_scrub_randomize_ratio=0.0 \
                        --osd_scrub_interval_randomize_ratio=0 \
                        --osd_scrub_begin_week_day=$DAY_START \
                        --osd_scrub_end_week_day=$DAY_END \
                        || return 1
    done

    # Create a pool with a single pg
    create_pool $poolname 1 1
    wait_for_clean || return 1

    # Trigger a scrub on a PG
    local pgid=$(get_pg $poolname SOMETHING)
    local primary=$(get_primary $poolname SOMETHING)
    local last_scrub=$(get_last_scrub_stamp $pgid)
    ceph tell $pgid scrub || return 1

    # Allow scrub to start extended sleep
    PASSED="false"
    for ((i=0; i < 15; i++)); do
      if grep -q "scrub state.*, sleeping" $dir/osd.${primary}.log
      then
        PASSED="true"
        break
      fi
      sleep 1
    done

    # Check that extended sleep was triggered
    if [ $PASSED = "false" ];
    then
      return 1
    fi

    # release scrub to run after extended sleep finishes
    ceph tell osd.$primary config set osd_scrub_begin_week_day 0
    ceph tell osd.$primary config set osd_scrub_end_week_day 0

    # Due to extended sleep, the scrub should not be done within 20 seconds
    # but test up to 10 seconds and make sure it happens by 25 seconds.
    count=0
    PASSED="false"
    for ((i=0; i < 25; i++)); do
      count=$(expr $count + 1)
      if test "$(get_last_scrub_stamp $pgid)" '>' "$last_scrub" ; then
        # Did scrub run too soon?
        if [ $count -lt "10" ];
        then
          return 1
        fi
        PASSED="true"
        break
      fi
      sleep 1
    done

    # Make sure scrub eventually ran
    if [ $PASSED = "false" ];
    then
      return 1
    fi
}

# Common body of the scrub-abort tests: start a (deep) scrub on a PG holding
# many objects, set the matching 'no(deep-)scrub' flag while it is running,
# check the primary's log for the abort message, then clear the flag(s) and
# wait for a scrub to complete.
#
# $1: the test directory
# $2: "scrub" or "deep_scrub"
function _scrub_abort() {
    local dir=$1
    local poolname=test
    local OSDS=3
    local objects=1000
    local type=$2

    TESTDATA="testdata.$$"
    if test $type = "scrub";
    then
      stopscrub="noscrub"
      check="noscrub"
    else
      stopscrub="nodeep-scrub"
      check="nodeep_scrub"
    fi

    run_mon $dir a --osd_pool_default_size=3 || return 1
    run_mgr $dir x || return 1
    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        # Set scheduler to "wpq" until there's a reliable way to query scrub
        # states with "--osd-scrub-sleep" set to 0. The "mclock_scheduler"
        # overrides the scrub sleep to 0 and as a result the checks in the
        # test fail.
        run_osd $dir $osd --osd_pool_default_pg_autoscale_mode=off \
            --osd_deep_scrub_randomize_ratio=0.0 \
            --osd_scrub_sleep=5.0 \
            --osd_scrub_interval_randomize_ratio=0 \
            --osd_op_queue=wpq || return 1
    done

    # Create a pool with a single pg
    create_pool $poolname 1 1
    wait_for_clean || return 1
    poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }')

    dd if=/dev/urandom of=$TESTDATA bs=1032 count=1
    for i in `seq 1 $objects`
    do
        rados -p $poolname put obj${i} $TESTDATA
    done
    rm -f $TESTDATA

    local primary=$(get_primary $poolname obj1)
    local pgid="${poolid}.0"

    ceph tell $pgid $type || return 1
    # deep-scrub won't start without scrub noticing
    if [ "$type" = "deep_scrub" ];
    then
      ceph tell $pgid scrub || return 1
    fi

    # Wait for scrubbing to start
    set -o pipefail
    found="no"
    for i in $(seq 0 200)
    do
      flush_pg_stats
      if ceph pg dump pgs | grep ^$pgid| grep -q "scrubbing"
      then
        found="yes"
        #ceph pg dump pgs
        break
      fi
    done
    set +o pipefail

    if test $found = "no";
    then
      echo "Scrubbing never started"
      return 1
    fi

    ceph osd set $stopscrub
    if [ "$type" = "deep_scrub" ];
    then
      ceph osd set noscrub
    fi

    # Wait for scrubbing to end
    set -o pipefail
    for i in $(seq 0 200)
    do
      flush_pg_stats
      if ceph pg dump pgs | grep ^$pgid | grep -q "scrubbing"
      then
        continue
      fi
      #ceph pg dump pgs
      break
    done
    set +o pipefail

    sleep 5

    if ! grep "$check set, aborting" $dir/osd.${primary}.log
    then
      echo "Abort not seen in log"
      return 1
    fi

    local last_scrub=$(get_last_scrub_stamp $pgid)
    ceph config set osd "osd_scrub_sleep" "0.1"

    ceph osd unset $stopscrub
    if [ "$type" = "deep_scrub" ];
    then
      ceph osd unset noscrub
    fi
    # TIMEOUT is a global, set here ahead of the wait_for_scrub() call
    TIMEOUT=$(($objects / 2))
    wait_for_scrub $pgid "$last_scrub" || return 1
}

# $1: the test directory
function TEST_scrub_abort() {
    local dir=$1
    _scrub_abort $dir scrub
}

# $1: the test directory
function TEST_deep_scrub_abort() {
    local dir=$1
    _scrub_abort $dir deep_scrub
}

# Configure the permitted scrub hours to a window that ended an hour ago and
# check that a requested (periodic) scrub does not run within 30 seconds.
#
# $1: the test directory
function TEST_scrub_permit_time() {
    local dir=$1
    local poolname=test
    local OSDS=3
    local objects=15

    TESTDATA="testdata.$$"

    run_mon $dir a --osd_pool_default_size=3 || return 1
    run_mgr $dir x || return 1
    local scrub_begin_hour=$(date -d '2 hour ago' +"%H" | sed 's/^0//')
    local scrub_end_hour=$(date -d '1 hour ago' +"%H" | sed 's/^0//')
    for osd in $(seq 0 $(expr $OSDS - 1))
    do
      run_osd $dir $osd --bluestore_cache_autotune=false \
                        --osd_deep_scrub_randomize_ratio=0.0 \
                        --osd_scrub_interval_randomize_ratio=0 \
                        --osd_scrub_begin_hour=$scrub_begin_hour \
                        --osd_scrub_end_hour=$scrub_end_hour || return 1
    done

    # Create a pool with a single pg
    create_pool $poolname 1 1
    wait_for_clean || return 1

    # Trigger a scrub on a PG
    local pgid=$(get_pg $poolname SOMETHING)
    local primary=$(get_primary $poolname SOMETHING)
    local last_scrub=$(get_last_scrub_stamp $pgid)
    # If we don't specify an amount of time to subtract from
    # current time to set last_scrub_stamp, it sets the deadline
    # back by osd_max_interval which would cause the time permit checking
    # to be skipped.  Set back 1 day, the default scrub_min_interval.
    ceph tell $pgid scrub $(( 24 * 60 * 60 )) || return 1

    # Scrub should not run
    for ((i=0; i < 30; i++)); do
      if test "$(get_last_scrub_stamp $pgid)" '>' "$last_scrub" ; then
        return 1
      fi
      sleep 1
    done
}

# a test to recreate the problem described in bug #52901 - setting 'noscrub'
# without explicitly preventing deep scrubs made the PG 'unscrubable'.
# Fixed by PR#43521
#
# $1: the test directory
function TEST_just_deep_scrubs() {
    local dir=$1
    local -A cluster_conf=(
        ['osds_num']="3"
        ['pgs_in_pool']="4"
        ['pool_name']="test"
    )

    # nothing below can work without the cluster; fail early
    standard_scrub_cluster $dir cluster_conf || return 1
    local poolid=${cluster_conf['pool_id']}
    local poolname=${cluster_conf['pool_name']}
    echo "Pool: $poolname : $poolid"

    TESTDATA="testdata.$$"
    local objects=15
    dd if=/dev/urandom of=$TESTDATA bs=1032 count=1
    for i in `seq 1 $objects`
    do
        rados -p $poolname put obj${i} $TESTDATA
    done
    rm -f $TESTDATA

    # set both 'no scrub' & 'no deep-scrub', then request a deep-scrub.
    # we do not expect to see the scrub scheduled.

    ceph osd set noscrub || return 1
    ceph osd set nodeep-scrub || return 1
    sleep 6 # the 'noscrub' command takes a long time to reach the OSDs
    local now_is=`date -I"ns"`
    declare -A sched_data
    local pgid="${poolid}.2"

    # turn on the publishing of test data in the 'scrubber' section of 'pg query' output
    set_query_debug $pgid

    extract_published_sch $pgid $now_is $now_is sched_data
    local saved_last_stamp=${sched_data['query_last_stamp']}
    local dbg_counter_at_start=${sched_data['query_scrub_seq']}
    echo "test counter @ start: $dbg_counter_at_start"

    ceph pg $pgid deep_scrub

    sleep 5 # 5s is the 'pg dump' interval
    declare -A sc_data_2
    extract_published_sch $pgid $now_is $now_is sc_data_2
    echo "test counter @ should show no change: " ${sc_data_2['query_scrub_seq']}
    (( ${sc_data_2['dmp_last_duration']} == 0)) || return 1
    (( ${sc_data_2['query_scrub_seq']} == $dbg_counter_at_start)) || return 1

    # unset the 'no deep-scrub'. Deep scrubbing should start now.
    ceph osd unset nodeep-scrub || return 1
    sleep 5
    declare -A expct_qry_duration=( ['query_last_duration']="0" ['query_last_duration_neg']="not0" )
    sc_data_2=()
    wait_any_cond $pgid 10 $saved_last_stamp expct_qry_duration "WaitingAfterScrub " sc_data_2 || return 1
    # report the counter only once wait_any_cond() has refilled sc_data_2
    echo "test counter @ should be higher than before the unset: " ${sc_data_2['query_scrub_seq']}
}

# Follow the scrub-scheduling information published through 'pg dump' and
# 'pg query' across three steps: an initial scrub, a scrub requested while
# 'noscrub' is set, and the scrub that starts once 'noscrub' is cleared.
#
# $1: the test directory
function TEST_dump_scrub_schedule() {
    local dir=$1
    local poolname=test
    local OSDS=3
    local objects=15

    TESTDATA="testdata.$$"

    run_mon $dir a --osd_pool_default_size=$OSDS || return 1
    run_mgr $dir x || return 1

    # Set scheduler to "wpq" until there's a reliable way to query scrub states
    # with "--osd-scrub-sleep" set to 0. The "mclock_scheduler" overrides the
    # scrub sleep to 0 and as a result the checks in the test fail.
    local ceph_osd_args="--osd_deep_scrub_randomize_ratio=0 \
            --osd_scrub_interval_randomize_ratio=0 \
            --osd_scrub_backoff_ratio=0.0 \
            --osd_op_queue=wpq \
            --osd_scrub_sleep=0.2"

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
      run_osd $dir $osd $ceph_osd_args|| return 1
    done

    # Create a pool with a single pg
    create_pool $poolname 1 1
    wait_for_clean || return 1
    poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }')

    dd if=/dev/urandom of=$TESTDATA bs=1032 count=1
    for i in `seq 1 $objects`
    do
        rados -p $poolname put obj${i} $TESTDATA
    done
    rm -f $TESTDATA

    local pgid="${poolid}.0"
    local now_is=`date -I"ns"`

    # before the scrubbing starts

    # last scrub duration should be 0. The scheduling data should show
    # a time in the future:
    # e.g. 'periodic scrub scheduled @ 2021-10-12T20:32:43.645168+0000'

    declare -A expct_starting=( ['query_active']="false" ['query_is_future']="true" ['query_schedule']="scrub scheduled" )
    declare -A sched_data
    extract_published_sch $pgid $now_is "2019-10-12T20:32:43.645168+0000" sched_data
    # NOTE(review): the result of this comparison is not checked - confirm
    # whether that is intentional before adding '|| return 1'
    schedule_against_expected sched_data expct_starting "initial"
    (( ${sched_data['dmp_last_duration']} == 0)) || return 1
    echo "last-scrub  --- " ${sched_data['query_last_scrub']}

    #
    # step 1: scrub once (mainly to ensure there is no urgency to scrub)
    #

    saved_last_stamp=${sched_data['query_last_stamp']}
    ceph tell osd.* config set osd_scrub_sleep "0"
    ceph pg deep-scrub $pgid
    ceph pg scrub $pgid

    # wait for the 'last duration' entries to change. Note that the 'dump' one will need
    # up to 5 seconds to sync

    sleep 5
    sched_data=()
    declare -A expct_qry_duration=( ['query_last_duration']="0" ['query_last_duration_neg']="not0" )
    wait_any_cond $pgid 10 $saved_last_stamp expct_qry_duration "WaitingAfterScrub " sched_data || return 1
    # verify that 'pg dump' also shows the change in last_scrub_duration
    sched_data=()
    declare -A expct_dmp_duration=( ['dmp_last_duration']="0" ['dmp_last_duration_neg']="not0" )
    wait_any_cond $pgid 10 $saved_last_stamp expct_dmp_duration "WaitingAfterScrub_dmp " sched_data || return 1

    sleep 2

    #
    # step 2: set noscrub and request a "periodic scrub". Watch for the change in the 'is the scrub
    #         scheduled for the future' value
    #

    ceph tell osd.* config set osd_scrub_chunk_max "3" || return 1
    ceph tell osd.* config set osd_scrub_sleep "1.0" || return 1
    ceph osd set noscrub || return 1
    sleep 2
    saved_last_stamp=${sched_data['query_last_stamp']}

    ceph pg $pgid scrub
    sleep 1
    sched_data=()
    declare -A expct_scrub_peri_sched=( ['query_is_future']="false" )
    wait_any_cond $pgid 10 $saved_last_stamp expct_scrub_peri_sched "waitingBeingScheduled" sched_data || return 1

    # note: the induced change in 'last_scrub_stamp' that we've caused above, is by itself not a publish-stats
    # trigger. Thus it might happen that the information in 'pg dump' will not get updated here. Do not expect
    # 'dmp_is_future' to follow 'query_is_future' without a good reason
    ## declare -A expct_scrub_peri_sched_dmp=( ['dmp_is_future']="false" )
    ## wait_any_cond $pgid 15 $saved_last_stamp expct_scrub_peri_sched_dmp "waitingBeingScheduled" sched_data || echo "must be fixed"

    #
    # step 3: allow scrubs. Watch for the conditions during the scrubbing
    #

    saved_last_stamp=${sched_data['query_last_stamp']}
    ceph osd unset noscrub

    declare -A cond_active=( ['query_active']="true" )
    sched_data=()
    wait_any_cond $pgid 10 $saved_last_stamp cond_active "WaitingActive " sched_data || return 1

    # check for pg-dump to show being active. But if we see 'query_active' being reset - we've just
    # missed it.
    declare -A cond_active_dmp=( ['dmp_state_has_scrubbing']="true" ['query_active']="false" )
    sched_data=()
    wait_any_cond $pgid 10 $saved_last_stamp cond_active_dmp "WaitingActive " sched_data || return 1
}

# Scrub a PG and check that the 'objects_scrubbed' statistic reported by
# 'pg query' equals the number of objects written.
#
# $1: the test directory
function TEST_pg_dump_objects_scrubbed() {
    local dir=$1
    local poolname=test
    local OSDS=3
    local objects=15
    local timeout=10

    TESTDATA="testdata.$$"

    setup $dir || return 1
    run_mon $dir a --osd_pool_default_size=$OSDS || return 1
    run_mgr $dir x || return 1
    for osd in $(seq 0 $(expr $OSDS - 1))
    do
      run_osd $dir $osd || return 1
    done

    # Create a pool with a single pg
    create_pool $poolname 1 1
    wait_for_clean || return 1
    poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }')

    dd if=/dev/urandom of=$TESTDATA bs=1032 count=1
    for i in `seq 1 $objects`
    do
        rados -p $poolname put obj${i} $TESTDATA
    done
    rm -f $TESTDATA

    local pgid="${poolid}.0"
    #Trigger a scrub on a PG
    pg_scrub $pgid || return 1
    test "$(ceph pg $pgid query | jq '.info.stats.objects_scrubbed')" '=' $objects || return 1

    teardown $dir || return 1
}

main osd-scrub-test "$@"

# Local Variables:
# compile-command: "cd build ; make -j4 && \
#    ../qa/run-standalone.sh osd-scrub-test.sh"
# End:
diff --git a/qa/standalone/scrub/osd-unexpected-clone.sh b/qa/standalone/scrub/osd-unexpected-clone.sh
new file mode 100755
index 000000000..6895bfee6
--- /dev/null
+++ b/qa/standalone/scrub/osd-unexpected-clone.sh
@@ -0,0 +1,89 @@
#!/usr/bin/env bash
#
# Copyright (C) 2015 Intel <contact@intel.com.com>
# Copyright (C)
2014, 2015 Red Hat <contact@redhat.com> +# +# Author: Xiaoxi Chen <xiaoxi.chen@intel.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7144" # git grep '\<7144\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + export -n CEPH_CLI_TEST_DUP_COMMAND + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_recover_unexpected() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + ceph osd pool create foo 1 + rados -p foo put foo /etc/passwd + rados -p foo mksnap snap + rados -p foo put foo /etc/group + + wait_for_clean || return 1 + + local osd=$(get_primary foo foo) + + JSON=`objectstore_tool $dir $osd --op list foo | grep snapid.:1` + echo "JSON is $JSON" + rm -f $dir/_ $dir/data + objectstore_tool $dir $osd "$JSON" get-attr _ > $dir/_ || return 1 + objectstore_tool $dir $osd "$JSON" get-bytes $dir/data || return 1 + + rados -p foo rmsnap snap + + sleep 5 + + objectstore_tool $dir $osd "$JSON" set-bytes $dir/data || return 1 + objectstore_tool $dir $osd "$JSON" set-attr _ $dir/_ || return 1 + + sleep 5 + + ceph pg repair 1.0 || return 1 + + sleep 10 + + ceph 
log last + + # make sure osds are still up + timeout 60 ceph tell osd.0 version || return 1 + timeout 60 ceph tell osd.1 version || return 1 + timeout 60 ceph tell osd.2 version || return 1 +} + + +main osd-unexpected-clone "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/osd/osd-bench.sh" +# End: diff --git a/qa/standalone/scrub/scrub-helpers.sh b/qa/standalone/scrub/scrub-helpers.sh new file mode 100644 index 000000000..6816d71de --- /dev/null +++ b/qa/standalone/scrub/scrub-helpers.sh @@ -0,0 +1,302 @@ +#!/usr/bin/env bash +# @file scrub-helpers.sh +# @brief a collection of bash functions useful for scrub standalone tests +# + +# extract_published_sch() +# +# Use the output from both 'ceph pg dump pgs' and 'ceph pg x.x query' commands to determine +# the published scrub scheduling status of a given PG. +# +# $1: pg id +# $2: 'current' time to compare to +# $3: an additional time-point to compare to +# $4: [out] dictionary +# +function extract_published_sch() { + local pgn="$1" + local -n dict=$4 # a ref to the in/out dictionary + local current_time=$2 + local extra_time=$3 + local extr_dbg=1 # note: 3 and above leave some temp files around + + #turn off '-x' (but remember previous state) + local saved_echo_flag=${-//[^x]/} + set +x + + (( extr_dbg >= 3 )) && ceph pg dump pgs -f json-pretty >> /tmp/a_dmp$$ + (( extr_dbg >= 3 )) && ceph pg $1 query -f json-pretty >> /tmp/a_qry$$ + + from_dmp=`ceph pg dump pgs -f json-pretty | jq -r --arg pgn "$pgn" --arg extra_dt "$extra_time" --arg current_dt "$current_time" '[ + [[.pg_stats[]] | group_by(.pg_stats)][0][0] | + [.[] | + select(has("pgid") and .pgid == $pgn) | + + (.dmp_stat_part=(.scrub_schedule | if test(".*@.*") then (split(" @ ")|first) else . 
end)) | + (.dmp_when_part=(.scrub_schedule | if test(".*@.*") then (split(" @ ")|last) else "0" end)) | + + [ { + dmp_pg_state: .state, + dmp_state_has_scrubbing: (.state | test(".*scrub.*";"i")), + dmp_last_duration:.last_scrub_duration, + dmp_schedule: .dmp_stat_part, + dmp_schedule_at: .dmp_when_part, + dmp_is_future: ( .dmp_when_part > $current_dt ), + dmp_vs_date: ( .dmp_when_part > $extra_dt ), + dmp_reported_epoch: .reported_epoch, + dmp_seq: .reported_seq + }] ]][][][]'` + + (( extr_dbg >= 2 )) && echo "from pg dump pg: $from_dmp" + (( extr_dbg >= 2 )) && echo "query output:" + (( extr_dbg >= 2 )) && ceph pg $1 query -f json-pretty | awk -e '/scrubber/,/agent_state/ {print;}' + + from_qry=`ceph pg $1 query -f json-pretty | jq -r --arg extra_dt "$extra_time" --arg current_dt "$current_time" --arg spt "'" ' + . | + (.q_stat_part=((.scrubber.schedule// "-") | if test(".*@.*") then (split(" @ ")|first) else . end)) | + (.q_when_part=((.scrubber.schedule// "0") | if test(".*@.*") then (split(" @ ")|last) else "0" end)) | + (.q_when_is_future=(.q_when_part > $current_dt)) | + (.q_vs_date=(.q_when_part > $extra_dt)) | + { + query_epoch: .epoch, + query_seq: .info.stats.reported_seq, + query_active: (.scrubber | if has("active") then .active else "bug" end), + query_schedule: .q_stat_part, + query_schedule_at: .q_when_part, + query_last_duration: .info.stats.last_scrub_duration, + query_last_stamp: .info.history.last_scrub_stamp, + query_last_scrub: (.info.history.last_scrub| sub($spt;"x") ), + query_is_future: .q_when_is_future, + query_vs_date: .q_vs_date, + query_scrub_seq: .scrubber.test_sequence + } + '` + (( extr_dbg >= 1 )) && echo $from_qry " " $from_dmp | jq -s -r 'add | "(",(to_entries | .[] | "["+(.key)+"]="+(.value|@sh)),")"' + + # note that using a ref to an associative array directly is tricky. 
Instead - we are copying: + local -A dict_src=`echo $from_qry " " $from_dmp | jq -s -r 'add | "(",(to_entries | .[] | "["+(.key)+"]="+(.value|@sh)),")"'` + dict=() + for k in "${!dict_src[@]}"; do dict[$k]=${dict_src[$k]}; done + + if [[ -n "$saved_echo_flag" ]]; then set -x; fi +} + +# query the PG, until any of the conditions in the 'expected' array are met +# +# A condition may be negated by an additional entry in the 'expected' array. Its +# form should be: +# key: the original key, with a "_neg" suffix; +# Value: not checked +# +# $1: pg id +# $2: max retries +# $3: a date to use in comparisons +# $4: set of K/V conditions +# $5: debug message +# $6: [out] the results array +function wait_any_cond() { + local pgid="$1" + local retries=$2 + local cmp_date=$3 + local -n ep=$4 + local -n out_array=$6 + local -A sc_data + local extr_dbg=2 + + #turn off '-x' (but remember previous state) + local saved_echo_flag=${-//[^x]/} + set +x + + local now_is=`date -I"ns"` + (( extr_dbg >= 2 )) && echo "waiting for any condition ($5): pg:$pgid dt:$cmp_date ($retries retries)" + + for i in $(seq 1 $retries) + do + sleep 0.5 + extract_published_sch $pgid $now_is $cmp_date sc_data + (( extr_dbg >= 4 )) && echo "${sc_data['dmp_last_duration']}" + (( extr_dbg >= 4 )) && echo "----> loop: $i ~ ${sc_data['dmp_last_duration']} / " ${sc_data['query_vs_date']} " / ${sc_data['dmp_is_future']}" + (( extr_dbg >= 2 )) && echo "--> loop: $i ~ ${sc_data['query_active']} / ${sc_data['query_seq']} / ${sc_data['dmp_seq']} " \ + "/ ${sc_data['query_is_future']} / ${sc_data['query_last_stamp']} / ${sc_data['query_schedule']} %%% ${!ep[@]}" + + # perform schedule_against_expected(), but with slightly different out-messages behaviour + for k_ref in "${!ep[@]}" + do + (( extr_dbg >= 3 )) && echo "key is $k_ref" + # is this a real key, or just a negation flag for another key?? 
+ [[ $k_ref =~ "_neg" ]] && continue + + local act_val=${sc_data[$k_ref]} + local exp_val=${ep[$k_ref]} + + # possible negation? look for a matching key + local neg_key="${k_ref}_neg" + (( extr_dbg >= 3 )) && echo "neg-key is $neg_key" + if [ -v 'ep[$neg_key]' ]; then + is_neg=1 + else + is_neg=0 + fi + + (( extr_dbg >= 1 )) && echo "key is $k_ref: negation:$is_neg # expected: $exp_val # in actual: $act_val" + is_eq=0 + [[ $exp_val == $act_val ]] && is_eq=1 + if (($is_eq ^ $is_neg)) + then + echo "$5 - '$k_ref' actual value ($act_val) matches expected ($exp_val) (negation: $is_neg)" + for k in "${!sc_data[@]}"; do out_array[$k]=${sc_data[$k]}; done + if [[ -n "$saved_echo_flag" ]]; then set -x; fi + return 0 + fi + done + done + + echo "$5: wait_any_cond(): failure. Note: query-active=${sc_data['query_active']}" + if [[ -n "$saved_echo_flag" ]]; then set -x; fi + return 1 +} + + +# schedule_against_expected() +# +# Compare the scrub scheduling state collected by extract_published_sch() to a set of expected values. +# All values are expected to match. 
#
# $1: the published scheduling state
# $2: a set of conditions to verify
# $3: text to be echoed for a failed match
#
# Returns 0 if the value of every key in $2 is identical to the value of the
# same key in $1, and 1 on the first difference. Values are compared as
# literal strings. xtrace is switched off for the duration of the call and
# restored on return.
#
function schedule_against_expected() {
  local -n dict=$1 # a ref to the published state
  local -n ep=$2  # the expected results
  local extr_dbg=1
  local k_ref act_val exp_val

  # turn off '-x' (but remember previous state)
  local saved_echo_flag=${-//[^x]/}
  set +x

  (( extr_dbg >= 1 )) && echo "-- - comparing:"
  for k_ref in "${!ep[@]}"
  do
    act_val=${dict[$k_ref]}
    exp_val=${ep[$k_ref]}
    (( extr_dbg >= 1 )) && echo "key is " "$k_ref" " expected: " "$exp_val" " in actual: " "$act_val"
    # the right-hand side must be quoted: unquoted, [[ ]] would treat the
    # actual value as a glob pattern (e.g. an actual value of '*' would
    # "match" any expected value)
    if [[ "$exp_val" != "$act_val" ]]
    then
      echo "$3 - '$k_ref' actual value ($act_val) differs from expected ($exp_val)"
      echo '####################################################^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^'

      if [[ -n "$saved_echo_flag" ]]; then set -x; fi
      return 1
    fi
  done

  if [[ -n "$saved_echo_flag" ]]; then set -x; fi
  return 0
}


# Start the cluster "nodes" and create a pool for testing.
#
# The OSDs are started with a set of parameters aimed at creating a repeatable
# and stable scrub sequence:
# - no scrub randomizations/backoffs
# - no autoscaler
#
# $1: the test directory
# $2: [in/out] an array of configuration values
#
# The function adds/updates the configuration dictionary with the name of the
# pool created, and its ID.
#
# Argument 2 might look like this:
#
#  declare -A test_conf=(
#    ['osds_num']="3"
#    ['pgs_in_pool']="7"
#    ['extras']="--extra1 --extra2"
#    ['pool_name']="testpl"
#  )
#
# Keys read (with their defaults): 'osds_num' (3), 'pgs_in_pool' (8),
# 'pool_name' (test), 'extras' (none), 'msg' (dbg).
# Keys written: 'pool_name', 'pool_id', 'osd_args'.
#
# Returns 1 if the monitor, the manager or any of the OSDs failed to start,
# or if the cluster did not become clean. xtrace is switched off for the
# duration of the call and restored on every return path.
function standard_scrub_cluster() {
    local dir=$1
    local -n args=$2

    local OSDS=${args['osds_num']:-"3"}
    local pg_num=${args['pgs_in_pool']:-"8"}
    local poolname="${args['pool_name']:-test}"
    args['pool_name']=$poolname
    local extra_pars=${args['extras']}
    local debug_msg=${args['msg']:-"dbg"}
    local osd name_n_id

    # turn off '-x' (but remember previous state)
    local saved_echo_flag=${-//[^x]/}
    set +x

    run_mon "$dir" a --osd_pool_default_size="$OSDS" || {
        if [[ -n "$saved_echo_flag" ]]; then set -x; fi
        return 1
    }
    run_mgr "$dir" x || {
        if [[ -n "$saved_echo_flag" ]]; then set -x; fi
        return 1
    }

    local ceph_osd_args="--osd_deep_scrub_randomize_ratio=0 \
            --osd_scrub_interval_randomize_ratio=0 \
            --osd_scrub_backoff_ratio=0.0 \
            --osd_pool_default_pg_autoscale_mode=off \
            --osd_pg_stat_report_interval_max=1 \
            $extra_pars"

    for (( osd = 0; osd < OSDS; osd++ ))
    do
      # $ceph_osd_args is deliberately left unquoted: it holds a list of
      # separate command-line options
      run_osd "$dir" "$osd" $ceph_osd_args || {
          if [[ -n "$saved_echo_flag" ]]; then set -x; fi
          return 1
      }
    done

    create_pool "$poolname" "$pg_num" "$pg_num"
    wait_for_clean || {
        if [[ -n "$saved_echo_flag" ]]; then set -x; fi
        return 1
    }

    # update the in/out 'args' with the ID of the new pool
    sleep 1
    # pool lines of 'ceph osd dump' are of the form: pool <id> '<name>' ...
    # Select the line whose quoted name is exactly our pool (not merely a
    # line containing the name), and print the name followed by the ID.
    name_n_id=$(ceph osd dump |
        awk -v pn="$poolname" -v q="'" \
            '$1 == "pool" && $3 == (q pn q) { gsub(q, " ", $3); print $3, " ", $2 }')
    echo "standard_scrub_cluster: $debug_msg: test pool is $name_n_id"
    args['pool_id']="${name_n_id##* }"
    args['osd_args']=$ceph_osd_args
    if [[ -n "$saved_echo_flag" ]]; then set -x; fi
}


# Start the cluster "nodes" and create a pool for testing - wpq version.
#
# A variant of standard_scrub_cluster() that selects the wpq scheduler and sets a value to
# osd_scrub_sleep. To be used when the test is attempting to "catch" the scrubber during an
# ongoing scrub.
#
# See standard_scrub_cluster() for more details.
#
# $1: the test directory
# $2: [in/out] an array of configuration values
# $3: osd_scrub_sleep
#
# The function adds/updates the configuration dictionary with the name of the
# pool created, and its ID.
+function standard_scrub_wpq_cluster() { + local dir=$1 + local -n conf=$2 + local osd_sleep=$3 + + conf['extras']=" --osd_op_queue=wpq --osd_scrub_sleep=$osd_sleep ${conf['extras']}" + + standard_scrub_cluster $dir conf || return 1 +} + + +# A debug flag is set for the PG specified, causing the 'pg query' command to display +# an additional 'scrub sessions counter' field. +# +# $1: PG id +# +function set_query_debug() { + local pgid=$1 + local prim_osd=`ceph pg dump pgs_brief | \ + awk -v pg="^$pgid" -n -e '$0 ~ pg { print(gensub(/[^0-9]*([0-9]+).*/,"\\\\1","g",$5)); }' ` + + echo "Setting scrub debug data. Primary for $pgid is $prim_osd" + CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.$prim_osd) \ + scrubdebug $pgid set sessions +} + diff --git a/qa/standalone/special/ceph_objectstore_tool.py b/qa/standalone/special/ceph_objectstore_tool.py new file mode 100755 index 000000000..98a2c8723 --- /dev/null +++ b/qa/standalone/special/ceph_objectstore_tool.py @@ -0,0 +1,2045 @@ +#!/usr/bin/python3 + +from subprocess import call, check_output, DEVNULL + +import filecmp +import os +import subprocess +import math +import time +import sys +import re +import logging +import json +import tempfile +import platform + +logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.WARNING, + datefmt="%FT%T") + + +def wait_for_health(): + print("Wait for health_ok...", end="") + tries = 0 + while call("{path}/ceph health 2> /dev/null | grep -v 'HEALTH_OK\|HEALTH_WARN' > /dev/null".format(path=CEPH_BIN), shell=True) == 0: + tries += 1 + if tries == 150: + raise Exception("Time exceeded to go to health") + time.sleep(1) + print("DONE") + + +def get_pool_id(name, nullfd): + cmd = "{path}/ceph osd pool stats {pool}".format(pool=name, path=CEPH_BIN).split() + # pool {pool} id # .... 
grab the 4 field + return check_output(cmd, stderr=nullfd).decode().split()[3] + + +# return a list of unique PGS given an osd subdirectory +def get_osd_pgs(SUBDIR, ID): + PGS = [] + if ID: + endhead = re.compile("{id}.*_head$".format(id=ID)) + DIR = os.path.join(SUBDIR, "current") + PGS += [f for f in os.listdir(DIR) if os.path.isdir(os.path.join(DIR, f)) and (ID is None or endhead.match(f))] + PGS = [re.sub("_head", "", p) for p in PGS if "_head" in p] + return PGS + + +# return a sorted list of unique PGs given a directory +def get_pgs(DIR, ID): + OSDS = [f for f in os.listdir(DIR) if os.path.isdir(os.path.join(DIR, f)) and f.find("osd") == 0] + PGS = [] + for d in OSDS: + SUBDIR = os.path.join(DIR, d) + PGS += get_osd_pgs(SUBDIR, ID) + return sorted(set(PGS)) + + +# return a sorted list of PGS a subset of ALLPGS that contain objects with prefix specified +def get_objs(ALLPGS, prefix, DIR, ID): + OSDS = [f for f in os.listdir(DIR) if os.path.isdir(os.path.join(DIR, f)) and f.find("osd") == 0] + PGS = [] + for d in OSDS: + DIRL2 = os.path.join(DIR, d) + SUBDIR = os.path.join(DIRL2, "current") + for p in ALLPGS: + PGDIR = p + "_head" + if not os.path.isdir(os.path.join(SUBDIR, PGDIR)): + continue + FINALDIR = os.path.join(SUBDIR, PGDIR) + # See if there are any objects there + if any(f for f in [val for _, _, fl in os.walk(FINALDIR) for val in fl] if f.startswith(prefix)): + PGS += [p] + return sorted(set(PGS)) + + +# return a sorted list of OSDS which have data from a given PG +def get_osds(PG, DIR): + ALLOSDS = [f for f in os.listdir(DIR) if os.path.isdir(os.path.join(DIR, f)) and f.find("osd") == 0] + OSDS = [] + for d in ALLOSDS: + DIRL2 = os.path.join(DIR, d) + SUBDIR = os.path.join(DIRL2, "current") + PGDIR = PG + "_head" + if not os.path.isdir(os.path.join(SUBDIR, PGDIR)): + continue + OSDS += [d] + return sorted(OSDS) + + +def get_lines(filename): + tmpfd = open(filename, "r") + line = True + lines = [] + while line: + line = tmpfd.readline().rstrip('\n') 
+ if line: + lines += [line] + tmpfd.close() + os.unlink(filename) + return lines + + +def cat_file(level, filename): + if level < logging.getLogger().getEffectiveLevel(): + return + print("File: " + filename) + with open(filename, "r") as f: + while True: + line = f.readline().rstrip('\n') + if not line: + break + print(line) + print("<EOF>") + + +def vstart(new, opt="-o osd_pool_default_pg_autoscale_mode=off"): + print("vstarting....", end="") + NEW = new and "-n" or "-k" + call("MON=1 OSD=4 MDS=0 MGR=1 CEPH_PORT=7400 MGR_PYTHON_PATH={path}/src/pybind/mgr {path}/src/vstart.sh --filestore --short -l {new} -d {opt} > /dev/null 2>&1".format(new=NEW, opt=opt, path=CEPH_ROOT), shell=True) + print("DONE") + + +def test_failure(cmd, errmsg, tty=False): + if tty: + try: + ttyfd = open("/dev/tty", "rwb") + except Exception as e: + logging.info(str(e)) + logging.info("SKIP " + cmd) + return 0 + TMPFILE = r"/tmp/tmp.{pid}".format(pid=os.getpid()) + tmpfd = open(TMPFILE, "wb") + + logging.debug(cmd) + if tty: + ret = call(cmd, shell=True, stdin=ttyfd, stdout=ttyfd, stderr=tmpfd) + ttyfd.close() + else: + ret = call(cmd, shell=True, stderr=tmpfd) + tmpfd.close() + if ret == 0: + logging.error(cmd) + logging.error("Should have failed, but got exit 0") + return 1 + lines = get_lines(TMPFILE) + matched = [ l for l in lines if errmsg in l ] + if any(matched): + logging.info("Correctly failed with message \"" + matched[0] + "\"") + return 0 + else: + logging.error("Command: " + cmd ) + logging.error("Bad messages to stderr \"" + str(lines) + "\"") + logging.error("Expected \"" + errmsg + "\"") + return 1 + + +def get_nspace(num): + if num == 0: + return "" + return "ns{num}".format(num=num) + + +def verify(DATADIR, POOL, NAME_PREFIX, db): + TMPFILE = r"/tmp/tmp.{pid}".format(pid=os.getpid()) + ERRORS = 0 + for rawnsfile in [f for f in os.listdir(DATADIR) if f.split('-')[1].find(NAME_PREFIX) == 0]: + nsfile = rawnsfile.split("__")[0] + clone = rawnsfile.split("__")[1] + nspace = 
nsfile.split("-")[0] + file = nsfile.split("-")[1] + # Skip clones + if clone != "head": + continue + path = os.path.join(DATADIR, rawnsfile) + try: + os.unlink(TMPFILE) + except: + pass + cmd = "{path}/rados -p {pool} -N '{nspace}' get {file} {out}".format(pool=POOL, file=file, out=TMPFILE, nspace=nspace, path=CEPH_BIN) + logging.debug(cmd) + call(cmd, shell=True, stdout=DEVNULL, stderr=DEVNULL) + cmd = "diff -q {src} {result}".format(src=path, result=TMPFILE) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("{file} data not imported properly".format(file=file)) + ERRORS += 1 + try: + os.unlink(TMPFILE) + except: + pass + for key, val in db[nspace][file]["xattr"].items(): + cmd = "{path}/rados -p {pool} -N '{nspace}' getxattr {name} {key}".format(pool=POOL, name=file, key=key, nspace=nspace, path=CEPH_BIN) + logging.debug(cmd) + getval = check_output(cmd, shell=True, stderr=DEVNULL).decode() + logging.debug("getxattr {key} {val}".format(key=key, val=getval)) + if getval != val: + logging.error("getxattr of key {key} returned wrong val: {get} instead of {orig}".format(key=key, get=getval, orig=val)) + ERRORS += 1 + continue + hdr = db[nspace][file].get("omapheader", "") + cmd = "{path}/rados -p {pool} -N '{nspace}' getomapheader {name} {file}".format(pool=POOL, name=file, nspace=nspace, file=TMPFILE, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True, stderr=DEVNULL) + if ret != 0: + logging.error("rados getomapheader returned {ret}".format(ret=ret)) + ERRORS += 1 + else: + getlines = get_lines(TMPFILE) + assert(len(getlines) == 0 or len(getlines) == 1) + if len(getlines) == 0: + gethdr = "" + else: + gethdr = getlines[0] + logging.debug("header: {hdr}".format(hdr=gethdr)) + if gethdr != hdr: + logging.error("getomapheader returned wrong val: {get} instead of {orig}".format(get=gethdr, orig=hdr)) + ERRORS += 1 + for key, val in db[nspace][file]["omap"].items(): + cmd = "{path}/rados -p {pool} -N '{nspace}' getomapval 
{name} {key} {file}".format(pool=POOL, name=file, key=key, nspace=nspace, file=TMPFILE, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True, stderr=DEVNULL) + if ret != 0: + logging.error("getomapval returned {ret}".format(ret=ret)) + ERRORS += 1 + continue + getlines = get_lines(TMPFILE) + if len(getlines) != 1: + logging.error("Bad data from getomapval {lines}".format(lines=getlines)) + ERRORS += 1 + continue + getval = getlines[0] + logging.debug("getomapval {key} {val}".format(key=key, val=getval)) + if getval != val: + logging.error("getomapval returned wrong val: {get} instead of {orig}".format(get=getval, orig=val)) + ERRORS += 1 + try: + os.unlink(TMPFILE) + except: + pass + return ERRORS + + +def check_journal(jsondict): + errors = 0 + if 'header' not in jsondict: + logging.error("Key 'header' not in dump-journal") + errors += 1 + elif 'max_size' not in jsondict['header']: + logging.error("Key 'max_size' not in dump-journal header") + errors += 1 + else: + print("\tJournal max_size = {size}".format(size=jsondict['header']['max_size'])) + if 'entries' not in jsondict: + logging.error("Key 'entries' not in dump-journal output") + errors += 1 + elif len(jsondict['entries']) == 0: + logging.info("No entries in journal found") + else: + errors += check_journal_entries(jsondict['entries']) + return errors + + +def check_journal_entries(entries): + errors = 0 + for enum in range(len(entries)): + if 'offset' not in entries[enum]: + logging.error("No 'offset' key in entry {e}".format(e=enum)) + errors += 1 + if 'seq' not in entries[enum]: + logging.error("No 'seq' key in entry {e}".format(e=enum)) + errors += 1 + if 'transactions' not in entries[enum]: + logging.error("No 'transactions' key in entry {e}".format(e=enum)) + errors += 1 + elif len(entries[enum]['transactions']) == 0: + logging.error("No transactions found in entry {e}".format(e=enum)) + errors += 1 + else: + errors += check_entry_transactions(entries[enum], enum) + return errors + + +def 
check_entry_transactions(entry, enum): + errors = 0 + for tnum in range(len(entry['transactions'])): + if 'trans_num' not in entry['transactions'][tnum]: + logging.error("Key 'trans_num' missing from entry {e} trans {t}".format(e=enum, t=tnum)) + errors += 1 + elif entry['transactions'][tnum]['trans_num'] != tnum: + ft = entry['transactions'][tnum]['trans_num'] + logging.error("Bad trans_num ({ft}) entry {e} trans {t}".format(ft=ft, e=enum, t=tnum)) + errors += 1 + if 'ops' not in entry['transactions'][tnum]: + logging.error("Key 'ops' missing from entry {e} trans {t}".format(e=enum, t=tnum)) + errors += 1 + else: + errors += check_transaction_ops(entry['transactions'][tnum]['ops'], enum, tnum) + return errors + + +def check_transaction_ops(ops, enum, tnum): + if len(ops) == 0: + logging.warning("No ops found in entry {e} trans {t}".format(e=enum, t=tnum)) + errors = 0 + for onum in range(len(ops)): + if 'op_num' not in ops[onum]: + logging.error("Key 'op_num' missing from entry {e} trans {t} op {o}".format(e=enum, t=tnum, o=onum)) + errors += 1 + elif ops[onum]['op_num'] != onum: + fo = ops[onum]['op_num'] + logging.error("Bad op_num ({fo}) from entry {e} trans {t} op {o}".format(fo=fo, e=enum, t=tnum, o=onum)) + errors += 1 + if 'op_name' not in ops[onum]: + logging.error("Key 'op_name' missing from entry {e} trans {t} op {o}".format(e=enum, t=tnum, o=onum)) + errors += 1 + return errors + + +def test_dump_journal(CFSD_PREFIX, osds): + ERRORS = 0 + pid = os.getpid() + TMPFILE = r"/tmp/tmp.{pid}".format(pid=pid) + + for osd in osds: + # Test --op dump-journal by loading json + cmd = (CFSD_PREFIX + "--op dump-journal --format json").format(osd=osd) + logging.debug(cmd) + tmpfd = open(TMPFILE, "wb") + ret = call(cmd, shell=True, stdout=tmpfd) + if ret != 0: + logging.error("Bad exit status {ret} from {cmd}".format(ret=ret, cmd=cmd)) + ERRORS += 1 + continue + tmpfd.close() + tmpfd = open(TMPFILE, "r") + jsondict = json.load(tmpfd) + tmpfd.close() + 
os.unlink(TMPFILE) + + journal_errors = check_journal(jsondict) + if journal_errors != 0: + logging.error(jsondict) + ERRORS += journal_errors + + return ERRORS + +CEPH_BUILD_DIR = os.environ.get('CEPH_BUILD_DIR') +CEPH_BIN = os.environ.get('CEPH_BIN') +CEPH_ROOT = os.environ.get('CEPH_ROOT') + +if not CEPH_BUILD_DIR: + CEPH_BUILD_DIR=os.getcwd() + os.putenv('CEPH_BUILD_DIR', CEPH_BUILD_DIR) + CEPH_BIN=os.path.join(CEPH_BUILD_DIR, 'bin') + os.putenv('CEPH_BIN', CEPH_BIN) + CEPH_ROOT=os.path.dirname(CEPH_BUILD_DIR) + os.putenv('CEPH_ROOT', CEPH_ROOT) + CEPH_LIB=os.path.join(CEPH_BUILD_DIR, 'lib') + os.putenv('CEPH_LIB', CEPH_LIB) + +try: + os.mkdir("td") +except: + pass # ok if this is already there +CEPH_DIR = os.path.join(CEPH_BUILD_DIR, os.path.join("td", "cot_dir")) +CEPH_CONF = os.path.join(CEPH_DIR, 'ceph.conf') + +def kill_daemons(): + call("{path}/init-ceph -c {conf} stop > /dev/null 2>&1".format(conf=CEPH_CONF, path=CEPH_BIN), shell=True) + + +def check_data(DATADIR, TMPFILE, OSDDIR, SPLIT_NAME): + repcount = 0 + ERRORS = 0 + for rawnsfile in [f for f in os.listdir(DATADIR) if f.split('-')[1].find(SPLIT_NAME) == 0]: + nsfile = rawnsfile.split("__")[0] + clone = rawnsfile.split("__")[1] + nspace = nsfile.split("-")[0] + file = nsfile.split("-")[1] + "__" + clone + # Skip clones + if clone != "head": + continue + path = os.path.join(DATADIR, rawnsfile) + tmpfd = open(TMPFILE, "wb") + cmd = "find {dir} -name '{file}_*_{nspace}_*'".format(dir=OSDDIR, file=file, nspace=nspace) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=tmpfd) + if ret: + logging.critical("INTERNAL ERROR") + return 1 + tmpfd.close() + obj_locs = get_lines(TMPFILE) + if len(obj_locs) == 0: + logging.error("Can't find imported object {name}".format(name=file)) + ERRORS += 1 + for obj_loc in obj_locs: + # For btrfs skip snap_* dirs + if re.search("/snap_[0-9]*/", obj_loc) is not None: + continue + repcount += 1 + cmd = "diff -q {src} {obj_loc}".format(src=path, obj_loc=obj_loc) + 
logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("{file} data not imported properly into {obj}".format(file=file, obj=obj_loc)) + ERRORS += 1 + return ERRORS, repcount + + +def set_osd_weight(CFSD_PREFIX, osd_ids, osd_path, weight): + # change the weight of osd.0 to math.pi in the newest osdmap of given osd + osdmap_file = tempfile.NamedTemporaryFile(delete=True) + cmd = (CFSD_PREFIX + "--op get-osdmap --file {osdmap_file}").format(osd=osd_path, + osdmap_file=osdmap_file.name) + output = check_output(cmd, shell=True).decode() + epoch = int(re.findall('#(\d+)', output)[0]) + + new_crush_file = tempfile.NamedTemporaryFile(delete=True) + old_crush_file = tempfile.NamedTemporaryFile(delete=True) + ret = call("{path}/osdmaptool --export-crush {crush_file} {osdmap_file}".format(osdmap_file=osdmap_file.name, + crush_file=old_crush_file.name, path=CEPH_BIN), + stdout=DEVNULL, + stderr=DEVNULL, + shell=True) + assert(ret == 0) + + for osd_id in osd_ids: + cmd = "{path}/crushtool -i {crush_file} --reweight-item osd.{osd} {weight} -o {new_crush_file}".format(osd=osd_id, + crush_file=old_crush_file.name, + weight=weight, + new_crush_file=new_crush_file.name, path=CEPH_BIN) + ret = call(cmd, stdout=DEVNULL, shell=True) + assert(ret == 0) + old_crush_file, new_crush_file = new_crush_file, old_crush_file + + # change them back, since we don't need to preapre for another round + old_crush_file, new_crush_file = new_crush_file, old_crush_file + old_crush_file.close() + + ret = call("{path}/osdmaptool --import-crush {crush_file} {osdmap_file}".format(osdmap_file=osdmap_file.name, + crush_file=new_crush_file.name, path=CEPH_BIN), + stdout=DEVNULL, + stderr=DEVNULL, + shell=True) + assert(ret == 0) + + # Minimum test of --dry-run by using it, but not checking anything + cmd = CFSD_PREFIX + "--op set-osdmap --file {osdmap_file} --epoch {epoch} --force --dry-run" + cmd = cmd.format(osd=osd_path, osdmap_file=osdmap_file.name, epoch=epoch) + ret = call(cmd, 
stdout=DEVNULL, shell=True) + assert(ret == 0) + + # osdmaptool increases the epoch of the changed osdmap, so we need to force the tool + # to use use a different epoch than the one in osdmap + cmd = CFSD_PREFIX + "--op set-osdmap --file {osdmap_file} --epoch {epoch} --force" + cmd = cmd.format(osd=osd_path, osdmap_file=osdmap_file.name, epoch=epoch) + ret = call(cmd, stdout=DEVNULL, shell=True) + + return ret == 0 + +def get_osd_weights(CFSD_PREFIX, osd_ids, osd_path): + osdmap_file = tempfile.NamedTemporaryFile(delete=True) + cmd = (CFSD_PREFIX + "--op get-osdmap --file {osdmap_file}").format(osd=osd_path, + osdmap_file=osdmap_file.name) + ret = call(cmd, stdout=DEVNULL, shell=True) + if ret != 0: + return None + # we have to read the weights from the crush map, even we can query the weights using + # osdmaptool, but please keep in mind, they are different: + # item weights in crush map versus weight associated with each osd in osdmap + crush_file = tempfile.NamedTemporaryFile(delete=True) + ret = call("{path}/osdmaptool --export-crush {crush_file} {osdmap_file}".format(osdmap_file=osdmap_file.name, + crush_file=crush_file.name, path=CEPH_BIN), + stdout=DEVNULL, + shell=True) + assert(ret == 0) + output = check_output("{path}/crushtool --tree -i {crush_file} | tail -n {num_osd}".format(crush_file=crush_file.name, + num_osd=len(osd_ids), path=CEPH_BIN), + stderr=DEVNULL, + shell=True).decode() + weights = [] + for line in output.strip().split('\n'): + print(line) + linev = re.split('\s+', line) + if linev[0] == '': + linev.pop(0) + print('linev %s' % linev) + weights.append(float(linev[2])) + + return weights + + +def test_get_set_osdmap(CFSD_PREFIX, osd_ids, osd_paths): + print("Testing get-osdmap and set-osdmap") + errors = 0 + kill_daemons() + weight = 1 / math.e # just some magic number in [0, 1] + changed = [] + for osd_path in osd_paths: + if set_osd_weight(CFSD_PREFIX, osd_ids, osd_path, weight): + changed.append(osd_path) + else: + logging.warning("Failed 
to change the weights: {0}".format(osd_path)) + # i am pissed off if none of the store gets changed + if not changed: + errors += 1 + + for osd_path in changed: + weights = get_osd_weights(CFSD_PREFIX, osd_ids, osd_path) + if not weights: + errors += 1 + continue + if any(abs(w - weight) > 1e-5 for w in weights): + logging.warning("Weight is not changed: {0} != {1}".format(weights, weight)) + errors += 1 + return errors + +def test_get_set_inc_osdmap(CFSD_PREFIX, osd_path): + # incrementals are not used unless we need to build an MOSDMap to update + # OSD's peers, so an obvious way to test it is simply overwrite an epoch + # with a different copy, and read it back to see if it matches. + kill_daemons() + file_e2 = tempfile.NamedTemporaryFile(delete=True) + cmd = (CFSD_PREFIX + "--op get-inc-osdmap --file {file}").format(osd=osd_path, + file=file_e2.name) + output = check_output(cmd, shell=True).decode() + epoch = int(re.findall('#(\d+)', output)[0]) + # backup e1 incremental before overwriting it + epoch -= 1 + file_e1_backup = tempfile.NamedTemporaryFile(delete=True) + cmd = CFSD_PREFIX + "--op get-inc-osdmap --epoch {epoch} --file {file}" + ret = call(cmd.format(osd=osd_path, epoch=epoch, file=file_e1_backup.name), shell=True) + if ret: return 1 + # overwrite e1 with e2 + cmd = CFSD_PREFIX + "--op set-inc-osdmap --force --epoch {epoch} --file {file}" + ret = call(cmd.format(osd=osd_path, epoch=epoch, file=file_e2.name), shell=True) + if ret: return 1 + # Use dry-run to set back to e1 which shouldn't happen + cmd = CFSD_PREFIX + "--op set-inc-osdmap --dry-run --epoch {epoch} --file {file}" + ret = call(cmd.format(osd=osd_path, epoch=epoch, file=file_e1_backup.name), shell=True) + if ret: return 1 + # read from e1 + file_e1_read = tempfile.NamedTemporaryFile(delete=True) + cmd = CFSD_PREFIX + "--op get-inc-osdmap --epoch {epoch} --file {file}" + ret = call(cmd.format(osd=osd_path, epoch=epoch, file=file_e1_read.name), shell=True) + if ret: return 1 + errors = 0 + 
try: + if not filecmp.cmp(file_e2.name, file_e1_read.name, shallow=False): + logging.error("{{get,set}}-inc-osdmap mismatch {0} != {1}".format(file_e2.name, file_e1_read.name)) + errors += 1 + finally: + # revert the change with file_e1_backup + cmd = CFSD_PREFIX + "--op set-inc-osdmap --epoch {epoch} --file {file}" + ret = call(cmd.format(osd=osd_path, epoch=epoch, file=file_e1_backup.name), shell=True) + if ret: + logging.error("Failed to revert the changed inc-osdmap") + errors += 1 + + return errors + + +def test_removeall(CFSD_PREFIX, db, OBJREPPGS, REP_POOL, CEPH_BIN, OSDDIR, REP_NAME, NUM_CLONED_REP_OBJECTS): + # Test removeall + TMPFILE = r"/tmp/tmp.{pid}".format(pid=os.getpid()) + nullfd = open(os.devnull, "w") + errors=0 + print("Test removeall") + kill_daemons() + test_force_remove = 0 + for nspace in db.keys(): + for basename in db[nspace].keys(): + JSON = db[nspace][basename]['json'] + for pg in OBJREPPGS: + OSDS = get_osds(pg, OSDDIR) + for osd in OSDS: + DIR = os.path.join(OSDDIR, os.path.join(osd, os.path.join("current", "{pg}_head".format(pg=pg)))) + fnames = [f for f in os.listdir(DIR) if os.path.isfile(os.path.join(DIR, f)) + and f.split("_")[0] == basename and f.split("_")[4] == nspace] + if not fnames: + continue + + if int(basename.split(REP_NAME)[1]) <= int(NUM_CLONED_REP_OBJECTS): + cmd = (CFSD_PREFIX + "'{json}' remove").format(osd=osd, json=JSON) + errors += test_failure(cmd, "Clones are present, use removeall to delete everything") + if not test_force_remove: + + cmd = (CFSD_PREFIX + " '{json}' set-attr snapset /dev/null").format(osd=osd, json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("Test set-up to corrupt snapset failed for {json}".format(json=JSON)) + errors += 1 + # Do the removeall since this test failed to set-up + else: + test_force_remove = 1 + + cmd = (CFSD_PREFIX + " '{json}' --force remove").format(osd=osd, json=JSON) + logging.debug(cmd) + ret = 
call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("forced remove with corrupt snapset failed for {json}".format(json=JSON)) + errors += 1 + continue + + cmd = (CFSD_PREFIX + " --force --dry-run '{json}' remove").format(osd=osd, json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("remove with --force failed for {json}".format(json=JSON)) + errors += 1 + + cmd = (CFSD_PREFIX + " --dry-run '{json}' removeall").format(osd=osd, json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("removeall failed for {json}".format(json=JSON)) + errors += 1 + + cmd = (CFSD_PREFIX + " '{json}' removeall").format(osd=osd, json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("removeall failed for {json}".format(json=JSON)) + errors += 1 + + tmpfd = open(TMPFILE, "w") + cmd = (CFSD_PREFIX + "--op list --pgid {pg} --namespace {ns} {name}").format(osd=osd, pg=pg, ns=nspace, name=basename) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=tmpfd) + if ret != 0: + logging.error("Bad exit status {ret} from {cmd}".format(ret=ret, cmd=cmd)) + errors += 1 + tmpfd.close() + lines = get_lines(TMPFILE) + if len(lines) != 0: + logging.error("Removeall didn't remove all objects {ns}/{name} : {lines}".format(ns=nspace, name=basename, lines=lines)) + errors += 1 + vstart(new=False) + wait_for_health() + cmd = "{path}/rados -p {pool} rmsnap snap1".format(pool=REP_POOL, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("rados rmsnap failed") + errors += 1 + time.sleep(2) + wait_for_health() + return errors + + +def main(argv): + stdout = sys.stdout.buffer + if len(argv) > 1 and argv[1] == "debug": + nullfd = stdout + else: + nullfd = DEVNULL + + call("rm -fr {dir}; mkdir -p 
{dir}".format(dir=CEPH_DIR), shell=True) + os.chdir(CEPH_DIR) + os.environ["CEPH_DIR"] = CEPH_DIR + OSDDIR = "dev" + REP_POOL = "rep_pool" + REP_NAME = "REPobject" + EC_POOL = "ec_pool" + EC_NAME = "ECobject" + if len(argv) > 0 and argv[0] == 'large': + PG_COUNT = 12 + NUM_REP_OBJECTS = 200 + NUM_CLONED_REP_OBJECTS = 50 + NUM_EC_OBJECTS = 12 + NUM_NSPACES = 4 + # Larger data sets for first object per namespace + DATALINECOUNT = 50000 + # Number of objects to do xattr/omap testing on + ATTR_OBJS = 10 + else: + PG_COUNT = 4 + NUM_REP_OBJECTS = 2 + NUM_CLONED_REP_OBJECTS = 2 + NUM_EC_OBJECTS = 2 + NUM_NSPACES = 2 + # Larger data sets for first object per namespace + DATALINECOUNT = 10 + # Number of objects to do xattr/omap testing on + ATTR_OBJS = 2 + ERRORS = 0 + pid = os.getpid() + TESTDIR = "/tmp/test.{pid}".format(pid=pid) + DATADIR = "/tmp/data.{pid}".format(pid=pid) + CFSD_PREFIX = CEPH_BIN + "/ceph-objectstore-tool --no-mon-config --data-path " + OSDDIR + "/{osd} " + PROFNAME = "testecprofile" + + os.environ['CEPH_CONF'] = CEPH_CONF + vstart(new=True) + wait_for_health() + + cmd = "{path}/ceph osd pool create {pool} {pg} {pg} replicated".format(pool=REP_POOL, pg=PG_COUNT, path=CEPH_BIN) + logging.debug(cmd) + call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + time.sleep(2) + REPID = get_pool_id(REP_POOL, nullfd) + + print("Created Replicated pool #{repid}".format(repid=REPID)) + + cmd = "{path}/ceph osd erasure-code-profile set {prof} crush-failure-domain=osd".format(prof=PROFNAME, path=CEPH_BIN) + logging.debug(cmd) + call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + cmd = "{path}/ceph osd erasure-code-profile get {prof}".format(prof=PROFNAME, path=CEPH_BIN) + logging.debug(cmd) + call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + cmd = "{path}/ceph osd pool create {pool} {pg} {pg} erasure {prof}".format(pool=EC_POOL, prof=PROFNAME, pg=PG_COUNT, path=CEPH_BIN) + logging.debug(cmd) + call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + ECID = 
get_pool_id(EC_POOL, nullfd) + + print("Created Erasure coded pool #{ecid}".format(ecid=ECID)) + + print("Creating {objs} objects in replicated pool".format(objs=(NUM_REP_OBJECTS*NUM_NSPACES))) + cmd = "mkdir -p {datadir}".format(datadir=DATADIR) + logging.debug(cmd) + call(cmd, shell=True) + + db = {} + + objects = range(1, NUM_REP_OBJECTS + 1) + nspaces = range(NUM_NSPACES) + for n in nspaces: + nspace = get_nspace(n) + + db[nspace] = {} + + for i in objects: + NAME = REP_NAME + "{num}".format(num=i) + LNAME = nspace + "-" + NAME + DDNAME = os.path.join(DATADIR, LNAME) + DDNAME += "__head" + + cmd = "rm -f " + DDNAME + logging.debug(cmd) + call(cmd, shell=True) + + if i == 1: + dataline = range(DATALINECOUNT) + else: + dataline = range(1) + fd = open(DDNAME, "w") + data = "This is the replicated data for " + LNAME + "\n" + for _ in dataline: + fd.write(data) + fd.close() + + cmd = "{path}/rados -p {pool} -N '{nspace}' put {name} {ddname}".format(pool=REP_POOL, name=NAME, ddname=DDNAME, nspace=nspace, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True, stderr=nullfd) + if ret != 0: + logging.critical("Rados put command failed with {ret}".format(ret=ret)) + return 1 + + db[nspace][NAME] = {} + + if i < ATTR_OBJS + 1: + keys = range(i) + else: + keys = range(0) + db[nspace][NAME]["xattr"] = {} + for k in keys: + if k == 0: + continue + mykey = "key{i}-{k}".format(i=i, k=k) + myval = "val{i}-{k}".format(i=i, k=k) + cmd = "{path}/rados -p {pool} -N '{nspace}' setxattr {name} {key} {val}".format(pool=REP_POOL, name=NAME, key=mykey, val=myval, nspace=nspace, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("setxattr failed with {ret}".format(ret=ret)) + ERRORS += 1 + db[nspace][NAME]["xattr"][mykey] = myval + + # Create omap header in all objects but REPobject1 + if i < ATTR_OBJS + 1 and i != 1: + myhdr = "hdr{i}".format(i=i) + cmd = "{path}/rados -p {pool} -N '{nspace}' setomapheader {name} 
{hdr}".format(pool=REP_POOL, name=NAME, hdr=myhdr, nspace=nspace, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.critical("setomapheader failed with {ret}".format(ret=ret)) + ERRORS += 1 + db[nspace][NAME]["omapheader"] = myhdr + + db[nspace][NAME]["omap"] = {} + for k in keys: + if k == 0: + continue + mykey = "okey{i}-{k}".format(i=i, k=k) + myval = "oval{i}-{k}".format(i=i, k=k) + cmd = "{path}/rados -p {pool} -N '{nspace}' setomapval {name} {key} {val}".format(pool=REP_POOL, name=NAME, key=mykey, val=myval, nspace=nspace, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.critical("setomapval failed with {ret}".format(ret=ret)) + db[nspace][NAME]["omap"][mykey] = myval + + # Create some clones + cmd = "{path}/rados -p {pool} mksnap snap1".format(pool=REP_POOL, path=CEPH_BIN) + logging.debug(cmd) + call(cmd, shell=True) + + objects = range(1, NUM_CLONED_REP_OBJECTS + 1) + nspaces = range(NUM_NSPACES) + for n in nspaces: + nspace = get_nspace(n) + + for i in objects: + NAME = REP_NAME + "{num}".format(num=i) + LNAME = nspace + "-" + NAME + DDNAME = os.path.join(DATADIR, LNAME) + # First clone + CLONENAME = DDNAME + "__1" + DDNAME += "__head" + + cmd = "mv -f " + DDNAME + " " + CLONENAME + logging.debug(cmd) + call(cmd, shell=True) + + if i == 1: + dataline = range(DATALINECOUNT) + else: + dataline = range(1) + fd = open(DDNAME, "w") + data = "This is the replicated data after a snapshot for " + LNAME + "\n" + for _ in dataline: + fd.write(data) + fd.close() + + cmd = "{path}/rados -p {pool} -N '{nspace}' put {name} {ddname}".format(pool=REP_POOL, name=NAME, ddname=DDNAME, nspace=nspace, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True, stderr=nullfd) + if ret != 0: + logging.critical("Rados put command failed with {ret}".format(ret=ret)) + return 1 + + print("Creating {objs} objects in erasure coded pool".format(objs=(NUM_EC_OBJECTS*NUM_NSPACES))) + + objects = 
range(1, NUM_EC_OBJECTS + 1) + nspaces = range(NUM_NSPACES) + for n in nspaces: + nspace = get_nspace(n) + + for i in objects: + NAME = EC_NAME + "{num}".format(num=i) + LNAME = nspace + "-" + NAME + DDNAME = os.path.join(DATADIR, LNAME) + DDNAME += "__head" + + cmd = "rm -f " + DDNAME + logging.debug(cmd) + call(cmd, shell=True) + + if i == 1: + dataline = range(DATALINECOUNT) + else: + dataline = range(1) + fd = open(DDNAME, "w") + data = "This is the erasure coded data for " + LNAME + "\n" + for j in dataline: + fd.write(data) + fd.close() + + cmd = "{path}/rados -p {pool} -N '{nspace}' put {name} {ddname}".format(pool=EC_POOL, name=NAME, ddname=DDNAME, nspace=nspace, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True, stderr=nullfd) + if ret != 0: + logging.critical("Erasure coded pool creation failed with {ret}".format(ret=ret)) + return 1 + + db[nspace][NAME] = {} + + db[nspace][NAME]["xattr"] = {} + if i < ATTR_OBJS + 1: + keys = range(i) + else: + keys = range(0) + for k in keys: + if k == 0: + continue + mykey = "key{i}-{k}".format(i=i, k=k) + myval = "val{i}-{k}".format(i=i, k=k) + cmd = "{path}/rados -p {pool} -N '{nspace}' setxattr {name} {key} {val}".format(pool=EC_POOL, name=NAME, key=mykey, val=myval, nspace=nspace, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("setxattr failed with {ret}".format(ret=ret)) + ERRORS += 1 + db[nspace][NAME]["xattr"][mykey] = myval + + # Omap isn't supported in EC pools + db[nspace][NAME]["omap"] = {} + + logging.debug(db) + + kill_daemons() + + if ERRORS: + logging.critical("Unable to set up test") + return 1 + + ALLREPPGS = get_pgs(OSDDIR, REPID) + logging.debug(ALLREPPGS) + ALLECPGS = get_pgs(OSDDIR, ECID) + logging.debug(ALLECPGS) + + OBJREPPGS = get_objs(ALLREPPGS, REP_NAME, OSDDIR, REPID) + logging.debug(OBJREPPGS) + OBJECPGS = get_objs(ALLECPGS, EC_NAME, OSDDIR, ECID) + logging.debug(OBJECPGS) + + ONEPG = ALLREPPGS[0] + logging.debug(ONEPG) + osds = 
get_osds(ONEPG, OSDDIR) + ONEOSD = osds[0] + logging.debug(ONEOSD) + + print("Test invalid parameters") + # On export can't use stdout to a terminal + cmd = (CFSD_PREFIX + "--op export --pgid {pg}").format(osd=ONEOSD, pg=ONEPG) + ERRORS += test_failure(cmd, "stdout is a tty and no --file filename specified", tty=True) + + # On export can't use stdout to a terminal + cmd = (CFSD_PREFIX + "--op export --pgid {pg} --file -").format(osd=ONEOSD, pg=ONEPG) + ERRORS += test_failure(cmd, "stdout is a tty and no --file filename specified", tty=True) + + # Prep a valid ec export file for import failure tests + ONEECPG = ALLECPGS[0] + osds = get_osds(ONEECPG, OSDDIR) + ONEECOSD = osds[0] + OTHERFILE = "/tmp/foo.{pid}".format(pid=pid) + cmd = (CFSD_PREFIX + "--op export --pgid {pg} --file {file}").format(osd=ONEECOSD, pg=ONEECPG, file=OTHERFILE) + logging.debug(cmd) + call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + + os.unlink(OTHERFILE) + + # Prep a valid export file for import failure tests + OTHERFILE = "/tmp/foo.{pid}".format(pid=pid) + cmd = (CFSD_PREFIX + "--op export --pgid {pg} --file {file}").format(osd=ONEOSD, pg=ONEPG, file=OTHERFILE) + logging.debug(cmd) + call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + + # On import can't specify a different pgid than the file + TMPPG="{pool}.80".format(pool=REPID) + cmd = (CFSD_PREFIX + "--op import --pgid 12.dd --file {file}").format(osd=ONEOSD, pg=TMPPG, file=OTHERFILE) + ERRORS += test_failure(cmd, "specified pgid 12.dd does not match actual pgid") + + os.unlink(OTHERFILE) + cmd = (CFSD_PREFIX + "--op import --file {FOO}").format(osd=ONEOSD, FOO=OTHERFILE) + ERRORS += test_failure(cmd, "file: {FOO}: No such file or directory".format(FOO=OTHERFILE)) + + cmd = "{path}/ceph-objectstore-tool --no-mon-config --data-path BAD_DATA_PATH --op list".format(path=CEPH_BIN) + ERRORS += test_failure(cmd, "data-path: BAD_DATA_PATH: No such file or directory") + + cmd = (CFSD_PREFIX + "--journal-path BAD_JOURNAL_PATH --op 
list").format(osd=ONEOSD) + ERRORS += test_failure(cmd, "journal-path: BAD_JOURNAL_PATH: No such file or directory") + + cmd = (CFSD_PREFIX + "--journal-path /bin --op list").format(osd=ONEOSD) + ERRORS += test_failure(cmd, "journal-path: /bin: (21) Is a directory") + + # On import can't use stdin from a terminal + cmd = (CFSD_PREFIX + "--op import --pgid {pg}").format(osd=ONEOSD, pg=ONEPG) + ERRORS += test_failure(cmd, "stdin is a tty and no --file filename specified", tty=True) + + # On import can't use stdin from a terminal + cmd = (CFSD_PREFIX + "--op import --pgid {pg} --file -").format(osd=ONEOSD, pg=ONEPG) + ERRORS += test_failure(cmd, "stdin is a tty and no --file filename specified", tty=True) + + # Specify a bad --type + os.mkdir(OSDDIR + "/fakeosd") + cmd = ("{path}/ceph-objectstore-tool --no-mon-config --data-path " + OSDDIR + "/{osd} --type foobar --op list --pgid {pg}").format(osd="fakeosd", pg=ONEPG, path=CEPH_BIN) + ERRORS += test_failure(cmd, "Unable to create store of type foobar") + + # Don't specify a data-path + cmd = "{path}/ceph-objectstore-tool --no-mon-config --type memstore --op list --pgid {pg}".format(pg=ONEPG, path=CEPH_BIN) + ERRORS += test_failure(cmd, "Must provide --data-path") + + cmd = (CFSD_PREFIX + "--op remove --pgid 2.0").format(osd=ONEOSD) + ERRORS += test_failure(cmd, "Please use export-remove or you must use --force option") + + cmd = (CFSD_PREFIX + "--force --op remove").format(osd=ONEOSD) + ERRORS += test_failure(cmd, "Must provide pgid") + + # Don't secify a --op nor object command + cmd = CFSD_PREFIX.format(osd=ONEOSD) + ERRORS += test_failure(cmd, "Must provide --op or object command...") + + # Specify a bad --op command + cmd = (CFSD_PREFIX + "--op oops").format(osd=ONEOSD) + ERRORS += test_failure(cmd, "Must provide --op (info, log, remove, mkfs, fsck, repair, export, export-remove, import, list, fix-lost, list-pgs, dump-journal, dump-super, meta-list, get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, 
mark-complete, reset-last-complete, dump-export, trim-pg-log, statfs)") + + # Provide just the object param not a command + cmd = (CFSD_PREFIX + "object").format(osd=ONEOSD) + ERRORS += test_failure(cmd, "Invalid syntax, missing command") + + # Provide an object name that doesn't exist + cmd = (CFSD_PREFIX + "NON_OBJECT get-bytes").format(osd=ONEOSD) + ERRORS += test_failure(cmd, "No object id 'NON_OBJECT' found") + + # Provide an invalid object command + cmd = (CFSD_PREFIX + "--pgid {pg} '' notacommand").format(osd=ONEOSD, pg=ONEPG) + ERRORS += test_failure(cmd, "Unknown object command 'notacommand'") + + cmd = (CFSD_PREFIX + "foo list-omap").format(osd=ONEOSD, pg=ONEPG) + ERRORS += test_failure(cmd, "No object id 'foo' found or invalid JSON specified") + + cmd = (CFSD_PREFIX + "'{{\"oid\":\"obj4\",\"key\":\"\",\"snapid\":-1,\"hash\":2826278768,\"max\":0,\"pool\":1,\"namespace\":\"\"}}' list-omap").format(osd=ONEOSD, pg=ONEPG) + ERRORS += test_failure(cmd, "Without --pgid the object '{\"oid\":\"obj4\",\"key\":\"\",\"snapid\":-1,\"hash\":2826278768,\"max\":0,\"pool\":1,\"namespace\":\"\"}' must be a JSON array") + + cmd = (CFSD_PREFIX + "'[]' list-omap").format(osd=ONEOSD, pg=ONEPG) + ERRORS += test_failure(cmd, "Object '[]' must be a JSON array with 2 elements") + + cmd = (CFSD_PREFIX + "'[\"1.0\"]' list-omap").format(osd=ONEOSD, pg=ONEPG) + ERRORS += test_failure(cmd, "Object '[\"1.0\"]' must be a JSON array with 2 elements") + + cmd = (CFSD_PREFIX + "'[\"1.0\", 5, 8, 9]' list-omap").format(osd=ONEOSD, pg=ONEPG) + ERRORS += test_failure(cmd, "Object '[\"1.0\", 5, 8, 9]' must be a JSON array with 2 elements") + + cmd = (CFSD_PREFIX + "'[1, 2]' list-omap").format(osd=ONEOSD, pg=ONEPG) + ERRORS += test_failure(cmd, "Object '[1, 2]' must be a JSON array with the first element a string") + + cmd = (CFSD_PREFIX + "'[\"1.3\",{{\"snapid\":\"not an int\"}}]' list-omap").format(osd=ONEOSD, pg=ONEPG) + ERRORS += test_failure(cmd, "Decode object JSON error: value type is 2 
not 4") + + TMPFILE = r"/tmp/tmp.{pid}".format(pid=pid) + ALLPGS = OBJREPPGS + OBJECPGS + OSDS = get_osds(ALLPGS[0], OSDDIR) + osd = OSDS[0] + + print("Test all --op dump-journal") + ALLOSDS = [f for f in os.listdir(OSDDIR) if os.path.isdir(os.path.join(OSDDIR, f)) and f.find("osd") == 0] + ERRORS += test_dump_journal(CFSD_PREFIX, ALLOSDS) + + # Test --op list and generate json for all objects + print("Test --op list variants") + + # retrieve all objects from all PGs + tmpfd = open(TMPFILE, "wb") + cmd = (CFSD_PREFIX + "--op list --format json").format(osd=osd) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=tmpfd) + if ret != 0: + logging.error("Bad exit status {ret} from {cmd}".format(ret=ret, cmd=cmd)) + ERRORS += 1 + tmpfd.close() + lines = get_lines(TMPFILE) + JSONOBJ = sorted(set(lines)) + (pgid, coll, jsondict) = json.loads(JSONOBJ[0])[0] + + # retrieve all objects in a given PG + tmpfd = open(OTHERFILE, "ab") + cmd = (CFSD_PREFIX + "--op list --pgid {pg} --format json").format(osd=osd, pg=pgid) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=tmpfd) + if ret != 0: + logging.error("Bad exit status {ret} from {cmd}".format(ret=ret, cmd=cmd)) + ERRORS += 1 + tmpfd.close() + lines = get_lines(OTHERFILE) + JSONOBJ = sorted(set(lines)) + (other_pgid, other_coll, other_jsondict) = json.loads(JSONOBJ[0])[0] + + if pgid != other_pgid or jsondict != other_jsondict or coll != other_coll: + logging.error("the first line of --op list is different " + "from the first line of --op list --pgid {pg}".format(pg=pgid)) + ERRORS += 1 + + # retrieve all objects with a given name in a given PG + tmpfd = open(OTHERFILE, "wb") + cmd = (CFSD_PREFIX + "--op list --pgid {pg} {object} --format json").format(osd=osd, pg=pgid, object=jsondict['oid']) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=tmpfd) + if ret != 0: + logging.error("Bad exit status {ret} from {cmd}".format(ret=ret, cmd=cmd)) + ERRORS += 1 + tmpfd.close() + lines = get_lines(OTHERFILE) + 
JSONOBJ = sorted(set(lines)) + (other_pgid, other_coll, other_jsondict) in json.loads(JSONOBJ[0])[0] + + if pgid != other_pgid or jsondict != other_jsondict or coll != other_coll: + logging.error("the first line of --op list is different " + "from the first line of --op list --pgid {pg} {object}".format(pg=pgid, object=jsondict['oid'])) + ERRORS += 1 + + print("Test --op list by generating json for all objects using default format") + for pg in ALLPGS: + OSDS = get_osds(pg, OSDDIR) + for osd in OSDS: + tmpfd = open(TMPFILE, "ab") + cmd = (CFSD_PREFIX + "--op list --pgid {pg}").format(osd=osd, pg=pg) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=tmpfd) + if ret != 0: + logging.error("Bad exit status {ret} from --op list request".format(ret=ret)) + ERRORS += 1 + + tmpfd.close() + lines = get_lines(TMPFILE) + JSONOBJ = sorted(set(lines)) + for JSON in JSONOBJ: + (pgid, jsondict) = json.loads(JSON) + # Skip clones for now + if jsondict['snapid'] != -2: + continue + db[jsondict['namespace']][jsondict['oid']]['json'] = json.dumps((pgid, jsondict)) + # print db[jsondict['namespace']][jsondict['oid']]['json'] + if jsondict['oid'].find(EC_NAME) == 0 and 'shard_id' not in jsondict: + logging.error("Malformed JSON {json}".format(json=JSON)) + ERRORS += 1 + + # Test get-bytes + print("Test get-bytes and set-bytes") + for nspace in db.keys(): + for basename in db[nspace].keys(): + file = os.path.join(DATADIR, nspace + "-" + basename + "__head") + JSON = db[nspace][basename]['json'] + GETNAME = "/tmp/getbytes.{pid}".format(pid=pid) + TESTNAME = "/tmp/testbytes.{pid}".format(pid=pid) + SETNAME = "/tmp/setbytes.{pid}".format(pid=pid) + BADNAME = "/tmp/badbytes.{pid}".format(pid=pid) + for pg in OBJREPPGS: + OSDS = get_osds(pg, OSDDIR) + for osd in OSDS: + DIR = os.path.join(OSDDIR, os.path.join(osd, os.path.join("current", "{pg}_head".format(pg=pg)))) + fnames = [f for f in os.listdir(DIR) if os.path.isfile(os.path.join(DIR, f)) + and f.split("_")[0] == basename and 
f.split("_")[4] == nspace] + if not fnames: + continue + try: + os.unlink(GETNAME) + except: + pass + cmd = (CFSD_PREFIX + " --pgid {pg} '{json}' get-bytes {fname}").format(osd=osd, pg=pg, json=JSON, fname=GETNAME) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Bad exit status {ret}".format(ret=ret)) + ERRORS += 1 + continue + cmd = "diff -q {file} {getfile}".format(file=file, getfile=GETNAME) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Data from get-bytes differ") + logging.debug("Got:") + cat_file(logging.DEBUG, GETNAME) + logging.debug("Expected:") + cat_file(logging.DEBUG, file) + ERRORS += 1 + fd = open(SETNAME, "w") + data = "put-bytes going into {file}\n".format(file=file) + fd.write(data) + fd.close() + cmd = (CFSD_PREFIX + "--pgid {pg} '{json}' set-bytes {sname}").format(osd=osd, pg=pg, json=JSON, sname=SETNAME) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Bad exit status {ret} from set-bytes".format(ret=ret)) + ERRORS += 1 + fd = open(TESTNAME, "wb") + cmd = (CFSD_PREFIX + "--pgid {pg} '{json}' get-bytes -").format(osd=osd, pg=pg, json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=fd) + fd.close() + if ret != 0: + logging.error("Bad exit status {ret} from get-bytes".format(ret=ret)) + ERRORS += 1 + cmd = "diff -q {setfile} {testfile}".format(setfile=SETNAME, testfile=TESTNAME) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Data after set-bytes differ") + logging.debug("Got:") + cat_file(logging.DEBUG, TESTNAME) + logging.debug("Expected:") + cat_file(logging.DEBUG, SETNAME) + ERRORS += 1 + + # Use set-bytes with --dry-run and make sure contents haven't changed + fd = open(BADNAME, "w") + data = "Bad data for --dry-run in {file}\n".format(file=file) + fd.write(data) + fd.close() + cmd = (CFSD_PREFIX + "--dry-run --pgid {pg} '{json}' set-bytes {sname}").format(osd=osd, pg=pg, json=JSON, sname=BADNAME) + 
logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("Bad exit status {ret} from set-bytes --dry-run".format(ret=ret)) + ERRORS += 1 + fd = open(TESTNAME, "wb") + cmd = (CFSD_PREFIX + "--pgid {pg} '{json}' get-bytes -").format(osd=osd, pg=pg, json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=fd) + fd.close() + if ret != 0: + logging.error("Bad exit status {ret} from get-bytes".format(ret=ret)) + ERRORS += 1 + cmd = "diff -q {setfile} {testfile}".format(setfile=SETNAME, testfile=TESTNAME) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Data after set-bytes --dry-run changed!") + logging.debug("Got:") + cat_file(logging.DEBUG, TESTNAME) + logging.debug("Expected:") + cat_file(logging.DEBUG, SETNAME) + ERRORS += 1 + + fd = open(file, "rb") + cmd = (CFSD_PREFIX + "--pgid {pg} '{json}' set-bytes").format(osd=osd, pg=pg, json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True, stdin=fd) + if ret != 0: + logging.error("Bad exit status {ret} from set-bytes to restore object".format(ret=ret)) + ERRORS += 1 + fd.close() + + try: + os.unlink(GETNAME) + except: + pass + try: + os.unlink(TESTNAME) + except: + pass + try: + os.unlink(SETNAME) + except: + pass + try: + os.unlink(BADNAME) + except: + pass + + # Test get-attr, set-attr, rm-attr, get-omaphdr, set-omaphdr, get-omap, set-omap, rm-omap + print("Test get-attr, set-attr, rm-attr, get-omaphdr, set-omaphdr, get-omap, set-omap, rm-omap") + for nspace in db.keys(): + for basename in db[nspace].keys(): + file = os.path.join(DATADIR, nspace + "-" + basename + "__head") + JSON = db[nspace][basename]['json'] + for pg in OBJREPPGS: + OSDS = get_osds(pg, OSDDIR) + for osd in OSDS: + DIR = os.path.join(OSDDIR, os.path.join(osd, os.path.join("current", "{pg}_head".format(pg=pg)))) + fnames = [f for f in os.listdir(DIR) if os.path.isfile(os.path.join(DIR, f)) + and f.split("_")[0] == basename and f.split("_")[4] == 
nspace] + if not fnames: + continue + for key, val in db[nspace][basename]["xattr"].items(): + attrkey = "_" + key + cmd = (CFSD_PREFIX + " '{json}' get-attr {key}").format(osd=osd, json=JSON, key=attrkey) + logging.debug(cmd) + getval = check_output(cmd, shell=True).decode() + if getval != val: + logging.error("get-attr of key {key} returned wrong val: {get} instead of {orig}".format(key=attrkey, get=getval, orig=val)) + ERRORS += 1 + continue + # set-attr to bogus value "foobar" + cmd = ("echo -n foobar | " + CFSD_PREFIX + " --pgid {pg} '{json}' set-attr {key}").format(osd=osd, pg=pg, json=JSON, key=attrkey) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Bad exit status {ret} from set-attr".format(ret=ret)) + ERRORS += 1 + continue + # Test set-attr with dry-run + cmd = ("echo -n dryrunbroken | " + CFSD_PREFIX + "--dry-run '{json}' set-attr {key}").format(osd=osd, pg=pg, json=JSON, key=attrkey) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Bad exit status {ret} from set-attr".format(ret=ret)) + ERRORS += 1 + continue + # Check the set-attr + cmd = (CFSD_PREFIX + " --pgid {pg} '{json}' get-attr {key}").format(osd=osd, pg=pg, json=JSON, key=attrkey) + logging.debug(cmd) + getval = check_output(cmd, shell=True).decode() + if ret != 0: + logging.error("Bad exit status {ret} from get-attr".format(ret=ret)) + ERRORS += 1 + continue + if getval != "foobar": + logging.error("Check of set-attr failed because we got {val}".format(val=getval)) + ERRORS += 1 + continue + # Test rm-attr + cmd = (CFSD_PREFIX + "'{json}' rm-attr {key}").format(osd=osd, pg=pg, json=JSON, key=attrkey) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Bad exit status {ret} from rm-attr".format(ret=ret)) + ERRORS += 1 + continue + # Check rm-attr with dry-run + cmd = (CFSD_PREFIX + "--dry-run '{json}' rm-attr {key}").format(osd=osd, pg=pg, json=JSON, key=attrkey) + 
logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Bad exit status {ret} from rm-attr".format(ret=ret)) + ERRORS += 1 + continue + cmd = (CFSD_PREFIX + "'{json}' get-attr {key}").format(osd=osd, pg=pg, json=JSON, key=attrkey) + logging.debug(cmd) + ret = call(cmd, shell=True, stderr=nullfd, stdout=nullfd) + if ret == 0: + logging.error("For rm-attr expect get-attr to fail, but it succeeded") + ERRORS += 1 + # Put back value + cmd = ("echo -n {val} | " + CFSD_PREFIX + " --pgid {pg} '{json}' set-attr {key}").format(osd=osd, pg=pg, json=JSON, key=attrkey, val=val) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Bad exit status {ret} from set-attr".format(ret=ret)) + ERRORS += 1 + continue + + hdr = db[nspace][basename].get("omapheader", "") + cmd = (CFSD_PREFIX + "'{json}' get-omaphdr").format(osd=osd, json=JSON) + logging.debug(cmd) + gethdr = check_output(cmd, shell=True).decode() + if gethdr != hdr: + logging.error("get-omaphdr was wrong: {get} instead of {orig}".format(get=gethdr, orig=hdr)) + ERRORS += 1 + continue + # set-omaphdr to bogus value "foobar" + cmd = ("echo -n foobar | " + CFSD_PREFIX + "'{json}' set-omaphdr").format(osd=osd, pg=pg, json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Bad exit status {ret} from set-omaphdr".format(ret=ret)) + ERRORS += 1 + continue + # Check the set-omaphdr + cmd = (CFSD_PREFIX + "'{json}' get-omaphdr").format(osd=osd, pg=pg, json=JSON) + logging.debug(cmd) + gethdr = check_output(cmd, shell=True).decode() + if ret != 0: + logging.error("Bad exit status {ret} from get-omaphdr".format(ret=ret)) + ERRORS += 1 + continue + if gethdr != "foobar": + logging.error("Check of set-omaphdr failed because we got {val}".format(val=getval)) + ERRORS += 1 + continue + # Test dry-run with set-omaphdr + cmd = ("echo -n dryrunbroken | " + CFSD_PREFIX + "--dry-run '{json}' set-omaphdr").format(osd=osd, pg=pg, 
json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Bad exit status {ret} from set-omaphdr".format(ret=ret)) + ERRORS += 1 + continue + # Put back value + cmd = ("echo -n {val} | " + CFSD_PREFIX + "'{json}' set-omaphdr").format(osd=osd, pg=pg, json=JSON, val=hdr) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Bad exit status {ret} from set-omaphdr".format(ret=ret)) + ERRORS += 1 + continue + + for omapkey, val in db[nspace][basename]["omap"].items(): + cmd = (CFSD_PREFIX + " '{json}' get-omap {key}").format(osd=osd, json=JSON, key=omapkey) + logging.debug(cmd) + getval = check_output(cmd, shell=True).decode() + if getval != val: + logging.error("get-omap of key {key} returned wrong val: {get} instead of {orig}".format(key=omapkey, get=getval, orig=val)) + ERRORS += 1 + continue + # set-omap to bogus value "foobar" + cmd = ("echo -n foobar | " + CFSD_PREFIX + " --pgid {pg} '{json}' set-omap {key}").format(osd=osd, pg=pg, json=JSON, key=omapkey) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Bad exit status {ret} from set-omap".format(ret=ret)) + ERRORS += 1 + continue + # Check set-omap with dry-run + cmd = ("echo -n dryrunbroken | " + CFSD_PREFIX + "--dry-run --pgid {pg} '{json}' set-omap {key}").format(osd=osd, pg=pg, json=JSON, key=omapkey) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Bad exit status {ret} from set-omap".format(ret=ret)) + ERRORS += 1 + continue + # Check the set-omap + cmd = (CFSD_PREFIX + " --pgid {pg} '{json}' get-omap {key}").format(osd=osd, pg=pg, json=JSON, key=omapkey) + logging.debug(cmd) + getval = check_output(cmd, shell=True).decode() + if ret != 0: + logging.error("Bad exit status {ret} from get-omap".format(ret=ret)) + ERRORS += 1 + continue + if getval != "foobar": + logging.error("Check of set-omap failed because we got {val}".format(val=getval)) + 
ERRORS += 1 + continue + # Test rm-omap + cmd = (CFSD_PREFIX + "'{json}' rm-omap {key}").format(osd=osd, pg=pg, json=JSON, key=omapkey) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Bad exit status {ret} from rm-omap".format(ret=ret)) + ERRORS += 1 + # Check rm-omap with dry-run + cmd = (CFSD_PREFIX + "--dry-run '{json}' rm-omap {key}").format(osd=osd, pg=pg, json=JSON, key=omapkey) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Bad exit status {ret} from rm-omap".format(ret=ret)) + ERRORS += 1 + cmd = (CFSD_PREFIX + "'{json}' get-omap {key}").format(osd=osd, pg=pg, json=JSON, key=omapkey) + logging.debug(cmd) + ret = call(cmd, shell=True, stderr=nullfd, stdout=nullfd) + if ret == 0: + logging.error("For rm-omap expect get-omap to fail, but it succeeded") + ERRORS += 1 + # Put back value + cmd = ("echo -n {val} | " + CFSD_PREFIX + " --pgid {pg} '{json}' set-omap {key}").format(osd=osd, pg=pg, json=JSON, key=omapkey, val=val) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Bad exit status {ret} from set-omap".format(ret=ret)) + ERRORS += 1 + continue + + # Test dump + print("Test dump") + for nspace in db.keys(): + for basename in db[nspace].keys(): + file = os.path.join(DATADIR, nspace + "-" + basename + "__head") + JSON = db[nspace][basename]['json'] + jsondict = json.loads(JSON) + for pg in OBJREPPGS: + OSDS = get_osds(pg, OSDDIR) + for osd in OSDS: + DIR = os.path.join(OSDDIR, os.path.join(osd, os.path.join("current", "{pg}_head".format(pg=pg)))) + fnames = [f for f in os.listdir(DIR) if os.path.isfile(os.path.join(DIR, f)) + and f.split("_")[0] == basename and f.split("_")[4] == nspace] + if not fnames: + continue + if int(basename.split(REP_NAME)[1]) > int(NUM_CLONED_REP_OBJECTS): + continue + logging.debug("REPobject " + JSON) + cmd = (CFSD_PREFIX + " '{json}' dump | grep '\"snap\": 1,' > /dev/null").format(osd=osd, json=JSON) + 
logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Invalid dump for {json}".format(json=JSON)) + ERRORS += 1 + if 'shard_id' in jsondict[1]: + logging.debug("ECobject " + JSON) + for pg in OBJECPGS: + OSDS = get_osds(pg, OSDDIR) + jsondict = json.loads(JSON) + for osd in OSDS: + DIR = os.path.join(OSDDIR, os.path.join(osd, os.path.join("current", "{pg}_head".format(pg=pg)))) + fnames = [f for f in os.listdir(DIR) if os.path.isfile(os.path.join(DIR, f)) + and f.split("_")[0] == basename and f.split("_")[4] == nspace] + if not fnames: + continue + if int(basename.split(EC_NAME)[1]) > int(NUM_EC_OBJECTS): + continue + # Fix shard_id since we only have one json instance for each object + jsondict[1]['shard_id'] = int(pg.split('s')[1]) + cmd = (CFSD_PREFIX + " '{json}' dump | grep '\"hinfo\": [{{]' > /dev/null").format(osd=osd, json=json.dumps((pg, jsondict[1]))) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Invalid dump for {json}".format(json=JSON)) + + print("Test list-attrs get-attr") + ATTRFILE = r"/tmp/attrs.{pid}".format(pid=pid) + VALFILE = r"/tmp/val.{pid}".format(pid=pid) + for nspace in db.keys(): + for basename in db[nspace].keys(): + file = os.path.join(DATADIR, nspace + "-" + basename) + JSON = db[nspace][basename]['json'] + jsondict = json.loads(JSON) + + if 'shard_id' in jsondict[1]: + logging.debug("ECobject " + JSON) + found = 0 + for pg in OBJECPGS: + OSDS = get_osds(pg, OSDDIR) + # Fix shard_id since we only have one json instance for each object + jsondict[1]['shard_id'] = int(pg.split('s')[1]) + JSON = json.dumps((pg, jsondict[1])) + for osd in OSDS: + cmd = (CFSD_PREFIX + " --tty '{json}' get-attr hinfo_key").format(osd=osd, json=JSON) + logging.debug("TRY: " + cmd) + try: + out = check_output(cmd, shell=True, stderr=subprocess.STDOUT).decode() + logging.debug("FOUND: {json} in {osd} has value '{val}'".format(osd=osd, json=JSON, val=out)) + found += 1 + except 
subprocess.CalledProcessError as e: + logging.debug("Error message: {output}".format(output=e.output)) + if "No such file or directory" not in str(e.output) and \ + "No data available" not in str(e.output) and \ + "not contained by pg" not in str(e.output): + raise + # Assuming k=2 m=1 for the default ec pool + if found != 3: + logging.error("{json} hinfo_key found {found} times instead of 3".format(json=JSON, found=found)) + ERRORS += 1 + + for pg in ALLPGS: + # Make sure rep obj with rep pg or ec obj with ec pg + if ('shard_id' in jsondict[1]) != (pg.find('s') > 0): + continue + if 'shard_id' in jsondict[1]: + # Fix shard_id since we only have one json instance for each object + jsondict[1]['shard_id'] = int(pg.split('s')[1]) + JSON = json.dumps((pg, jsondict[1])) + OSDS = get_osds(pg, OSDDIR) + for osd in OSDS: + DIR = os.path.join(OSDDIR, os.path.join(osd, os.path.join("current", "{pg}_head".format(pg=pg)))) + fnames = [f for f in os.listdir(DIR) if os.path.isfile(os.path.join(DIR, f)) + and f.split("_")[0] == basename and f.split("_")[4] == nspace] + if not fnames: + continue + afd = open(ATTRFILE, "wb") + cmd = (CFSD_PREFIX + " '{json}' list-attrs").format(osd=osd, json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=afd) + afd.close() + if ret != 0: + logging.error("list-attrs failed with {ret}".format(ret=ret)) + ERRORS += 1 + continue + keys = get_lines(ATTRFILE) + values = dict(db[nspace][basename]["xattr"]) + for key in keys: + if key == "_" or key == "snapset" or key == "hinfo_key": + continue + key = key.strip("_") + if key not in values: + logging.error("Unexpected key {key} present".format(key=key)) + ERRORS += 1 + continue + exp = values.pop(key) + vfd = open(VALFILE, "wb") + cmd = (CFSD_PREFIX + " '{json}' get-attr {key}").format(osd=osd, json=JSON, key="_" + key) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=vfd) + vfd.close() + if ret != 0: + logging.error("get-attr failed with {ret}".format(ret=ret)) + ERRORS += 1 + 
continue + lines = get_lines(VALFILE) + val = lines[0] + if exp != val: + logging.error("For key {key} got value {got} instead of {expected}".format(key=key, got=val, expected=exp)) + ERRORS += 1 + if len(values) != 0: + logging.error("Not all keys found, remaining keys:") + print(values) + + print("Test --op meta-list") + tmpfd = open(TMPFILE, "wb") + cmd = (CFSD_PREFIX + "--op meta-list").format(osd=ONEOSD) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=tmpfd) + if ret != 0: + logging.error("Bad exit status {ret} from --op meta-list request".format(ret=ret)) + ERRORS += 1 + + print("Test get-bytes on meta") + tmpfd.close() + lines = get_lines(TMPFILE) + JSONOBJ = sorted(set(lines)) + for JSON in JSONOBJ: + (pgid, jsondict) = json.loads(JSON) + if pgid != "meta": + logging.error("pgid incorrect for --op meta-list {pgid}".format(pgid=pgid)) + ERRORS += 1 + if jsondict['namespace'] != "": + logging.error("namespace non null --op meta-list {ns}".format(ns=jsondict['namespace'])) + ERRORS += 1 + logging.info(JSON) + try: + os.unlink(GETNAME) + except: + pass + cmd = (CFSD_PREFIX + "'{json}' get-bytes {fname}").format(osd=ONEOSD, json=JSON, fname=GETNAME) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Bad exit status {ret}".format(ret=ret)) + ERRORS += 1 + + try: + os.unlink(GETNAME) + except: + pass + try: + os.unlink(TESTNAME) + except: + pass + + print("Test pg info") + for pg in ALLREPPGS + ALLECPGS: + for osd in get_osds(pg, OSDDIR): + cmd = (CFSD_PREFIX + "--op info --pgid {pg} | grep '\"pgid\": \"{pg}\"'").format(osd=osd, pg=pg) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Getting info failed for pg {pg} from {osd} with {ret}".format(pg=pg, osd=osd, ret=ret)) + ERRORS += 1 + + print("Test pg logging") + if len(ALLREPPGS + ALLECPGS) == len(OBJREPPGS + OBJECPGS): + logging.warning("All PGs have objects, so no log without modify entries") + for pg in ALLREPPGS + 
ALLECPGS: + for osd in get_osds(pg, OSDDIR): + tmpfd = open(TMPFILE, "wb") + cmd = (CFSD_PREFIX + "--op log --pgid {pg}").format(osd=osd, pg=pg) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=tmpfd) + if ret != 0: + logging.error("Getting log failed for pg {pg} from {osd} with {ret}".format(pg=pg, osd=osd, ret=ret)) + ERRORS += 1 + HASOBJ = pg in OBJREPPGS + OBJECPGS + MODOBJ = False + for line in get_lines(TMPFILE): + if line.find("modify") != -1: + MODOBJ = True + break + if HASOBJ != MODOBJ: + logging.error("Bad log for pg {pg} from {osd}".format(pg=pg, osd=osd)) + MSG = (HASOBJ and [""] or ["NOT "])[0] + print("Log should {msg}have a modify entry".format(msg=MSG)) + ERRORS += 1 + + try: + os.unlink(TMPFILE) + except: + pass + + print("Test list-pgs") + for osd in [f for f in os.listdir(OSDDIR) if os.path.isdir(os.path.join(OSDDIR, f)) and f.find("osd") == 0]: + + CHECK_PGS = get_osd_pgs(os.path.join(OSDDIR, osd), None) + CHECK_PGS = sorted(CHECK_PGS) + + cmd = (CFSD_PREFIX + "--op list-pgs").format(osd=osd) + logging.debug(cmd) + TEST_PGS = check_output(cmd, shell=True).decode().split("\n") + TEST_PGS = sorted(TEST_PGS)[1:] # Skip extra blank line + + if TEST_PGS != CHECK_PGS: + logging.error("list-pgs got wrong result for osd.{osd}".format(osd=osd)) + logging.error("Expected {pgs}".format(pgs=CHECK_PGS)) + logging.error("Got {pgs}".format(pgs=TEST_PGS)) + ERRORS += 1 + + EXP_ERRORS = 0 + print("Test pg export --dry-run") + pg = ALLREPPGS[0] + osd = get_osds(pg, OSDDIR)[0] + fname = "/tmp/fname.{pid}".format(pid=pid) + cmd = (CFSD_PREFIX + "--dry-run --op export --pgid {pg} --file {file}").format(osd=osd, pg=pg, file=fname) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("Exporting --dry-run failed for pg {pg} on {osd} with {ret}".format(pg=pg, osd=osd, ret=ret)) + EXP_ERRORS += 1 + elif os.path.exists(fname): + logging.error("Exporting --dry-run created file") + EXP_ERRORS += 1 + + cmd 
= (CFSD_PREFIX + "--dry-run --op export --pgid {pg} > {file}").format(osd=osd, pg=pg, file=fname) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("Exporting --dry-run failed for pg {pg} on {osd} with {ret}".format(pg=pg, osd=osd, ret=ret)) + EXP_ERRORS += 1 + else: + outdata = get_lines(fname) + if len(outdata) > 0: + logging.error("Exporting --dry-run to stdout not empty") + logging.error("Data: " + outdata) + EXP_ERRORS += 1 + + os.mkdir(TESTDIR) + for osd in [f for f in os.listdir(OSDDIR) if os.path.isdir(os.path.join(OSDDIR, f)) and f.find("osd") == 0]: + os.mkdir(os.path.join(TESTDIR, osd)) + print("Test pg export") + for pg in ALLREPPGS + ALLECPGS: + for osd in get_osds(pg, OSDDIR): + mydir = os.path.join(TESTDIR, osd) + fname = os.path.join(mydir, pg) + if pg == ALLREPPGS[0]: + cmd = (CFSD_PREFIX + "--op export --pgid {pg} > {file}").format(osd=osd, pg=pg, file=fname) + elif pg == ALLREPPGS[1]: + cmd = (CFSD_PREFIX + "--op export --pgid {pg} --file - > {file}").format(osd=osd, pg=pg, file=fname) + else: + cmd = (CFSD_PREFIX + "--op export --pgid {pg} --file {file}").format(osd=osd, pg=pg, file=fname) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("Exporting failed for pg {pg} on {osd} with {ret}".format(pg=pg, osd=osd, ret=ret)) + EXP_ERRORS += 1 + + ERRORS += EXP_ERRORS + + print("Test clear-data-digest") + for nspace in db.keys(): + for basename in db[nspace].keys(): + JSON = db[nspace][basename]['json'] + cmd = (CFSD_PREFIX + "'{json}' clear-data-digest").format(osd='osd0', json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("Clearing data digest failed for {json}".format(json=JSON)) + ERRORS += 1 + break + cmd = (CFSD_PREFIX + "'{json}' dump | grep '\"data_digest\": \"0xff'").format(osd='osd0', json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True, 
stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("Data digest not cleared for {json}".format(json=JSON)) + ERRORS += 1 + break + break + break + + print("Test pg removal") + RM_ERRORS = 0 + for pg in ALLREPPGS + ALLECPGS: + for osd in get_osds(pg, OSDDIR): + # This should do nothing + cmd = (CFSD_PREFIX + "--op remove --pgid {pg} --dry-run").format(pg=pg, osd=osd) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Removing --dry-run failed for pg {pg} on {osd} with {ret}".format(pg=pg, osd=osd, ret=ret)) + RM_ERRORS += 1 + cmd = (CFSD_PREFIX + "--force --op remove --pgid {pg}").format(pg=pg, osd=osd) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Removing failed for pg {pg} on {osd} with {ret}".format(pg=pg, osd=osd, ret=ret)) + RM_ERRORS += 1 + + ERRORS += RM_ERRORS + + IMP_ERRORS = 0 + if EXP_ERRORS == 0 and RM_ERRORS == 0: + print("Test pg import") + for osd in [f for f in os.listdir(OSDDIR) if os.path.isdir(os.path.join(OSDDIR, f)) and f.find("osd") == 0]: + dir = os.path.join(TESTDIR, osd) + PGS = [f for f in os.listdir(dir) if os.path.isfile(os.path.join(dir, f))] + for pg in PGS: + file = os.path.join(dir, pg) + # Make sure this doesn't crash + cmd = (CFSD_PREFIX + "--op dump-export --file {file}").format(osd=osd, file=file) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Dump-export failed from {file} with {ret}".format(file=file, ret=ret)) + IMP_ERRORS += 1 + # This should do nothing + cmd = (CFSD_PREFIX + "--op import --file {file} --dry-run").format(osd=osd, file=file) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Import failed from {file} with {ret}".format(file=file, ret=ret)) + IMP_ERRORS += 1 + if pg == PGS[0]: + cmd = ("cat {file} |".format(file=file) + CFSD_PREFIX + "--op import").format(osd=osd) + elif pg == PGS[1]: + cmd = (CFSD_PREFIX + 
"--op import --file - --pgid {pg} < {file}").format(osd=osd, file=file, pg=pg) + else: + cmd = (CFSD_PREFIX + "--op import --file {file}").format(osd=osd, file=file) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Import failed from {file} with {ret}".format(file=file, ret=ret)) + IMP_ERRORS += 1 + else: + logging.warning("SKIPPING IMPORT TESTS DUE TO PREVIOUS FAILURES") + + ERRORS += IMP_ERRORS + logging.debug(cmd) + + if EXP_ERRORS == 0 and RM_ERRORS == 0 and IMP_ERRORS == 0: + print("Verify replicated import data") + data_errors, _ = check_data(DATADIR, TMPFILE, OSDDIR, REP_NAME) + ERRORS += data_errors + else: + logging.warning("SKIPPING CHECKING IMPORT DATA DUE TO PREVIOUS FAILURES") + + print("Test all --op dump-journal again") + ALLOSDS = [f for f in os.listdir(OSDDIR) if os.path.isdir(os.path.join(OSDDIR, f)) and f.find("osd") == 0] + ERRORS += test_dump_journal(CFSD_PREFIX, ALLOSDS) + + vstart(new=False) + wait_for_health() + + if EXP_ERRORS == 0 and RM_ERRORS == 0 and IMP_ERRORS == 0: + print("Verify erasure coded import data") + ERRORS += verify(DATADIR, EC_POOL, EC_NAME, db) + # Check replicated data/xattr/omap using rados + print("Verify replicated import data using rados") + ERRORS += verify(DATADIR, REP_POOL, REP_NAME, db) + + if EXP_ERRORS == 0: + NEWPOOL = "rados-import-pool" + cmd = "{path}/ceph osd pool create {pool} 8".format(pool=NEWPOOL, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + + print("Test rados import") + first = True + for osd in [f for f in os.listdir(OSDDIR) if os.path.isdir(os.path.join(OSDDIR, f)) and f.find("osd") == 0]: + dir = os.path.join(TESTDIR, osd) + for pg in [f for f in os.listdir(dir) if os.path.isfile(os.path.join(dir, f))]: + if pg.find("{id}.".format(id=REPID)) != 0: + continue + file = os.path.join(dir, pg) + if first: + first = False + # This should do nothing + cmd = "{path}/rados import -p {pool} --dry-run 
{file}".format(pool=NEWPOOL, file=file, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Rados import --dry-run failed from {file} with {ret}".format(file=file, ret=ret)) + ERRORS += 1 + cmd = "{path}/rados -p {pool} ls".format(pool=NEWPOOL, path=CEPH_BIN) + logging.debug(cmd) + data = check_output(cmd, shell=True).decode() + if data: + logging.error("'{data}'".format(data=data)) + logging.error("Found objects after dry-run") + ERRORS += 1 + cmd = "{path}/rados import -p {pool} {file}".format(pool=NEWPOOL, file=file, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Rados import failed from {file} with {ret}".format(file=file, ret=ret)) + ERRORS += 1 + cmd = "{path}/rados import -p {pool} --no-overwrite {file}".format(pool=NEWPOOL, file=file, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Rados import --no-overwrite failed from {file} with {ret}".format(file=file, ret=ret)) + ERRORS += 1 + + ERRORS += verify(DATADIR, NEWPOOL, REP_NAME, db) + else: + logging.warning("SKIPPING IMPORT-RADOS TESTS DUE TO PREVIOUS FAILURES") + + # Clear directories of previous portion + call("/bin/rm -rf {dir}".format(dir=TESTDIR), shell=True) + call("/bin/rm -rf {dir}".format(dir=DATADIR), shell=True) + os.mkdir(TESTDIR) + os.mkdir(DATADIR) + + # Cause SPLIT_POOL to split and test import with object/log filtering + print("Testing import all objects after a split") + SPLIT_POOL = "split_pool" + PG_COUNT = 1 + SPLIT_OBJ_COUNT = 5 + SPLIT_NSPACE_COUNT = 2 + SPLIT_NAME = "split" + cmd = "{path}/ceph osd pool create {pool} {pg} {pg} replicated".format(pool=SPLIT_POOL, pg=PG_COUNT, path=CEPH_BIN) + logging.debug(cmd) + call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + SPLITID = get_pool_id(SPLIT_POOL, nullfd) + pool_size = int(check_output("{path}/ceph osd pool get {pool} 
size".format(pool=SPLIT_POOL, path=CEPH_BIN), shell=True, stderr=nullfd).decode().split(" ")[1]) + EXP_ERRORS = 0 + RM_ERRORS = 0 + IMP_ERRORS = 0 + + objects = range(1, SPLIT_OBJ_COUNT + 1) + nspaces = range(SPLIT_NSPACE_COUNT) + for n in nspaces: + nspace = get_nspace(n) + + for i in objects: + NAME = SPLIT_NAME + "{num}".format(num=i) + LNAME = nspace + "-" + NAME + DDNAME = os.path.join(DATADIR, LNAME) + DDNAME += "__head" + + cmd = "rm -f " + DDNAME + logging.debug(cmd) + call(cmd, shell=True) + + if i == 1: + dataline = range(DATALINECOUNT) + else: + dataline = range(1) + fd = open(DDNAME, "w") + data = "This is the split data for " + LNAME + "\n" + for _ in dataline: + fd.write(data) + fd.close() + + cmd = "{path}/rados -p {pool} -N '{nspace}' put {name} {ddname}".format(pool=SPLIT_POOL, name=NAME, ddname=DDNAME, nspace=nspace, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True, stderr=nullfd) + if ret != 0: + logging.critical("Rados put command failed with {ret}".format(ret=ret)) + return 1 + + wait_for_health() + kill_daemons() + + for osd in [f for f in os.listdir(OSDDIR) if os.path.isdir(os.path.join(OSDDIR, f)) and f.find("osd") == 0]: + os.mkdir(os.path.join(TESTDIR, osd)) + + pg = "{pool}.0".format(pool=SPLITID) + EXPORT_PG = pg + + export_osds = get_osds(pg, OSDDIR) + for osd in export_osds: + mydir = os.path.join(TESTDIR, osd) + fname = os.path.join(mydir, pg) + cmd = (CFSD_PREFIX + "--op export --pgid {pg} --file {file}").format(osd=osd, pg=pg, file=fname) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("Exporting failed for pg {pg} on {osd} with {ret}".format(pg=pg, osd=osd, ret=ret)) + EXP_ERRORS += 1 + + ERRORS += EXP_ERRORS + + if EXP_ERRORS == 0: + vstart(new=False) + wait_for_health() + + cmd = "{path}/ceph osd pool set {pool} pg_num 2".format(pool=SPLIT_POOL, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + 
time.sleep(5) + wait_for_health() + + kill_daemons() + + # Now 2 PGs, poolid.0 and poolid.1 + # make note of pgs before we remove the pgs... + osds = get_osds("{pool}.0".format(pool=SPLITID), OSDDIR); + for seed in range(2): + pg = "{pool}.{seed}".format(pool=SPLITID, seed=seed) + + for osd in osds: + cmd = (CFSD_PREFIX + "--force --op remove --pgid {pg}").format(pg=pg, osd=osd) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + + which = 0 + for osd in osds: + # This is weird. The export files are based on only the EXPORT_PG + # and where that pg was before the split. Use 'which' to use all + # export copies in import. + mydir = os.path.join(TESTDIR, export_osds[which]) + fname = os.path.join(mydir, EXPORT_PG) + which += 1 + cmd = (CFSD_PREFIX + "--op import --pgid {pg} --file {file}").format(osd=osd, pg=EXPORT_PG, file=fname) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Import failed from {file} with {ret}".format(file=file, ret=ret)) + IMP_ERRORS += 1 + + ERRORS += IMP_ERRORS + + # Start up again to make sure imports didn't corrupt anything + if IMP_ERRORS == 0: + print("Verify split import data") + data_errors, count = check_data(DATADIR, TMPFILE, OSDDIR, SPLIT_NAME) + ERRORS += data_errors + if count != (SPLIT_OBJ_COUNT * SPLIT_NSPACE_COUNT * pool_size): + logging.error("Incorrect number of replicas seen {count}".format(count=count)) + ERRORS += 1 + vstart(new=False) + wait_for_health() + + call("/bin/rm -rf {dir}".format(dir=TESTDIR), shell=True) + call("/bin/rm -rf {dir}".format(dir=DATADIR), shell=True) + + ERRORS += test_removeall(CFSD_PREFIX, db, OBJREPPGS, REP_POOL, CEPH_BIN, OSDDIR, REP_NAME, NUM_CLONED_REP_OBJECTS) + + # vstart() starts 4 OSDs + ERRORS += test_get_set_osdmap(CFSD_PREFIX, list(range(4)), ALLOSDS) + ERRORS += test_get_set_inc_osdmap(CFSD_PREFIX, ALLOSDS[0]) + + kill_daemons() + CORES = [f for f in os.listdir(CEPH_DIR) if f.startswith("core.")] + if CORES: + CORE_DIR = 
os.path.join("/tmp", "cores.{pid}".format(pid=os.getpid())) + os.mkdir(CORE_DIR) + call("/bin/mv {ceph_dir}/core.* {core_dir}".format(ceph_dir=CEPH_DIR, core_dir=CORE_DIR), shell=True) + logging.error("Failure due to cores found") + logging.error("See {core_dir} for cores".format(core_dir=CORE_DIR)) + ERRORS += len(CORES) + + if ERRORS == 0: + print("TEST PASSED") + return 0 + else: + print("TEST FAILED WITH {errcount} ERRORS".format(errcount=ERRORS)) + return 1 + + +def remove_btrfs_subvolumes(path): + if platform.system() == "FreeBSD": + return + result = subprocess.Popen("stat -f -c '%%T' %s" % path, shell=True, stdout=subprocess.PIPE) + for line in result.stdout: + filesystem = line.decode('utf-8').rstrip('\n') + if filesystem == "btrfs": + result = subprocess.Popen("sudo btrfs subvolume list %s" % path, shell=True, stdout=subprocess.PIPE) + for line in result.stdout: + subvolume = line.decode('utf-8').split()[8] + # extracting the relative volume name + m = re.search(".*(%s.*)" % path, subvolume) + if m: + found = m.group(1) + call("sudo btrfs subvolume delete %s" % found, shell=True) + + +if __name__ == "__main__": + status = 1 + try: + status = main(sys.argv[1:]) + finally: + kill_daemons() + os.chdir(CEPH_BUILD_DIR) + remove_btrfs_subvolumes(CEPH_DIR) + call("/bin/rm -fr {dir}".format(dir=CEPH_DIR), shell=True) + sys.exit(status) diff --git a/qa/standalone/special/test-failure.sh b/qa/standalone/special/test-failure.sh new file mode 100755 index 000000000..cede887d2 --- /dev/null +++ b/qa/standalone/special/test-failure.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash +set -ex + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7202" # git grep '\<7202\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + 
setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_failure_log() { + local dir=$1 + + cat > $dir/test_failure.log << EOF +This is a fake log file +* +* +* +* +* +This ends the fake log file +EOF + + # Test fails + return 1 +} + +function TEST_failure_core_only() { + local dir=$1 + + run_mon $dir a || return 1 + kill_daemons $dir SEGV mon 5 + return 0 +} + +main test_failure "$@" |