diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
commit | 483eb2f56657e8e7f419ab1a4fab8dce9ade8609 (patch) | |
tree | e5d88d25d870d5dedacb6bbdbe2a966086a0a5cf /src/spdk/test/vhost | |
parent | Initial commit. (diff) | |
download | ceph-upstream.tar.xz ceph-upstream.zip |
Adding upstream version 14.2.21.upstream/14.2.21upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/spdk/test/vhost')
54 files changed, 6329 insertions, 0 deletions
diff --git a/src/spdk/test/vhost/common/autotest.config b/src/spdk/test/vhost/common/autotest.config new file mode 100644 index 00000000..96b0d08b --- /dev/null +++ b/src/spdk/test/vhost/common/autotest.config @@ -0,0 +1,38 @@ +vhost_0_reactor_mask="[0]" +vhost_0_master_core=0 + +VM_0_qemu_mask=1-2 +VM_0_qemu_numa_node=0 + +VM_1_qemu_mask=3-4 +VM_1_qemu_numa_node=0 + +VM_2_qemu_mask=5-6 +VM_2_qemu_numa_node=0 + +VM_3_qemu_mask=7-8 +VM_3_qemu_numa_node=0 + +VM_4_qemu_mask=9-10 +VM_4_qemu_numa_node=0 + +VM_5_qemu_mask=11-12 +VM_5_qemu_numa_node=0 + +VM_6_qemu_mask=13-14 +VM_6_qemu_numa_node=1 + +VM_7_qemu_mask=15-16 +VM_7_qemu_numa_node=1 + +VM_8_qemu_mask=17-18 +VM_8_qemu_numa_node=1 + +VM_9_qemu_mask=19-20 +VM_9_qemu_numa_node=1 + +VM_10_qemu_mask=21-22 +VM_10_qemu_numa_node=1 + +VM_11_qemu_mask=23-24 +VM_11_qemu_numa_node=1 diff --git a/src/spdk/test/vhost/common/common.sh b/src/spdk/test/vhost/common/common.sh new file mode 100644 index 00000000..19c4be62 --- /dev/null +++ b/src/spdk/test/vhost/common/common.sh @@ -0,0 +1,1109 @@ +set -e + +: ${SPDK_VHOST_VERBOSE=false} +: ${QEMU_PREFIX="/usr/local/qemu/spdk-2.12"} + +BASE_DIR=$(readlink -f $(dirname ${BASH_SOURCE[0]})) + +# Default running dir -> spdk/.. +[[ -z "$TEST_DIR" ]] && TEST_DIR=$BASE_DIR/../../../../ + +TEST_DIR="$(mkdir -p $TEST_DIR && cd $TEST_DIR && echo $PWD)" +SPDK_BUILD_DIR=$BASE_DIR/../../../ + +SPDK_VHOST_SCSI_TEST_DIR=$TEST_DIR/vhost + +function message() +{ + if ! 
$SPDK_VHOST_VERBOSE; then + local verbose_out="" + elif [[ ${FUNCNAME[2]} == "source" ]]; then + local verbose_out=" (file $(basename ${BASH_SOURCE[1]}):${BASH_LINENO[1]})" + else + local verbose_out=" (function ${FUNCNAME[2]}:${BASH_LINENO[1]})" + fi + + local msg_type="$1" + shift + echo -e "${msg_type}${verbose_out}: $@" +} + +function fail() +{ + echo "===========" >&2 + message "FAIL" "$@" >&2 + echo "===========" >&2 + exit 1 +} + +function error() +{ + echo "===========" >&2 + message "ERROR" "$@" >&2 + echo "===========" >&2 + # Don't 'return 1' since the stack trace will be incomplete (why?) missing upper command. + false +} + +function warning() +{ + message "WARN" "$@" >&2 +} + +function notice() +{ + message "INFO" "$@" +} + + +# SSH key file +: ${SPDK_VHOST_SSH_KEY_FILE="$(readlink -e $HOME/.ssh/spdk_vhost_id_rsa)"} +if [[ ! -r "$SPDK_VHOST_SSH_KEY_FILE" ]]; then + error "Could not find SSH key file $SPDK_VHOST_SSH_KEY_FILE" + exit 1 +fi +echo "Using SSH key file $SPDK_VHOST_SSH_KEY_FILE" + +VM_BASE_DIR="$TEST_DIR/vms" + + +mkdir -p $TEST_DIR + +# +# Source config describing QEMU and VHOST cores and NUMA +# +source $(readlink -f $(dirname ${BASH_SOURCE[0]}))/autotest.config + +# Trace flag is optional, if it wasn't set earlier - disable it after sourcing +# autotest_common.sh +if [[ $- =~ x ]]; then + source $SPDK_BUILD_DIR/test/common/autotest_common.sh +else + source $SPDK_BUILD_DIR/test/common/autotest_common.sh + set +x +fi + +function get_vhost_dir() +{ + if [[ ! -z "$1" ]]; then + assert_number "$1" + local vhost_num=$1 + else + local vhost_num=0 + fi + + echo "$SPDK_VHOST_SCSI_TEST_DIR${vhost_num}" +} + +function spdk_vhost_list_all() +{ + shopt -s nullglob + local vhost_list="$(echo $SPDK_VHOST_SCSI_TEST_DIR[0-9]*)" + shopt -u nullglob + + if [[ ! 
-z "$vhost_list" ]]; then + vhost_list="$(basename --multiple $vhost_list)" + echo "${vhost_list//vhost/}" + fi +} + +function spdk_vhost_run() +{ + local param + local vhost_num=0 + local vhost_conf_path="" + local memory=1024 + + for param in "$@"; do + case $param in + --vhost-num=*) + vhost_num="${param#*=}" + assert_number "$vhost_num" + ;; + --conf-path=*) local vhost_conf_path="${param#*=}" ;; + --json-path=*) local vhost_json_path="${param#*=}" ;; + --memory=*) local memory=${param#*=} ;; + --no-pci*) local no_pci="-u" ;; + *) + error "Invalid parameter '$param'" + return 1 + ;; + esac + done + + local vhost_dir="$(get_vhost_dir $vhost_num)" + local vhost_app="$SPDK_BUILD_DIR/app/vhost/vhost" + local vhost_log_file="$vhost_dir/vhost.log" + local vhost_pid_file="$vhost_dir/vhost.pid" + local vhost_socket="$vhost_dir/usvhost" + local vhost_conf_template="$vhost_conf_path/vhost.conf.in" + local vhost_conf_file="$vhost_conf_path/vhost.conf" + notice "starting vhost app in background" + [[ -r "$vhost_pid_file" ]] && spdk_vhost_kill $vhost_num + [[ -d $vhost_dir ]] && rm -f $vhost_dir/* + mkdir -p $vhost_dir + + if [[ ! 
-x $vhost_app ]]; then + error "application not found: $vhost_app" + return 1 + fi + + local reactor_mask="vhost_${vhost_num}_reactor_mask" + reactor_mask="${!reactor_mask}" + + local master_core="vhost_${vhost_num}_master_core" + master_core="${!master_core}" + + if [[ -z "$reactor_mask" ]] || [[ -z "$master_core" ]]; then + error "Parameters vhost_${vhost_num}_reactor_mask or vhost_${vhost_num}_master_core not found in autotest.config file" + return 1 + fi + + local cmd="$vhost_app -m $reactor_mask -p $master_core -s $memory -r $vhost_dir/rpc.sock $no_pci" + if [[ -n "$vhost_conf_path" ]]; then + cp $vhost_conf_template $vhost_conf_file + $SPDK_BUILD_DIR/scripts/gen_nvme.sh >> $vhost_conf_file + cmd="$vhost_app -m $reactor_mask -p $master_core -c $vhost_conf_file -s $memory -r $vhost_dir/rpc.sock $no_pci" + fi + + notice "Loging to: $vhost_log_file" + notice "Socket: $vhost_socket" + notice "Command: $cmd" + + timing_enter vhost_start + cd $vhost_dir; $cmd & + vhost_pid=$! + echo $vhost_pid > $vhost_pid_file + + notice "waiting for app to run..." + waitforlisten "$vhost_pid" "$vhost_dir/rpc.sock" + #do not generate nvmes if pci access is disabled + if [[ -z "$vhost_conf_path" ]] && [[ -z "$no_pci" ]]; then + $SPDK_BUILD_DIR/scripts/gen_nvme.sh "--json" | $SPDK_BUILD_DIR/scripts/rpc.py\ + -s $vhost_dir/rpc.sock load_subsystem_config + fi + + if [[ -n "$vhost_json_path" ]]; then + $SPDK_BUILD_DIR/scripts/rpc.py -s $vhost_dir/rpc.sock load_config < "$vhost_json_path/conf.json" + fi + + notice "vhost started - pid=$vhost_pid" + timing_exit vhost_start + + rm -f $vhost_conf_file +} + +function spdk_vhost_kill() +{ + local rc=0 + local vhost_num=0 + if [[ ! -z "$1" ]]; then + vhost_num=$1 + assert_number "$vhost_num" + fi + + local vhost_pid_file="$(get_vhost_dir $vhost_num)/vhost.pid" + + if [[ ! 
-r $vhost_pid_file ]]; then + warning "no vhost pid file found" + return 0 + fi + + timing_enter vhost_kill + local vhost_pid="$(cat $vhost_pid_file)" + notice "killing vhost (PID $vhost_pid) app" + + if /bin/kill -INT $vhost_pid >/dev/null; then + notice "sent SIGINT to vhost app - waiting 60 seconds to exit" + for ((i=0; i<60; i++)); do + if /bin/kill -0 $vhost_pid; then + echo "." + sleep 1 + else + break + fi + done + if /bin/kill -0 $vhost_pid; then + error "ERROR: vhost was NOT killed - sending SIGABRT" + /bin/kill -ABRT $vhost_pid + rm $vhost_pid_file + rc=1 + else + while kill -0 $vhost_pid; do + echo "." + done + fi + elif /bin/kill -0 $vhost_pid; then + error "vhost NOT killed - you need to kill it manually" + rc=1 + else + notice "vhost was no running" + fi + + timing_exit vhost_kill + if [[ $rc == 0 ]]; then + rm $vhost_pid_file + fi + + return $rc +} + +### +# Mgmt functions +### + +function assert_number() +{ + [[ "$1" =~ [0-9]+ ]] && return 0 + + error "Invalid or missing paramter: need number but got '$1'" + return 1; +} + +# Helper to validate VM number +# param $1 VM number +# +function vm_num_is_valid() +{ + [[ "$1" =~ ^[0-9]+$ ]] && return 0 + + error "Invalid or missing paramter: vm number '$1'" + return 1; +} + + +# Print network socket for given VM number +# param $1 virtual machine number +# +function vm_ssh_socket() +{ + vm_num_is_valid $1 || return 1 + local vm_dir="$VM_BASE_DIR/$1" + + cat $vm_dir/ssh_socket +} + +function vm_fio_socket() +{ + vm_num_is_valid $1 || return 1 + local vm_dir="$VM_BASE_DIR/$1" + + cat $vm_dir/fio_socket +} + +function vm_create_ssh_config() +{ + local ssh_config="$VM_BASE_DIR/ssh_config" + if [[ ! 
-f $ssh_config ]]; then + ( + echo "Host *" + echo " ControlPersist=10m" + echo " ConnectTimeout=1" + echo " Compression=no" + echo " ControlMaster=auto" + echo " UserKnownHostsFile=/dev/null" + echo " StrictHostKeyChecking=no" + echo " User root" + echo " ControlPath=/tmp/%r@%h:%p.ssh" + echo "" + ) > $ssh_config + # Control path created at /tmp because of live migration test case 3. + # In case of using sshfs share for the test - control path cannot be + # on share because remote server will fail on ssh commands. + fi +} + +# Execute ssh command on given VM +# param $1 virtual machine number +# +function vm_ssh() +{ + vm_num_is_valid $1 || return 1 + vm_create_ssh_config + local ssh_config="$VM_BASE_DIR/ssh_config" + + local ssh_cmd="ssh -i $SPDK_VHOST_SSH_KEY_FILE -F $ssh_config \ + -p $(vm_ssh_socket $1) $VM_SSH_OPTIONS 127.0.0.1" + + shift + $ssh_cmd "$@" +} + +# Execute scp command on given VM +# param $1 virtual machine number +# +function vm_scp() +{ + vm_num_is_valid $1 || return 1 + vm_create_ssh_config + local ssh_config="$VM_BASE_DIR/ssh_config" + + local scp_cmd="scp -i $SPDK_VHOST_SSH_KEY_FILE -F $ssh_config \ + -P $(vm_ssh_socket $1) " + + shift + $scp_cmd "$@" +} + + +# check if specified VM is running +# param $1 VM num +function vm_is_running() +{ + vm_num_is_valid $1 || return 1 + local vm_dir="$VM_BASE_DIR/$1" + + if [[ ! -r $vm_dir/qemu.pid ]]; then + return 1 + fi + + local vm_pid="$(cat $vm_dir/qemu.pid)" + + if /bin/kill -0 $vm_pid; then + return 0 + else + if [[ $EUID -ne 0 ]]; then + warning "not root - assuming VM running since can't be checked" + return 0 + fi + + # not running - remove pid file + rm $vm_dir/qemu.pid + return 1 + fi +} + +# check if specified VM is running +# param $1 VM num +function vm_os_booted() +{ + vm_num_is_valid $1 || return 1 + local vm_dir="$VM_BASE_DIR/$1" + + if [[ ! -r $vm_dir/qemu.pid ]]; then + error "VM $1 is not running" + return 1 + fi + + if ! 
VM_SSH_OPTIONS="-o ControlMaster=no" vm_ssh $1 "true" 2>/dev/null; then + # Shutdown existing master. Ignore errors as it might not exist. + VM_SSH_OPTIONS="-O exit" vm_ssh $1 "true" 2>/dev/null + return 1 + fi + + return 0 +} + + +# Shutdown given VM +# param $1 virtual machine number +# return non-zero in case of error. +function vm_shutdown() +{ + vm_num_is_valid $1 || return 1 + local vm_dir="$VM_BASE_DIR/$1" + if [[ ! -d "$vm_dir" ]]; then + error "VM$1 ($vm_dir) not exist - setup it first" + return 1 + fi + + if ! vm_is_running $1; then + notice "VM$1 ($vm_dir) is not running" + return 0 + fi + + # Temporarily disabling exit flag for next ssh command, since it will + # "fail" due to shutdown + notice "Shutting down virtual machine $vm_dir" + set +e + vm_ssh $1 "nohup sh -c 'shutdown -h -P now'" || true + notice "VM$1 is shutting down - wait a while to complete" + set -e +} + +# Kill given VM +# param $1 virtual machine number +# +function vm_kill() +{ + vm_num_is_valid $1 || return 1 + local vm_dir="$VM_BASE_DIR/$1" + + if [[ ! -r $vm_dir/qemu.pid ]]; then + return 0 + fi + + local vm_pid="$(cat $vm_dir/qemu.pid)" + + notice "Killing virtual machine $vm_dir (pid=$vm_pid)" + # First kill should fail, second one must fail + if /bin/kill $vm_pid; then + notice "process $vm_pid killed" + rm $vm_dir/qemu.pid + elif vm_is_running $1; then + error "Process $vm_pid NOT killed" + return 1 + fi +} + +# List all VM numbers in VM_BASE_DIR +# +function vm_list_all() +{ + local vms="$(shopt -s nullglob; echo $VM_BASE_DIR/[0-9]*)" + if [[ ! 
-z "$vms" ]]; then + basename --multiple $vms + fi +} + +# Kills all VM in $VM_BASE_DIR +# +function vm_kill_all() +{ + local vm + for vm in $(vm_list_all); do + vm_kill $vm + done +} + +# Shutdown all VM in $VM_BASE_DIR +# +function vm_shutdown_all() +{ + local shell_restore_x="$( [[ "$-" =~ x ]] && echo 'set -x' )" + # XXX: temporally disable to debug shutdown issue + # set +x + + local vms=$(vm_list_all) + local vm + + for vm in $vms; do + vm_shutdown $vm + done + + notice "Waiting for VMs to shutdown..." + local timeo=30 + while [[ $timeo -gt 0 ]]; do + local all_vms_down=1 + for vm in $vms; do + if vm_is_running $vm; then + all_vms_down=0 + break + fi + done + + if [[ $all_vms_down == 1 ]]; then + notice "All VMs successfully shut down" + $shell_restore_x + return 0 + fi + + ((timeo-=1)) + sleep 1 + done + + $shell_restore_x + error "Timeout waiting for some VMs to shutdown" + return 1 +} + +function vm_setup() +{ + local shell_restore_x="$( [[ "$-" =~ x ]] && echo 'set -x' )" + local OPTIND optchar vm_num + + local os="" + local os_mode="" + local qemu_args="" + local disk_type_g=NOT_DEFINED + local read_only="false" + local disks="" + local raw_cache="" + local vm_incoming="" + local vm_migrate_to="" + local force_vm="" + local guest_memory=1024 + local queue_number="" + local vhost_dir="$(get_vhost_dir)" + while getopts ':-:' optchar; do + case "$optchar" in + -) + case "$OPTARG" in + os=*) local os="${OPTARG#*=}" ;; + os-mode=*) local os_mode="${OPTARG#*=}" ;; + qemu-args=*) local qemu_args="${qemu_args} ${OPTARG#*=}" ;; + disk-type=*) local disk_type_g="${OPTARG#*=}" ;; + read-only=*) local read_only="${OPTARG#*=}" ;; + disks=*) local disks="${OPTARG#*=}" ;; + raw-cache=*) local raw_cache=",cache${OPTARG#*=}" ;; + force=*) local force_vm=${OPTARG#*=} ;; + memory=*) local guest_memory=${OPTARG#*=} ;; + queue_num=*) local queue_number=${OPTARG#*=} ;; + incoming=*) local vm_incoming="${OPTARG#*=}" ;; + migrate-to=*) local vm_migrate_to="${OPTARG#*=}" ;; + 
vhost-num=*) local vhost_dir="$(get_vhost_dir ${OPTARG#*=})" ;; + spdk-boot=*) local boot_from="${OPTARG#*=}" ;; + *) + error "unknown argument $OPTARG" + return 1 + esac + ;; + *) + error "vm_create Unknown param $OPTARG" + return 1 + ;; + esac + done + + # Find next directory we can use + if [[ ! -z $force_vm ]]; then + vm_num=$force_vm + + vm_num_is_valid $vm_num || return 1 + local vm_dir="$VM_BASE_DIR/$vm_num" + [[ -d $vm_dir ]] && warning "removing existing VM in '$vm_dir'" + else + local vm_dir="" + + set +x + for (( i=0; i<=256; i++)); do + local vm_dir="$VM_BASE_DIR/$i" + [[ ! -d $vm_dir ]] && break + done + $shell_restore_x + + vm_num=$i + fi + + if [[ $i -eq 256 ]]; then + error "no free VM found. do some cleanup (256 VMs created, are you insane?)" + return 1 + fi + + if [[ ! -z "$vm_migrate_to" && ! -z "$vm_incoming" ]]; then + error "'--incoming' and '--migrate-to' cannot be used together" + return 1 + elif [[ ! -z "$vm_incoming" ]]; then + if [[ ! -z "$os_mode" || ! -z "$os_img" ]]; then + error "'--incoming' can't be used together with '--os' nor '--os-mode'" + return 1 + fi + + os_mode="original" + os="$VM_BASE_DIR/$vm_incoming/os.qcow2" + elif [[ ! -z "$vm_migrate_to" ]]; then + [[ "$os_mode" != "backing" ]] && warning "Using 'backing' mode for OS since '--migrate-to' is used" + os_mode=backing + fi + + notice "Creating new VM in $vm_dir" + mkdir -p $vm_dir + + if [[ "$os_mode" == "backing" ]]; then + notice "Creating backing file for OS image file: $os" + if ! 
$QEMU_PREFIX/bin/qemu-img create -f qcow2 -b $os $vm_dir/os.qcow2; then + error "Failed to create OS backing file in '$vm_dir/os.qcow2' using '$os'" + return 1 + fi + + local os=$vm_dir/os.qcow2 + elif [[ "$os_mode" == "original" ]]; then + warning "Using original OS image file: $os" + elif [[ "$os_mode" != "snapshot" ]]; then + if [[ -z "$os_mode" ]]; then + notice "No '--os-mode' parameter provided - using 'snapshot'" + os_mode="snapshot" + else + error "Invalid '--os-mode=$os_mode'" + return 1 + fi + fi + + # WARNING: + # each cmd+= must contain ' ${eol}' at the end + # + local eol="\\\\\n " + local qemu_mask_param="VM_${vm_num}_qemu_mask" + local qemu_numa_node_param="VM_${vm_num}_qemu_numa_node" + + if [[ -z "${!qemu_mask_param}" ]] || [[ -z "${!qemu_numa_node_param}" ]]; then + error "Parameters ${qemu_mask_param} or ${qemu_numa_node_param} not found in autotest.config file" + return 1 + fi + + local task_mask=${!qemu_mask_param} + + notice "TASK MASK: $task_mask" + local cmd="taskset -a -c $task_mask $QEMU_PREFIX/bin/qemu-system-x86_64 ${eol}" + local vm_socket_offset=$(( 10000 + 100 * vm_num )) + + local ssh_socket=$(( vm_socket_offset + 0 )) + local fio_socket=$(( vm_socket_offset + 1 )) + local monitor_port=$(( vm_socket_offset + 2 )) + local migration_port=$(( vm_socket_offset + 3 )) + local gdbserver_socket=$(( vm_socket_offset + 4 )) + local vnc_socket=$(( 100 + vm_num )) + local qemu_pid_file="$vm_dir/qemu.pid" + local cpu_num=0 + + set +x + # cpu list for taskset can be comma separated or range + # or both at the same time, so first split on commas + cpu_list=$(echo $task_mask | tr "," "\n") + queue_number=0 + for c in $cpu_list; do + # if range is detected - count how many cpus + if [[ $c =~ [0-9]+-[0-9]+ ]]; then + val=$(($c-1)) + val=${val#-} + else + val=1 + fi + cpu_num=$((cpu_num+val)) + queue_number=$((queue_number+val)) + done + + if [ -z $queue_number ]; then + queue_number=$cpu_num + fi + + $shell_restore_x + + local 
node_num=${!qemu_numa_node_param} + local boot_disk_present=false + notice "NUMA NODE: $node_num" + cmd+="-m $guest_memory --enable-kvm -cpu host -smp $cpu_num -vga std -vnc :$vnc_socket -daemonize ${eol}" + cmd+="-object memory-backend-file,id=mem,size=${guest_memory}M,mem-path=/dev/hugepages,share=on,prealloc=yes,host-nodes=$node_num,policy=bind ${eol}" + [[ $os_mode == snapshot ]] && cmd+="-snapshot ${eol}" + [[ ! -z "$vm_incoming" ]] && cmd+=" -incoming tcp:0:$migration_port ${eol}" + cmd+="-monitor telnet:127.0.0.1:$monitor_port,server,nowait ${eol}" + cmd+="-numa node,memdev=mem ${eol}" + cmd+="-pidfile $qemu_pid_file ${eol}" + cmd+="-serial file:$vm_dir/serial.log ${eol}" + cmd+="-D $vm_dir/qemu.log ${eol}" + cmd+="-net user,hostfwd=tcp::$ssh_socket-:22,hostfwd=tcp::$fio_socket-:8765 ${eol}" + cmd+="-net nic ${eol}" + if [[ -z "$boot_from" ]]; then + cmd+="-drive file=$os,if=none,id=os_disk ${eol}" + cmd+="-device ide-hd,drive=os_disk,bootindex=0 ${eol}" + fi + + if ( [[ $disks == '' ]] && [[ $disk_type_g == virtio* ]] ); then + disks=1 + fi + + for disk in ${disks//:/ }; do + if [[ $disk = *","* ]]; then + disk_type=${disk#*,} + disk=${disk%,*} + else + disk_type=$disk_type_g + fi + + case $disk_type in + virtio) + local raw_name="RAWSCSI" + local raw_disk=$vm_dir/test.img + + if [[ ! -z $disk ]]; then + [[ ! -b $disk ]] && touch $disk + local raw_disk=$(readlink -f $disk) + fi + + # Create disk file if it not exist or it is smaller than 1G + if ( [[ -f $raw_disk ]] && [[ $(stat --printf="%s" $raw_disk) -lt $((1024 * 1024 * 1024)) ]] ) || \ + [[ ! -e $raw_disk ]]; then + if [[ $raw_disk =~ /dev/.* ]]; then + error \ + "ERROR: Virtio disk point to missing device ($raw_disk) -\n" \ + " this is probably not what you want." 
+ return 1 + fi + + notice "Creating Virtio disc $raw_disk" + dd if=/dev/zero of=$raw_disk bs=1024k count=1024 + else + notice "Using existing image $raw_disk" + fi + + cmd+="-device virtio-scsi-pci,num_queues=$queue_number ${eol}" + cmd+="-device scsi-hd,drive=hd$i,vendor=$raw_name ${eol}" + cmd+="-drive if=none,id=hd$i,file=$raw_disk,format=raw$raw_cache ${eol}" + ;; + spdk_vhost_scsi) + notice "using socket $vhost_dir/naa.$disk.$vm_num" + cmd+="-chardev socket,id=char_$disk,path=$vhost_dir/naa.$disk.$vm_num ${eol}" + cmd+="-device vhost-user-scsi-pci,id=scsi_$disk,num_queues=$queue_number,chardev=char_$disk" + if [[ "$disk" == "$boot_from" ]]; then + cmd+=",bootindex=0" + boot_disk_present=true + fi + cmd+=" ${eol}" + ;; + spdk_vhost_blk) + notice "using socket $vhost_dir/naa.$disk.$vm_num" + cmd+="-chardev socket,id=char_$disk,path=$vhost_dir/naa.$disk.$vm_num ${eol}" + cmd+="-device vhost-user-blk-pci,num-queues=$queue_number,chardev=char_$disk" + if [[ "$disk" == "$boot_from" ]]; then + cmd+=",bootindex=0" + boot_disk_present=true + fi + cmd+=" ${eol}" + ;; + kernel_vhost) + if [[ -z $disk ]]; then + error "need WWN for $disk_type" + return 1 + elif [[ ! $disk =~ ^[[:alpha:]]{3}[.][[:xdigit:]]+$ ]]; then + error "$disk_type - disk(wnn)=$disk does not look like WNN number" + return 1 + fi + notice "Using kernel vhost disk wwn=$disk" + cmd+=" -device vhost-scsi-pci,wwpn=$disk,num_queues=$queue_number ${eol}" + ;; + *) + error "unknown mode '$disk_type', use: virtio, spdk_vhost_scsi, spdk_vhost_blk or kernel_vhost" + return 1 + esac + done + + if [[ -n $boot_from ]] && [[ $boot_disk_present == false ]]; then + error "Boot from $boot_from is selected but device is not present" + return 1 + fi + + [[ ! 
-z $qemu_args ]] && cmd+=" $qemu_args ${eol}" + # remove last $eol + cmd="${cmd%\\\\\\n }" + + notice "Saving to $vm_dir/run.sh" + ( + echo '#!/bin/bash' + echo 'if [[ $EUID -ne 0 ]]; then ' + echo ' echo "Go away user come back as root"' + echo ' exit 1' + echo 'fi'; + echo + echo -e "qemu_cmd=\"$cmd\""; + echo + echo "echo 'Running VM in $vm_dir'" + echo "rm -f $qemu_pid_file" + echo '$qemu_cmd' + echo "echo 'Waiting for QEMU pid file'" + echo "sleep 1" + echo "[[ ! -f $qemu_pid_file ]] && sleep 1" + echo "[[ ! -f $qemu_pid_file ]] && echo 'ERROR: no qemu pid file found' && exit 1" + echo + echo "chmod +r $vm_dir/*" + echo + echo "echo '=== qemu.log ==='" + echo "cat $vm_dir/qemu.log" + echo "echo '=== qemu.log ==='" + echo '# EOF' + ) > $vm_dir/run.sh + chmod +x $vm_dir/run.sh + + # Save generated sockets redirection + echo $ssh_socket > $vm_dir/ssh_socket + echo $fio_socket > $vm_dir/fio_socket + echo $monitor_port > $vm_dir/monitor_port + + rm -f $vm_dir/migration_port + [[ -z $vm_incoming ]] || echo $migration_port > $vm_dir/migration_port + + echo $gdbserver_socket > $vm_dir/gdbserver_socket + echo $vnc_socket >> $vm_dir/vnc_socket + + [[ -z $vm_incoming ]] || ln -fs $VM_BASE_DIR/$vm_incoming $vm_dir/vm_incoming + [[ -z $vm_migrate_to ]] || ln -fs $VM_BASE_DIR/$vm_migrate_to $vm_dir/vm_migrate_to +} + +function vm_run() +{ + local OPTIND optchar vm + local run_all=false + local vms_to_run="" + + while getopts 'a-:' optchar; do + case "$optchar" in + a) run_all=true ;; + *) + error "Unknown param $OPTARG" + return 1 + ;; + esac + done + + if $run_all; then + vms_to_run="$(vm_list_all)" + else + shift $((OPTIND-1)) + for vm in $@; do + vm_num_is_valid $1 || return 1 + if [[ ! 
-x $VM_BASE_DIR/$vm/run.sh ]]; then + error "VM$vm not defined - setup it first" + return 1 + fi + vms_to_run+=" $vm" + done + fi + + for vm in $vms_to_run; do + if vm_is_running $vm; then + warning "VM$vm ($VM_BASE_DIR/$vm) already running" + continue + fi + + notice "running $VM_BASE_DIR/$vm/run.sh" + if ! $VM_BASE_DIR/$vm/run.sh; then + error "FAILED to run vm $vm" + return 1 + fi + done +} + +# Wait for all created VMs to boot. +# param $1 max wait time +function vm_wait_for_boot() +{ + assert_number $1 + + local shell_restore_x="$( [[ "$-" =~ x ]] && echo 'set -x' )" + set +x + + local all_booted=false + local timeout_time=$1 + [[ $timeout_time -lt 10 ]] && timeout_time=10 + local timeout_time=$(date -d "+$timeout_time seconds" +%s) + + notice "Waiting for VMs to boot" + shift + if [[ "$@" == "" ]]; then + local vms_to_check="$VM_BASE_DIR/[0-9]*" + else + local vms_to_check="" + for vm in $@; do + vms_to_check+=" $VM_BASE_DIR/$vm" + done + fi + + for vm in $vms_to_check; do + local vm_num=$(basename $vm) + local i=0 + notice "waiting for VM$vm_num ($vm)" + while ! vm_os_booted $vm_num; do + if ! vm_is_running $vm_num; then + + warning "VM $vm_num is not running" + warning "================" + warning "QEMU LOG:" + if [[ -r $vm/qemu.log ]]; then + cat $vm/qemu.log + else + warning "LOG not found" + fi + + warning "VM LOG:" + if [[ -r $vm/serial.log ]]; then + cat $vm/serial.log + else + warning "LOG not found" + fi + warning "================" + $shell_restore_x + return 1 + fi + + if [[ $(date +%s) -gt $timeout_time ]]; then + warning "timeout waiting for machines to boot" + $shell_restore_x + return 1 + fi + if (( i > 30 )); then + local i=0 + echo + fi + echo -n "." 
+ sleep 1 + done + echo "" + notice "VM$vm_num ready" + #Change Timeout for stopping services to prevent lengthy powerdowns + vm_ssh $vm_num "echo 'DefaultTimeoutStopSec=10' >> /etc/systemd/system.conf; systemctl daemon-reexec" + done + + notice "all VMs ready" + $shell_restore_x + return 0 +} + +function vm_start_fio_server() +{ + local OPTIND optchar + local readonly='' + while getopts ':-:' optchar; do + case "$optchar" in + -) + case "$OPTARG" in + fio-bin=*) local fio_bin="${OPTARG#*=}" ;; + readonly) local readonly="--readonly" ;; + *) error "Invalid argument '$OPTARG'" && return 1;; + esac + ;; + *) error "Invalid argument '$OPTARG'" && return 1;; + esac + done + + shift $(( OPTIND - 1 )) + for vm_num in $@; do + notice "Starting fio server on VM$vm_num" + if [[ $fio_bin != "" ]]; then + cat $fio_bin | vm_ssh $vm_num 'cat > /root/fio; chmod +x /root/fio' + vm_ssh $vm_num /root/fio $readonly --eta=never --server --daemonize=/root/fio.pid + else + vm_ssh $vm_num fio $readonly --eta=never --server --daemonize=/root/fio.pid + fi + done +} + +function vm_check_scsi_location() +{ + # Script to find wanted disc + local script='shopt -s nullglob; \ + for entry in /sys/block/sd*; do \ + disk_type="$(cat $entry/device/vendor)"; \ + if [[ $disk_type == INTEL* ]] || [[ $disk_type == RAWSCSI* ]] || [[ $disk_type == LIO-ORG* ]]; then \ + fname=$(basename $entry); \ + echo -n " $fname"; \ + fi; \ + done' + + SCSI_DISK="$(echo "$script" | vm_ssh $1 bash -s)" + + if [[ -z "$SCSI_DISK" ]]; then + error "no test disk found!" 
+ return 1 + fi +} + +# Script to perform scsi device reset on all disks in VM +# param $1 VM num +# param $2..$n Disks to perform reset on +function vm_reset_scsi_devices() +{ + for disk in "${@:2}"; do + notice "VM$1 Performing device reset on disk $disk" + vm_ssh $1 sg_reset /dev/$disk -vNd + done +} + +function vm_check_blk_location() +{ + local script='shopt -s nullglob; cd /sys/block; echo vd*' + SCSI_DISK="$(echo "$script" | vm_ssh $1 bash -s)" + + if [[ -z "$SCSI_DISK" ]]; then + error "no blk test disk found!" + return 1 + fi +} + +function run_fio() +{ + local arg + local job_file="" + local fio_bin="" + local vms=() + local out="" + local fio_disks="" + local vm + local run_server_mode=true + + for arg in $@; do + case "$arg" in + --job-file=*) local job_file="${arg#*=}" ;; + --fio-bin=*) local fio_bin="${arg#*=}" ;; + --vm=*) vms+=( "${arg#*=}" ) ;; + --out=*) + local out="${arg#*=}" + mkdir -p $out + ;; + --local) run_server_mode=false ;; + --json) json="--json" ;; + *) + error "Invalid argument '$arg'" + return 1 + ;; + esac + done + + if [[ ! -z "$fio_bin" && ! -r "$fio_bin" ]]; then + error "FIO binary '$fio_bin' does not exist" + return 1 + fi + + if [[ ! -r "$job_file" ]]; then + error "Fio job '$job_file' does not exist" + return 1 + fi + + local job_fname=$(basename "$job_file") + # prepare job file for each VM + for vm in ${vms[@]}; do + local vm_num=${vm%%:*} + local vmdisks=${vm#*:} + + sed "s@filename=@filename=$vmdisks@" $job_file | vm_ssh $vm_num "cat > /root/$job_fname" + fio_disks+="127.0.0.1:$(vm_fio_socket $vm_num):$vmdisks," + + vm_ssh $vm_num cat /root/$job_fname + if ! $run_server_mode; then + if [[ ! -z "$fio_bin" ]]; then + cat $fio_bin | vm_ssh $vm_num 'cat > /root/fio; chmod +x /root/fio' + fi + + notice "Running local fio on VM $vm_num" + vm_ssh $vm_num "nohup /root/fio /root/$job_fname 1>/root/$job_fname.out 2>/root/$job_fname.out </dev/null & echo \$! > /root/fio.pid" + fi + done + + if ! 
$run_server_mode; then + # Give FIO time to run + sleep 0.5 + return 0 + fi + + $SPDK_BUILD_DIR/test/vhost/common/run_fio.py --job-file=/root/$job_fname \ + $([[ ! -z "$fio_bin" ]] && echo "--fio-bin=$fio_bin") \ + --out=$out $json ${fio_disks%,} +} + +# Shutdown or kill any running VM and SPDK APP. +# +function at_app_exit() +{ + local vhost_num + + notice "APP EXITING" + notice "killing all VMs" + vm_kill_all + # Kill vhost application + notice "killing vhost app" + + for vhost_num in $(spdk_vhost_list_all); do + spdk_vhost_kill $vhost_num + done + + notice "EXIT DONE" +} + +function error_exit() +{ + trap - ERR + print_backtrace + set +e + error "Error on $1 $2" + + at_app_exit + exit 1 +} diff --git a/src/spdk/test/vhost/common/fio_jobs/default_initiator.job b/src/spdk/test/vhost/common/fio_jobs/default_initiator.job new file mode 100644 index 00000000..43c1404b --- /dev/null +++ b/src/spdk/test/vhost/common/fio_jobs/default_initiator.job @@ -0,0 +1,9 @@ +[global] +thread=1 +group_reporting=1 +direct=1 +time_based=1 +do_verify=1 +verify=md5 +verify_backlog=1024 +fsync_on_close=1 diff --git a/src/spdk/test/vhost/common/fio_jobs/default_integrity.job b/src/spdk/test/vhost/common/fio_jobs/default_integrity.job new file mode 100644 index 00000000..06398b50 --- /dev/null +++ b/src/spdk/test/vhost/common/fio_jobs/default_integrity.job @@ -0,0 +1,19 @@ +[global] +blocksize_range=4k-512k +iodepth=512 +iodepth_batch=128 +iodepth_low=256 +ioengine=libaio +size=1G +io_size=4G +filename= +group_reporting +thread +numjobs=1 +direct=1 +rw=randwrite +do_verify=1 +verify=md5 +verify_backlog=1024 +fsync_on_close=1 +[nvme-host] diff --git a/src/spdk/test/vhost/common/fio_jobs/default_integrity_nightly.job b/src/spdk/test/vhost/common/fio_jobs/default_integrity_nightly.job new file mode 100644 index 00000000..09740178 --- /dev/null +++ b/src/spdk/test/vhost/common/fio_jobs/default_integrity_nightly.job @@ -0,0 +1,23 @@ +[global] +ioengine=libaio +runtime=10 +filename= 
+group_reporting +thread +numjobs=1 +direct=1 +do_verify=1 +verify=md5 +verify_backlog=1024 + +[randwrite] +stonewall +rw=randwrite +bs=512k +iodepth=256 + +[randrw] +stonewall +rw=randrw +bs=128k +iodepth=64 diff --git a/src/spdk/test/vhost/common/fio_jobs/default_performance.job b/src/spdk/test/vhost/common/fio_jobs/default_performance.job new file mode 100644 index 00000000..a51cb5ed --- /dev/null +++ b/src/spdk/test/vhost/common/fio_jobs/default_performance.job @@ -0,0 +1,16 @@ +[global] +blocksize_range=4k-512k +iodepth=512 +iodepth_batch=128 +iodepth_low=256 +ioengine=libaio +size=10G +filename= +ramp_time=10 +group_reporting +thread +numjobs=1 +direct=1 +rw=randread +fsync_on_close=1 +[nvme-host] diff --git a/src/spdk/test/vhost/common/run_fio.py b/src/spdk/test/vhost/common/run_fio.py new file mode 100755 index 00000000..0760b018 --- /dev/null +++ b/src/spdk/test/vhost/common/run_fio.py @@ -0,0 +1,168 @@ +#!/usr/bin/env python3 + +import os +import sys +import getopt +import subprocess +import signal +import re + +fio_bin = "fio" + + +def show_help(): + print("""Usage: {} run_fio.py [options] [args] + Description: + Run FIO job file 'fio.job' on remote machines. + NOTE: The job file must exist on remote machines on '/root/' directory. + Args: + [VMs] (ex. vm1_IP:vm1_port:vm1_disk1:vm_disk2,vm2_IP:vm2_port:vm2_disk1,etc...) + Options: + -h, --help Show this message. + -j, --job-file Paths to file with FIO job configuration on remote host. + -f, --fio-bin Location of FIO binary on local host (Default "fio") + -o, --out Directory used to save generated job files and + files with test results + -J, --json Use JSON format for output + -p, --perf-vmex Enable aggregating statistic for VMEXITS for VMs + """.format(os.path.split(sys.executable)[-1])) + + +def exec_cmd(cmd, blocking): + # Print result to STDOUT for now, we don't have json support yet. 
+ p = subprocess.Popen(cmd.split(" "), stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, stdin=subprocess.PIPE) + if blocking is True: + out, _ = p.communicate() + return p.returncode, out.decode() + return p + + +def save_file(path, mode, contents): + with open(path, mode) as fh: + fh.write(contents) + fh.close() + + +def run_fio(vms, fio_cfg_fname, out_path, perf_vmex=False, json=False): + global fio_bin + job_name = os.path.splitext(os.path.basename(fio_cfg_fname))[0] + + # Build command for FIO + fio_cmd = " ".join([fio_bin, "--eta=never"]) + if json: + fio_cmd = " ".join([fio_bin, "--output-format=json"]) + for vm in vms: + # vm[0] = IP address, vm[1] = Port number + fio_cmd = " ".join([fio_cmd, + "--client={vm_ip},{vm_port}".format(vm_ip=vm[0], vm_port=vm[1]), + "--remote-config {cfg}".format(cfg=fio_cfg_fname)]) + print(fio_cmd) + + if perf_vmex: + perf_dir = os.path.join(out_path, "perf_stats") + try: + os.mkdir(perf_dir) + except OSError: + pass + + # Start gathering perf statistics for host and VM guests + perf_rec_file = os.path.join(perf_dir, "perf.data.kvm") + perf_run_cmd = "perf kvm --host --guest " + \ + "-o {0} stat record -a".format(perf_rec_file) + print(perf_run_cmd) + perf_p = exec_cmd(perf_run_cmd, blocking=False) + + # Run FIO test on VMs + rc, out = exec_cmd(fio_cmd, blocking=True) + + # if for some reason output contains lines with "eta" - remove them + out = re.sub(r'.+\[eta\s+\d{2}m:\d{2}s\]', '', out) + + print(out) + + if rc != 0: + print("ERROR! 
While executing FIO jobs - RC: {rc}".format(rc=rc, out=out)) + sys.exit(rc) + else: + save_file(os.path.join(out_path, ".".join([job_name, "log"])), "w", out) + + if perf_vmex: + # Stop gathering perf statistics and prepare some result files + perf_p.send_signal(signal.SIGINT) + perf_p.wait() + + perf_stat_cmd = "perf kvm --host -i {perf_rec} stat report --event vmexit"\ + .format(perf_rec=perf_rec_file) + + rc, out = exec_cmd(" ".join([perf_stat_cmd, "--event vmexit"]), + blocking=True) + print("VMexit host stats:") + print("{perf_out}".format(perf_out=out)) + save_file(os.path.join(perf_dir, "vmexit_stats_" + job_name), + "w", "{perf_out}".format(perf_out=out)) + try: + os.remove(perf_rec_file) + except OSError: + pass + + +def main(): + global fio_bin + + abspath = os.path.abspath(__file__) + dname = os.path.dirname(abspath) + + vms = [] + fio_cfg = None + out_dir = None + perf_vmex = False + json = False + + try: + opts, args = getopt.getopt(sys.argv[1:], "hJj:f:o:p", + ["help", "job-file=", "fio-bin=", + "out=", "perf-vmex", "json"]) + except getopt.GetoptError: + show_help() + sys.exit(1) + + if len(args) < 1: + show_help() + sys.exit(1) + + for o, a in opts: + if o in ("-j", "--job-file"): + fio_cfg = a + elif o in ("-h", "--help"): + show_help() + sys.exit(1) + elif o in ("-p", "--perf-vmex"): + perf_vmex = True + elif o in ("-o", "--out"): + out_dir = a + elif o in ("-f", "--fio-bin"): + fio_bin = a + elif o in ("-J", "--json"): + json = True + + if fio_cfg is None: + print("ERROR! No FIO job provided!") + sys.exit(1) + + if out_dir is None or not os.path.exists(out_dir): + print("ERROR! 
Folder {out_dir} does not exist ".format(out_dir=out_dir)) + sys.exit(1) + + # Get IP, port and fio 'filename' information from positional args + for arg in args[0].split(","): + _ = arg.split(":") + ip, port, filenames = _[0], _[1], ":".join(_[2:]) + vms.append((ip, port, filenames)) + + print("Running job file: {0}".format(fio_cfg)) + run_fio(vms, fio_cfg, out_dir, perf_vmex, json) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/src/spdk/test/vhost/common/run_vhost.sh b/src/spdk/test/vhost/common/run_vhost.sh new file mode 100755 index 00000000..bd6c496a --- /dev/null +++ b/src/spdk/test/vhost/common/run_vhost.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash + +BASE_DIR=$(readlink -f $(dirname $0)) +[[ -z "$COMMON_DIR" ]] && COMMON_DIR="$(cd $BASE_DIR/../common && pwd)" +[[ -z "$TEST_DIR" ]] && TEST_DIR="$(cd $BASE_DIR/../../../../ && pwd)" + +vhost_num="" + +function usage() +{ + [[ ! -z $2 ]] && ( echo "$2"; echo ""; ) + echo "Shortcut script for running vhost app." + echo "Usage: $(basename $1) [-x] [-h|--help] [--clean-build] [--work-dir=PATH]" + echo "-h, --help print help and exit" + echo "-x Set -x for script debug" + echo " --work-dir=PATH Where to find source/project. [default=$TEST_DIR]" + echo " --conf-dir=PATH Path to directory with configuration for vhost" + echo " --vhost-num=NUM Optional: vhost instance NUM to start. Default: 0" + + exit 0 +} + +run_in_background=false +while getopts 'xh-:' optchar; do + case "$optchar" in + -) + case "$OPTARG" in + help) usage $0 ;; + work-dir=*) TEST_DIR="${OPTARG#*=}" ;; + conf-dir=*) CONF_DIR="${OPTARG#*=}" ;; + vhost-num=*) vhost_num="${OPTARG}" ;; + *) usage $0 echo "Invalid argument '$OPTARG'" ;; + esac + ;; + h) usage $0 ;; + x) set -x ;; + *) usage $0 "Invalid argument '$optchar'" ;; + esac +done + +if [[ $EUID -ne 0 ]]; then + fail "Go away user come back as root" +fi + +notice "$0" +notice "" + +. 
$COMMON_DIR/common.sh + +# Starting vhost with valid options +spdk_vhost_run $vhost_num --conf-path=$CONF_DIR diff --git a/src/spdk/test/vhost/common/vm_run.sh b/src/spdk/test/vhost/common/vm_run.sh new file mode 100755 index 00000000..03938f8c --- /dev/null +++ b/src/spdk/test/vhost/common/vm_run.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash + +BASE_DIR=$(readlink -f $(dirname $0)) +[[ -z "$COMMON_DIR" ]] && COMMON_DIR="$(cd $BASE_DIR/../common && pwd)" +[[ -z "$TEST_DIR" ]] && TEST_DIR="$(cd $BASE_DIR/../../../../ && pwd)" + +function usage() +{ + [[ ! -z $2 ]] && ( echo "$2"; echo ""; ) + echo "Shortcut script for enabling VMs" + echo "Usage: $(basename $1) [OPTIONS] VM..." + echo + echo "-h, --help print help and exit" + echo " --work-dir=WORK_DIR Where to find build file. Must exist. [default: ./..]" + echo "-a Run all VMs in WORK_DIR" + echo "-x set -x for script debug" + exit 0 +} +run_all=false +while getopts 'xah-:' optchar; do + case "$optchar" in + -) + case "$OPTARG" in + help) usage $0 ;; + work-dir=*) TEST_DIR="${OPTARG#*=}" ;; + *) usage $0 "Invalid argument '$OPTARG'" ;; + esac + ;; + h) usage $0 ;; + a) run_all=true ;; + x) set -x ;; + *) usage $0 "Invalid argument '$OPTARG'" + esac +done + +. $COMMON_DIR/common.sh + +if [[ $EUID -ne 0 ]]; then + fail "Go away user come back as root" +fi + +if $run_all; then + vm_run -a +else + shift $((OPTIND-1)) + notice "running VMs: $@" + vm_run "$@" +fi diff --git a/src/spdk/test/vhost/common/vm_setup.sh b/src/spdk/test/vhost/common/vm_setup.sh new file mode 100755 index 00000000..7e3599fd --- /dev/null +++ b/src/spdk/test/vhost/common/vm_setup.sh @@ -0,0 +1,78 @@ +#!/usr/bin/env bash + +BASE_DIR=$(readlink -f $(dirname $0)) +[[ -z "$COMMON_DIR" ]] && COMMON_DIR="$(cd $BASE_DIR/../common && pwd)" +[[ -z "$TEST_DIR" ]] && TEST_DIR="$(cd $BASE_DIR/../../../../ && pwd)" + +function usage() +{ + [[ ! 
-z $2 ]] && ( echo "$2"; echo ""; ) + echo "Shortcut script for setting up VMs for tests" + echo "Usage: $(basename $1) [OPTIONS] VM_NUM" + echo + echo "-h, --help Print help and exit" + echo " --work-dir=WORK_DIR Where to find build file. Must exit. (default: $TEST_DIR)" + echo " --force=VM_NUM Force VM_NUM reconfiguration if already exist" + echo " --disk-type=TYPE Perform specified test:" + echo " virtio - test host virtio-scsi-pci using file as disk image" + echo " kernel_vhost - use kernel driver vhost-scsi" + echo " spdk_vhost_scsi - use spdk vhost scsi" + echo " spdk_vhost_blk - use spdk vhost block" + echo " --read-only=true|false Enable/Disable read only for vhost_blk tests" + echo " --raw-cache=CACHE Use CACHE for virtio test: " + echo " writethrough, writeback, none, unsafe or directsyns" + echo " --disk=PATH[,disk_type] Disk to use in test. test specific meaning:" + echo " virtio - disk path (file or block device ex: /dev/nvme0n1)" + echo " kernel_vhost - the WWN number to be used" + echo " spdk_vhost_[scsi|blk] - the socket path." + echo " optional disk_type - set disk type for disk (overwrites test-type)" + echo " e.g. /dev/nvme0n1,spdk_vhost_scsi" + echo " --os=OS_QCOW2 Custom OS qcow2 image file" + echo " --os-mode=MODE MODE how to use provided image: default: backing" + echo " backing - create new image but use provided backing file" + echo " copy - copy provided image and use a copy" + echo " orginal - use file directly. Will modify the provided file" + echo " --incoming=VM_NUM Use VM_NUM as source migration VM." + echo " --migrate-to=VM_NUM Use VM_NUM as target migration VM." + echo " --vhost-num=NUM Optional: vhost instance NUM to be used by this VM. 
Default: 0" + echo "-x Turn on script debug (set -x)" + echo "-v Be more verbose" + exit 0 +} + +setup_params=() +for param in "$@"; do + case "$param" in + --help|-h) usage $0 ;; + --work-dir=*) + TEST_DIR="${param#*=}" + continue + ;; + --raw-cache=*) ;; + --disk-type=*) ;; + --disks=*) ;; + --os=*) ;; + --os-mode=*) ;; + --force=*) ;; + --incoming=*) ;; + --migrate-to=*) ;; + --read-only=*) ;; + -x) + set -x + continue + ;; + -v) + SPDK_VHOST_VERBOSE=true + continue + ;; + *) usage $0 "Invalid argument '$param'" ;; + esac + + setup_params+=( "$param" ) +done + +. $COMMON_DIR/common.sh + +vm_setup ${setup_params[@]} + +trap -- ERR diff --git a/src/spdk/test/vhost/common/vm_shutdown.sh b/src/spdk/test/vhost/common/vm_shutdown.sh new file mode 100755 index 00000000..1de1170f --- /dev/null +++ b/src/spdk/test/vhost/common/vm_shutdown.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash + +BASE_DIR=$(readlink -f $(dirname $0)) +[[ -z "$COMMON_DIR" ]] && COMMON_DIR="$(cd $BASE_DIR/../common && pwd)" +[[ -z "$TEST_DIR" ]] && TEST_DIR="$(cd $BASE_DIR/../../../../ && pwd)" + +function usage() +{ + [[ ! -z $2 ]] && ( echo "$2"; echo ""; ) + echo "Shortcut script for shutting down VMs" + echo "Usage: $(basename $1) [OPTIONS] [VMs]" + echo + echo "-h, --help print help and exit" + echo " --work-dir=WORK_DIR Where to find build file. Must exist. [default: ./..]" + echo "-a kill/shutdown all running VMs" + echo "-k kill instead of shutdown" + exit 0 +} +optspec='akh-:' +do_kill=false +all=false + +while getopts "$optspec" optchar; do + case "$optchar" in + -) + case "$OPTARG" in + help) usage $0 ;; + work-dir=*) TEST_DIR="${OPTARG#*=}" ;; + *) usage $0 "Invalid argument '$OPTARG'" ;; + esac + ;; + h) usage $0 ;; + k) do_kill=true ;; + a) all=true ;; + *) usage $0 "Invalid argument '$OPTARG'" + esac +done + +. 
$COMMON_DIR/common.sh + +if $do_kill && [[ $EUID -ne 0 ]]; then + echo "Go away user come back as root" + exit 1 +fi + +if $all; then + if do_kill; then + notice "killing all VMs" + vm_kill_all + else + notice "shutting down all VMs" + vm_shutdown_all + fi +else + shift $((OPTIND-1)) + + if do_kill; then + notice "INFO: killing VMs: $@" + for vm in $@; do + vm_kill $vm + done + else + notice "shutting down all VMs" + vm_shutdown_all + fi +fi diff --git a/src/spdk/test/vhost/common/vm_ssh.sh b/src/spdk/test/vhost/common/vm_ssh.sh new file mode 100755 index 00000000..abdc3322 --- /dev/null +++ b/src/spdk/test/vhost/common/vm_ssh.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash + +BASE_DIR=$(readlink -f $(dirname $0)) +[[ -z "$COMMON_DIR" ]] && COMMON_DIR="$(cd $BASE_DIR/../common && pwd)" +[[ -z "$TEST_DIR" ]] && TEST_DIR="$(cd $BASE_DIR/../../../../ && pwd)" + +function usage() +{ + [[ ! -z $2 ]] && ( echo "$2"; echo ""; ) + echo "Shortcut script for connecting to or executing command on selected VM" + echo "Usage: $(basename $1) [OPTIONS] VM_NUMBER" + echo + echo "-h, --help print help and exit" + echo " --work-dir=WORK_DIR Where to find build file. Must exist. [default: $TEST_DIR]" + echo "-w Don't wait for vm to boot" + echo "-x set -x for script debug" + exit 0 +} + +boot_wait=true +while getopts 'xwh-:' optchar; do + case "$optchar" in + -) + case "$OPTARG" in + help) usage $0 ;; + work-dir=*) TEST_DIR="${OPTARG#*=}" ;; + *) usage $0 "Invalid argument '$OPTARG'" ;; + esac ;; + h) usage $0 ;; + w) boot_wait=false ;; + x) set -x ;; + *) usage $0 "Invalid argument '$OPTARG'" ;; + esac +done + +. $COMMON_DIR/common.sh + +shift $((OPTIND-1)) +vm_num="$1" +shift + + +if ! vm_num_is_valid $vm_num; then + usage $0 "Invalid VM num $vm_num" + exit 1 +fi + +if $boot_wait; then + while ! vm_os_booted $vm_num; do + if ! 
vm_is_running $vm_num; then + fail "VM$vm_num is not running" + fi + notice "waiting for VM$vm_num to boot" + sleep 1 + done +fi + +vm_ssh $vm_num "$@" diff --git a/src/spdk/test/vhost/fiotest/autotest.sh b/src/spdk/test/vhost/fiotest/autotest.sh new file mode 100755 index 00000000..466ac141 --- /dev/null +++ b/src/spdk/test/vhost/fiotest/autotest.sh @@ -0,0 +1,247 @@ +#!/usr/bin/env bash +set -e +AUTOTEST_BASE_DIR=$(readlink -f $(dirname $0)) +[[ -z "$COMMON_DIR" ]] && COMMON_DIR="$(cd $AUTOTEST_BASE_DIR/../common && pwd)" +[[ -z "$TEST_DIR" ]] && TEST_DIR="$(cd $AUTOTEST_BASE_DIR/../../../../ && pwd)" + +dry_run=false +no_shutdown=false +fio_bin="" +remote_fio_bin="" +fio_jobs="" +test_type=spdk_vhost_scsi +reuse_vms=false +vms=() +used_vms="" +x="" + +function usage() +{ + [[ ! -z $2 ]] && ( echo "$2"; echo ""; ) + echo "Shortcut script for doing automated test" + echo "Usage: $(basename $1) [OPTIONS]" + echo + echo "-h, --help print help and exit" + echo " --test-type=TYPE Perform specified test:" + echo " virtio - test host virtio-scsi-pci using file as disk image" + echo " kernel_vhost - use kernel driver vhost-scsi" + echo " spdk_vhost_scsi - use spdk vhost scsi" + echo " spdk_vhost_blk - use spdk vhost block" + echo "-x set -x for script debug" + echo " --fio-bin=FIO Use specific fio binary (will be uploaded to VM)" + echo " --fio-job= Fio config to use for test." + echo " All VMs will run the same fio job when FIO executes." + echo " (no unique jobs for specific VMs)" + echo " --work-dir=WORK_DIR Where to find build file. Must exist. [default: $TEST_DIR]" + echo " --dry-run Don't perform any tests, run only and wait for enter to terminate" + echo " --no-shutdown Don't shutdown at the end but leave envirionment working" + echo " --vm=NUM[,OS][,DISKS] VM configuration. 
This parameter might be used more than once:" + echo " NUM - VM number (mandatory)" + echo " OS - VM os disk path (optional)" + echo " DISKS - VM os test disks/devices path (virtio - optional, kernel_vhost - mandatory)" + exit 0 +} + +#default raw file is NVMe drive + +while getopts 'xh-:' optchar; do + case "$optchar" in + -) + case "$OPTARG" in + help) usage $0 ;; + work-dir=*) TEST_DIR="${OPTARG#*=}" ;; + fio-bin=*) fio_bin="--fio-bin=${OPTARG#*=}" ;; + fio-job=*) fio_job="${OPTARG#*=}" ;; + dry-run) dry_run=true ;; + no-shutdown) no_shutdown=true ;; + test-type=*) test_type="${OPTARG#*=}" ;; + vm=*) vms+=("${OPTARG#*=}") ;; + *) usage $0 "Invalid argument '$OPTARG'" ;; + esac + ;; + h) usage $0 ;; + x) set -x + x="-x" ;; + *) usage $0 "Invalid argument '$OPTARG'" + esac +done +shift $(( OPTIND - 1 )) + +if [[ ! -r "$fio_job" ]]; then + fail "no fio job file specified" +fi + +. $COMMON_DIR/common.sh + +trap 'error_exit "${FUNCNAME}" "${LINENO}"' ERR + +vm_kill_all + +if [[ $test_type =~ "spdk_vhost" ]]; then + notice "===============" + notice "" + notice "running SPDK" + notice "" + spdk_vhost_run --json-path=$AUTOTEST_BASE_DIR + notice "" +fi + +notice "===============" +notice "" +notice "Setting up VM" +notice "" + +rpc_py="$SPDK_BUILD_DIR/scripts/rpc.py -s $(get_vhost_dir)/rpc.sock" + +for vm_conf in ${vms[@]}; do + IFS=',' read -ra conf <<< "$vm_conf" + if [[ x"${conf[0]}" == x"" ]] || ! assert_number ${conf[0]}; then + fail "invalid VM configuration syntax $vm_conf" + fi + + # Sanity check if VM is not defined twice + for vm_num in $used_vms; do + if [[ $vm_num -eq ${conf[0]} ]]; then + fail "VM$vm_num defined more than twice ( $(printf "'%s' " "${vms[@]}"))!" + fi + done + + used_vms+=" ${conf[0]}" + + if [[ $test_type =~ "spdk_vhost" ]]; then + + notice "Adding device via RPC ..." 
+ + while IFS=':' read -ra disks; do + for disk in "${disks[@]}"; do + if [[ "$test_type" == "spdk_vhost_blk" ]]; then + disk=${disk%%_*} + notice "Creating vhost block controller naa.$disk.${conf[0]} with device $disk" + $rpc_py construct_vhost_blk_controller naa.$disk.${conf[0]} $disk + else + notice "Creating controller naa.$disk.${conf[0]}" + $rpc_py construct_vhost_scsi_controller naa.$disk.${conf[0]} + + notice "Adding device (0) to naa.$disk.${conf[0]}" + $rpc_py add_vhost_scsi_lun naa.$disk.${conf[0]} 0 $disk + fi + done + done <<< "${conf[2]}" + unset IFS; + $rpc_py get_vhost_controllers + fi + + setup_cmd="vm_setup --force=${conf[0]} --disk-type=$test_type" + [[ x"${conf[1]}" != x"" ]] && setup_cmd+=" --os=${conf[1]}" + [[ x"${conf[2]}" != x"" ]] && setup_cmd+=" --disks=${conf[2]}" + + $setup_cmd +done + +# Run everything +vm_run $used_vms +vm_wait_for_boot 600 $used_vms + +if [[ $test_type == "spdk_vhost_scsi" ]]; then + for vm_conf in ${vms[@]}; do + IFS=',' read -ra conf <<< "$vm_conf" + while IFS=':' read -ra disks; do + for disk in "${disks[@]}"; do + notice "Hotdetach test. Trying to remove existing device from a controller naa.$disk.${conf[0]}" + $rpc_py remove_vhost_scsi_target naa.$disk.${conf[0]} 0 + + sleep 0.1 + + notice "Hotattach test. Re-adding device 0 to naa.$disk.${conf[0]}" + $rpc_py add_vhost_scsi_lun naa.$disk.${conf[0]} 0 $disk + done + done <<< "${conf[2]}" + unset IFS; + done +fi + +sleep 0.1 + +notice "===============" +notice "" +notice "Testing..." + +notice "Running fio jobs ..." 
+ +# Check if all VM have disk in tha same location +DISK="" + +fio_disks="" +for vm_num in $used_vms; do + vm_dir=$VM_BASE_DIR/$vm_num + + qemu_mask_param="VM_${vm_num}_qemu_mask" + + host_name="VM-$vm_num" + notice "Setting up hostname: $host_name" + vm_ssh $vm_num "hostname $host_name" + vm_start_fio_server $fio_bin $readonly $vm_num + + if [[ "$test_type" == "spdk_vhost_scsi" ]]; then + vm_check_scsi_location $vm_num + #vm_reset_scsi_devices $vm_num $SCSI_DISK + elif [[ "$test_type" == "spdk_vhost_blk" ]]; then + vm_check_blk_location $vm_num + fi + + fio_disks+=" --vm=${vm_num}$(printf ':/dev/%s' $SCSI_DISK)" +done + +if $dry_run; then + read -p "Enter to kill evething" xx + sleep 3 + at_app_exit + exit 0 +fi + +run_fio $fio_bin --job-file="$fio_job" --out="$TEST_DIR/fio_results" $fio_disks + +if [[ "$test_type" == "spdk_vhost_scsi" ]]; then + for vm_num in $used_vms; do + vm_reset_scsi_devices $vm_num $SCSI_DISK + done +fi + +if ! $no_shutdown; then + notice "===============" + notice "APP EXITING" + notice "killing all VMs" + vm_shutdown_all + notice "waiting 2 seconds to let all VMs die" + sleep 2 + if [[ $test_type =~ "spdk_vhost" ]]; then + notice "Removing vhost devices & controllers via RPC ..." + for vm_conf in ${vms[@]}; do + IFS=',' read -ra conf <<< "$vm_conf" + + while IFS=':' read -ra disks; do + for disk in "${disks[@]}"; do + disk=${disk%%_*} + notice "Removing all vhost devices from controller naa.$disk.${conf[0]}" + if [[ "$test_type" == "spdk_vhost_scsi" ]]; then + $rpc_py remove_vhost_scsi_target naa.$disk.${conf[0]} 0 + fi + + $rpc_py remove_vhost_controller naa.$disk.${conf[0]} + done + done <<< "${conf[2]}" + done + fi + notice "Testing done -> shutting down" + notice "killing vhost app" + spdk_vhost_kill + + notice "EXIT DONE" + notice "===============" +else + notice "===============" + notice "" + notice "Leaving environment working!" 
+ notice "" + notice "===============" +fi diff --git a/src/spdk/test/vhost/fiotest/conf.json b/src/spdk/test/vhost/fiotest/conf.json new file mode 100644 index 00000000..7a1594b2 --- /dev/null +++ b/src/spdk/test/vhost/fiotest/conf.json @@ -0,0 +1,80 @@ +{ + "subsystems": [ + { + "subsystem": "copy", + "config": null + }, + { + "subsystem": "interface", + "config": null + }, + { + "subsystem": "net_framework", + "config": null + }, + { + "subsystem": "bdev", + "config": [ + { + "params": { + "base_bdev": "Nvme0n1", + "split_size_mb": 0, + "split_count": 4 + }, + "method": "construct_split_vbdev" + }, + { + "params": { + "block_size": 4096, + "num_blocks": 32768 + }, + "method": "construct_malloc_bdev" + }, + { + "params": { + "block_size": 4096, + "num_blocks": 32768 + }, + "method": "construct_malloc_bdev" + } + ] + }, + { + "subsystem": "nbd", + "config": [] + }, + { + "subsystem": "scsi", + "config": null + }, + { + "subsystem": "vhost", + "config": [ + { + "params": { + "cpumask": "0x1", + "ctrlr": "vhost.0" + }, + "method": "construct_vhost_scsi_controller" + }, + { + "params": { + "scsi_target_num": 0, + "bdev_name": "Malloc0", + "ctrlr": "vhost.0" + }, + "method": "add_vhost_scsi_lun" + }, + { + "params": { + "dev_name": "Malloc1", + "readonly": true, + "ctrlr": "vhost.1", + "cpumask": "0x1" + }, + "method": "construct_vhost_blk_controller" + } + ] + } + ] +} diff --git a/src/spdk/test/vhost/hotplug/blk_hotremove.sh b/src/spdk/test/vhost/hotplug/blk_hotremove.sh new file mode 100644 index 00000000..a350d90d --- /dev/null +++ b/src/spdk/test/vhost/hotplug/blk_hotremove.sh @@ -0,0 +1,236 @@ +# Vhost blk hot remove tests +# +# Objective +# The purpose of these tests is to verify that SPDK vhost remains stable during +# hot-remove operations performed on SCSI and BLK controllers devices. +# Hot-remove is a scenario where a NVMe device is removed when already in use. +# +# Test cases description +# 1. FIO I/O traffic is run during hot-remove operations. 
+# By default FIO uses default_integrity*.job config files located in +# test/vhost/hotplug/fio_jobs directory. +# 2. FIO mode of operation is random write (randwrite) with verification enabled +# which results in also performing read operations. +# 3. In test cases fio status is checked after every run if any errors occurred. + +function prepare_fio_cmd_tc1() { + print_test_fio_header + + run_fio="$fio_bin --eta=never " + for vm_num in $1; do + cp $fio_job $tmp_detach_job + vm_dir=$VM_BASE_DIR/$vm_num + vm_check_blk_location $vm_num + for disk in $SCSI_DISK; do + echo "[nvme-host$disk]" >> $tmp_detach_job + echo "filename=/dev/$disk" >> $tmp_detach_job + done + vm_scp "$vm_num" $tmp_detach_job 127.0.0.1:/root/default_integrity_2discs.job + run_fio+="--client=127.0.0.1,$(vm_fio_socket $vm_num) --remote-config /root/default_integrity_2discs.job " + rm $tmp_detach_job + done +} + +function remove_vhost_controllers() { + $rpc_py remove_vhost_controller naa.Nvme0n1p0.0 + $rpc_py remove_vhost_controller naa.Nvme0n1p1.0 + $rpc_py remove_vhost_controller naa.Nvme0n1p2.1 + $rpc_py remove_vhost_controller naa.Nvme0n1p3.1 +} + +# Vhost blk hot remove test cases +# +# Test Case 1 +function blk_hotremove_tc1() { + echo "Blk hotremove test case 1" + traddr="" + # 1. Run the command to hot remove NVMe disk. + get_traddr "Nvme0" + delete_nvme "Nvme0" + # 2. If vhost had crashed then tests would stop running + sleep 1 + add_nvme "HotInNvme0" "$traddr" + sleep 1 +} + +# Test Case 2 +function blk_hotremove_tc2() { + echo "Blk hotremove test case 2" + # 1. Use rpc command to create blk controllers. + $rpc_py construct_vhost_blk_controller naa.Nvme0n1p0.0 HotInNvme0n1p0 + $rpc_py construct_vhost_blk_controller naa.Nvme0n1p1.0 Nvme1n1p0 + $rpc_py construct_vhost_blk_controller naa.Nvme0n1p2.1 Nvme1n1p1 + $rpc_py construct_vhost_blk_controller naa.Nvme0n1p3.1 Nvme1n1p2 + # 2. Run two VMs and attach every VM to two blk controllers. 
+ vm_run_with_arg "0 1" + vms_prepare "0" + + traddr="" + get_traddr "Nvme0" + prepare_fio_cmd_tc1 "0" + # 3. Run FIO I/O traffic with verification enabled on NVMe disk. + $run_fio & + local last_pid=$! + sleep 3 + # 4. Run the command to hot remove NVMe disk. + delete_nvme "HotInNvme0" + local retcode=0 + wait_for_finish $last_pid || retcode=$? + # 5. Check that fio job run on hot-removed device stopped. + # Expected: Fio should return error message and return code != 0. + check_fio_retcode "Blk hotremove test case 2: Iteration 1." 1 $retcode + + # 6. Reboot VM + reboot_all_and_prepare "0" + # 7. Run FIO I/O traffic with verification enabled on NVMe disk. + $run_fio & + local retcode=0 + wait_for_finish $! || retcode=$? + # 8. Check that fio job run on hot-removed device stopped. + # Expected: Fio should return error message and return code != 0. + check_fio_retcode "Blk hotremove test case 2: Iteration 2." 1 $retcode + vm_shutdown_all + remove_vhost_controllers + add_nvme "HotInNvme1" "$traddr" + sleep 1 +} + +# ## Test Case 3 +function blk_hotremove_tc3() { + echo "Blk hotremove test case 3" + # 1. Use rpc command to create blk controllers. + $rpc_py construct_vhost_blk_controller naa.Nvme0n1p0.0 HotInNvme1n1p0 + $rpc_py construct_vhost_blk_controller naa.Nvme0n1p1.0 Nvme1n1p0 + $rpc_py construct_vhost_blk_controller naa.Nvme0n1p2.1 HotInNvme1n1p1 + $rpc_py construct_vhost_blk_controller naa.Nvme0n1p3.1 Nvme1n1p1 + # 2. Run two VMs and attach every VM to two blk controllers. + vm_run_with_arg "0 1" + vms_prepare "0 1" + + traddr="" + get_traddr "Nvme0" + prepare_fio_cmd_tc1 "0" + # 3. Run FIO I/O traffic with verification enabled on first NVMe disk. + $run_fio & + local last_pid=$! + sleep 3 + # 4. Run the command to hot remove of first NVMe disk. + delete_nvme "HotInNvme1" + local retcode=0 + wait_for_finish $last_pid || retcode=$? + # 6. Check that fio job run on hot-removed device stopped. + # Expected: Fio should return error message and return code != 0. 
+ check_fio_retcode "Blk hotremove test case 3: Iteration 1." 1 $retcode + + # 7. Reboot VM + reboot_all_and_prepare "0" + local retcode=0 + # 8. Run FIO I/O traffic with verification enabled on removed NVMe disk. + $run_fio & + wait_for_finish $! || retcode=$? + # 9. Check that fio job run on hot-removed device stopped. + # Expected: Fio should return error message and return code != 0. + check_fio_retcode "Blk hotremove test case 3: Iteration 2." 1 $retcode + vm_shutdown_all + remove_vhost_controllers + add_nvme "HotInNvme2" "$traddr" + sleep 1 +} + +# Test Case 4 +function blk_hotremove_tc4() { + echo "Blk hotremove test case 4" + # 1. Use rpc command to create blk controllers. + $rpc_py construct_vhost_blk_controller naa.Nvme0n1p0.0 HotInNvme2n1p0 + $rpc_py construct_vhost_blk_controller naa.Nvme0n1p1.0 Nvme1n1p0 + $rpc_py construct_vhost_blk_controller naa.Nvme0n1p2.1 HotInNvme2n1p1 + $rpc_py construct_vhost_blk_controller naa.Nvme0n1p3.1 Nvme1n1p1 + # 2. Run two VM, attached to blk controllers. + vm_run_with_arg "0 1" + vms_prepare "0 1" + + prepare_fio_cmd_tc1 "0" + # 3. Run FIO I/O traffic on first VM with verification enabled on both NVMe disks. + $run_fio & + local last_pid_vm0=$! + + prepare_fio_cmd_tc1 "1" + # 4. Run FIO I/O traffic on second VM with verification enabled on both NVMe disks. + $run_fio & + local last_pid_vm1=$! + + sleep 3 + prepare_fio_cmd_tc1 "0 1" + # 5. Run the command to hot remove of first NVMe disk. + delete_nvme "HotInNvme2" + local retcode_vm0=0 + local retcode_vm1=0 + wait_for_finish $last_pid_vm0 || retcode_vm0=$? + wait_for_finish $last_pid_vm1 || retcode_vm1=$? + # 6. Check that fio job run on hot-removed device stopped. + # Expected: Fio should return error message and return code != 0. + check_fio_retcode "Blk hotremove test case 4: Iteration 1." 1 $retcode_vm0 + check_fio_retcode "Blk hotremove test case 4: Iteration 2." 1 $retcode_vm1 + + # 7. Reboot all VMs. + reboot_all_and_prepare "0 1" + # 8. 
Run FIO I/O traffic with verification enabled on removed NVMe disk. + $run_fio & + local retcode=0 + wait_for_finish $! || retcode=$? + # 9. Check that fio job run on hot-removed device stopped. + # Expected: Fio should return error message and return code != 0. + check_fio_retcode "Blk hotremove test case 4: Iteration 3." 1 $retcode + + vm_shutdown_all + remove_vhost_controllers + add_nvme "HotInNvme3" "$traddr" + sleep 1 +} + +# Test Case 5 +function blk_hotremove_tc5() { + echo "Blk hotremove test case 5" + # 1. Use rpc command to create blk controllers. + $rpc_py construct_vhost_blk_controller naa.Nvme0n1p0.0 HotInNvme3n1p0 + $rpc_py construct_vhost_blk_controller naa.Nvme0n1p1.0 Nvme1n1p0 + $rpc_py construct_vhost_blk_controller naa.Nvme0n1p2.1 Nvme1n1p1 + $rpc_py construct_vhost_blk_controller naa.Nvme0n1p3.1 Nvme1n1p2 + # 2. Run two VM, attached to blk controllers. + vm_run_with_arg "0 1" + vms_prepare "0 1" + + prepare_fio_cmd_tc1 "0" + # 3. Run FIO I/O traffic on first VM with verification enabled on both NVMe disks. + $run_fio & + local last_pid=$! + sleep 3 + # 4. Run the command to hot remove of first NVMe disk. + delete_nvme "HotInNvme3" + local retcode=0 + wait_for_finish $last_pid || retcode=$? + # 5. Check that fio job run on hot-removed device stopped. + # Expected: Fio should return error message and return code != 0. + check_fio_retcode "Blk hotremove test case 5: Iteration 1." 1 $retcode + + # 6. Reboot VM. + reboot_all_and_prepare "0" + local retcode=0 + # 7. Run FIO I/O traffic with verification enabled on removed NVMe disk. + $run_fio & + wait_for_finish $! || retcode=$? + # 8. Check that fio job run on hot-removed device stopped. + # Expected: Fio should return error message and return code != 0. + check_fio_retcode "Blk hotremove test case 5: Iteration 2." 
1 $retcode + vm_shutdown_all + remove_vhost_controllers + add_nvme "HotInNvme4" "$traddr" + sleep 1 +} + +vms_setup +blk_hotremove_tc1 +blk_hotremove_tc2 +blk_hotremove_tc3 +blk_hotremove_tc4 +blk_hotremove_tc5 diff --git a/src/spdk/test/vhost/hotplug/common.sh b/src/spdk/test/vhost/hotplug/common.sh new file mode 100644 index 00000000..a94b06cf --- /dev/null +++ b/src/spdk/test/vhost/hotplug/common.sh @@ -0,0 +1,230 @@ +dry_run=false +no_shutdown=false +fio_bin="fio" +fio_jobs="$BASE_DIR/fio_jobs/" +test_type=spdk_vhost_scsi +reuse_vms=false +vms=() +used_vms="" +disk_split="" +x="" +scsi_hot_remove_test=0 +blk_hot_remove_test=0 + + +function usage() { + [[ ! -z $2 ]] && ( echo "$2"; echo ""; ) + echo "Shortcut script for doing automated hotattach/hotdetach test" + echo "Usage: $(basename $1) [OPTIONS]" + echo + echo "-h, --help print help and exit" + echo " --test-type=TYPE Perform specified test:" + echo " virtio - test host virtio-scsi-pci using file as disk image" + echo " kernel_vhost - use kernel driver vhost-scsi" + echo " spdk_vhost_scsi - use spdk vhost scsi" + echo " spdk_vhost_blk - use spdk vhost block" + echo "-x set -x for script debug" + echo " --fio-bin=FIO Use specific fio binary (will be uploaded to VM)" + echo " --fio-jobs= Fio configs to use for tests. Can point to a directory or" + echo " --work-dir=WORK_DIR Where to find build file. Must exist. [default: $TEST_DIR]" + echo " --vm=NUM[,OS][,DISKS] VM configuration. 
This parameter might be used more than once:" + echo " NUM - VM number (mandatory)" + echo " OS - VM os disk path (optional)" + echo " DISKS - VM os test disks/devices path (virtio - optional, kernel_vhost - mandatory)" + echo " --scsi-hotremove-test Run scsi hotremove tests" + exit 0 +} + +while getopts 'xh-:' optchar; do + case "$optchar" in + -) + case "$OPTARG" in + help) usage $0 ;; + work-dir=*) TEST_DIR="${OPTARG#*=}" ;; + fio-bin=*) fio_bin="${OPTARG#*=}" ;; + fio-jobs=*) fio_jobs="${OPTARG#*=}" ;; + test-type=*) test_type="${OPTARG#*=}" ;; + vm=*) vms+=("${OPTARG#*=}") ;; + scsi-hotremove-test) scsi_hot_remove_test=1 ;; + blk-hotremove-test) blk_hot_remove_test=1 ;; + *) usage $0 "Invalid argument '$OPTARG'" ;; + esac + ;; + h) usage $0 ;; + x) set -x + x="-x" ;; + *) usage $0 "Invalid argument '$OPTARG'" + esac +done +shift $(( OPTIND - 1 )) + +fio_job=$BASE_DIR/fio_jobs/default_integrity.job +tmp_attach_job=$BASE_DIR/fio_jobs/fio_attach.job.tmp +tmp_detach_job=$BASE_DIR/fio_jobs/fio_detach.job.tmp +. $BASE_DIR/../common/common.sh + +rpc_py="$SPDK_BUILD_DIR/scripts/rpc.py -s $(get_vhost_dir)/rpc.sock" + +function print_test_fio_header() { + notice "===============" + notice "" + notice "Testing..." + + notice "Running fio jobs ..." + if [ $# -gt 0 ]; then + echo $1 + fi +} + +function run_vhost() { + notice "===============" + notice "" + notice "running SPDK" + notice "" + spdk_vhost_run --conf-path=$BASE_DIR + notice "" +} + +function vms_setup() { + for vm_conf in ${vms[@]}; do + IFS=',' read -ra conf <<< "$vm_conf" + if [[ x"${conf[0]}" == x"" ]] || ! assert_number ${conf[0]}; then + fail "invalid VM configuration syntax $vm_conf" + fi + + # Sanity check if VM is not defined twice + for vm_num in $used_vms; do + if [[ $vm_num -eq ${conf[0]} ]]; then + fail "VM$vm_num defined more than twice ( $(printf "'%s' " "${vms[@]}"))!" 
+ fi + done + + used_vms+=" ${conf[0]}" + + setup_cmd="vm_setup --disk-type=$test_type --force=${conf[0]}" + [[ x"${conf[1]}" != x"" ]] && setup_cmd+=" --os=${conf[1]}" + [[ x"${conf[2]}" != x"" ]] && setup_cmd+=" --disks=${conf[2]}" + $setup_cmd + done +} + +function vm_run_with_arg() { + vm_run $@ + vm_wait_for_boot 600 $@ +} + +function vms_setup_and_run() { + vms_setup + vm_run_with_arg $@ +} + +function vms_prepare() { + for vm_num in $1; do + vm_dir=$VM_BASE_DIR/$vm_num + + qemu_mask_param="VM_${vm_num}_qemu_mask" + + host_name="VM-${vm_num}-${!qemu_mask_param}" + notice "Setting up hostname: $host_name" + vm_ssh $vm_num "hostname $host_name" + vm_start_fio_server --fio-bin=$fio_bin $readonly $vm_num + done +} + +function vms_reboot_all() { + notice "Rebooting all vms " + for vm_num in $1; do + vm_ssh $vm_num "reboot" || true + while vm_os_booted $vm_num; do + sleep 0.5 + done + done + + vm_wait_for_boot 300 $1 +} + +function check_fio_retcode() { + local fio_retcode=$3 + echo $1 + local retcode_expected=$2 + if [ $retcode_expected == 0 ]; then + if [ $fio_retcode != 0 ]; then + error " Fio test ended with error." + else + notice " Fio test ended with success." + fi + else + if [ $fio_retcode != 0 ]; then + notice " Fio test ended with expected error." + else + error " Fio test ended with unexpected success." 
+ fi + fi +} + +function wait_for_finish() { + local wait_for_pid=$1 + local sequence=${2:-30} + for i in `seq 1 $sequence`; do + if kill -0 $wait_for_pid; then + sleep 0.5 + continue + else + break + fi + done + if kill -0 $wait_for_pid; then + error "Timeout for fio command" + fi + + wait $wait_for_pid +} + + +function reboot_all_and_prepare() { + vms_reboot_all "$1" + vms_prepare "$1" +} + +function post_test_case() { + vm_shutdown_all + spdk_vhost_kill +} + +function on_error_exit() { + set +e + echo "Error on $1 - $2" + post_test_case + print_backtrace + exit 1 +} + +function check_disks() { + if [ "$1" == "$2" ]; then + echo "Disk has not been deleted" + exit 1 + fi +} + +function get_traddr() { + local nvme_name=$1 + local nvme="$( $SPDK_BUILD_DIR/scripts/gen_nvme.sh )" + while read -r line; do + if [[ $line == *"TransportID"* ]] && [[ $line == *$nvme_name* ]]; then + local word_array=($line) + for word in "${word_array[@]}"; do + if [[ $word == *"traddr"* ]]; then + traddr=$( echo $word | sed 's/traddr://' | sed 's/"//' ) + fi + done + fi + done <<< "$nvme" +} + +function delete_nvme() { + $rpc_py delete_nvme_controller $1 +} + +function add_nvme() { + $rpc_py construct_nvme_bdev -b $1 -t PCIe -a $2 +} diff --git a/src/spdk/test/vhost/hotplug/fio_jobs/default_integrity.job b/src/spdk/test/vhost/hotplug/fio_jobs/default_integrity.job new file mode 100644 index 00000000..136fe902 --- /dev/null +++ b/src/spdk/test/vhost/hotplug/fio_jobs/default_integrity.job @@ -0,0 +1,16 @@ +[global] +blocksize=4k +iodepth=512 +iodepth_batch=128 +iodepth_low=256 +ioengine=libaio +group_reporting +thread +numjobs=1 +direct=1 +rw=randwrite +do_verify=1 +verify=md5 +verify_backlog=1024 +time_based=1 +runtime=10 diff --git a/src/spdk/test/vhost/hotplug/scsi_hotattach.sh b/src/spdk/test/vhost/hotplug/scsi_hotattach.sh new file mode 100755 index 00000000..e9d851f6 --- /dev/null +++ b/src/spdk/test/vhost/hotplug/scsi_hotattach.sh @@ -0,0 +1,104 @@ +#!/usr/bin/env bash +set -e 
+BASE_DIR=$(readlink -f $(dirname $0)) +[[ -z "$TEST_DIR" ]] && TEST_DIR="$(cd $BASE_DIR/../../../../ && pwd)" + +. $BASE_DIR/common.sh + +function prepare_fio_cmd_tc1() { + print_test_fio_header + + run_fio="$fio_bin --eta=never " + for vm_num in $1; do + cp $fio_job $tmp_attach_job + vm_dir=$VM_BASE_DIR/$vm_num + vm_check_scsi_location $vm_num + for disk in $SCSI_DISK; do + echo "[nvme-host$disk]" >> $tmp_attach_job + echo "filename=/dev/$disk" >> $tmp_attach_job + done + vm_scp $vm_num $tmp_attach_job 127.0.0.1:/root/default_integrity_discs.job + run_fio+="--client=127.0.0.1,$(vm_fio_socket ${vm_num}) --remote-config /root/default_integrity_discs.job " + rm $tmp_attach_job + done +} + +# Check if fio test passes on device attached to first controller. +function hotattach_tc1() { + notice "Hotattach test case 1" + + $rpc_py add_vhost_scsi_lun naa.Nvme0n1p0.0 0 Nvme0n1p0 + + sleep 3 + prepare_fio_cmd_tc1 "0" + $run_fio + check_fio_retcode "Hotattach test case 1: Iteration 1." 0 $? +} + +# Run fio test for previously attached device. +# During test attach another device to first controller and check fio status. +function hotattach_tc2() { + notice "Hotattach test case 2" + prepare_fio_cmd_tc1 "0" + + $run_fio & + last_pid=$! + sleep 3 + $rpc_py add_vhost_scsi_lun naa.Nvme0n1p0.0 1 Nvme0n1p1 + wait $last_pid + check_fio_retcode "Hotattach test case 2: Iteration 1." 0 $? +} + +# Run fio test for previously attached devices. +# During test attach another device to second controller and check fio status. +function hotattach_tc3() { + notice "Hotattach test case 3" + prepare_fio_cmd_tc1 "0" + + $run_fio & + last_pid=$! + sleep 3 + $rpc_py add_vhost_scsi_lun naa.Nvme0n1p1.0 0 Nvme0n1p2 + wait $last_pid + check_fio_retcode "Hotattach test case 3: Iteration 1." 0 $? +} + +# Run fio test for previously attached devices. +# During test attach another device to third controller(VM2) and check fio status. 
+# At the end after rebooting VMs run fio test for all devices and check fio status. +function hotattach_tc4() { + notice "Hotattach test case 4" + + prepare_fio_cmd_tc1 "0" + + $run_fio & + last_pid=$! + sleep 3 + $rpc_py add_vhost_scsi_lun naa.Nvme0n1p2.1 0 Nvme0n1p3 + wait $last_pid + check_fio_retcode "Hotattach test case 4: Iteration 1." 0 $? + + prepare_fio_cmd_tc1 "0 1" + $run_fio + check_fio_retcode "Hotattach test case 4: Iteration 2." 0 $? + + reboot_all_and_prepare "0 1" + + prepare_fio_cmd_tc1 "0 1" + $run_fio + check_fio_retcode "Hotattach test case 4: Iteration 3." 0 $? +} + +function cleanup_after_tests() { + $rpc_py remove_vhost_scsi_target naa.Nvme0n1p0.0 0 + $rpc_py remove_vhost_scsi_target naa.Nvme0n1p0.0 1 + $rpc_py remove_vhost_scsi_target naa.Nvme0n1p1.0 0 + $rpc_py remove_vhost_scsi_target naa.Nvme0n1p2.1 0 + $rpc_py delete_nvme_controller Nvme0 +} + +hotattach_tc1 +hotattach_tc2 +hotattach_tc3 +hotattach_tc4 +cleanup_after_tests diff --git a/src/spdk/test/vhost/hotplug/scsi_hotdetach.sh b/src/spdk/test/vhost/hotplug/scsi_hotdetach.sh new file mode 100755 index 00000000..45c948d9 --- /dev/null +++ b/src/spdk/test/vhost/hotplug/scsi_hotdetach.sh @@ -0,0 +1,241 @@ +#!/usr/bin/env bash + +set -e +BASE_DIR=$(readlink -f $(dirname $0)) +[[ -z "$TEST_DIR" ]] && TEST_DIR="$(cd $BASE_DIR/../../../../ && pwd)" + +. 
$BASE_DIR/common.sh + +function get_first_disk() { + vm_check_scsi_location $1 + disk_array=( $SCSI_DISK ) + eval "$2=${disk_array[0]}" +} + +function check_disks() { + if [ "$1" == "$2" ]; then + fail "Disk has not been deleted" + fi +} + +function prepare_fio_cmd_tc1_iter1() { + print_test_fio_header + + run_fio="$fio_bin --eta=never " + for vm_num in $1; do + cp $fio_job $tmp_detach_job + vm_dir=$VM_BASE_DIR/$vm_num + vm_check_scsi_location $vm_num + for disk in $SCSI_DISK; do + echo "[nvme-host$disk]" >> $tmp_detach_job + echo "filename=/dev/$disk" >> $tmp_detach_job + done + vm_scp "$vm_num" $tmp_detach_job 127.0.0.1:/root/default_integrity_4discs.job + run_fio+="--client=127.0.0.1,$(vm_fio_socket $vm_num) --remote-config /root/default_integrity_4discs.job " + rm $tmp_detach_job + done +} + +function prepare_fio_cmd_tc1_iter2() { + print_test_fio_header + + for vm_num in 2; do + cp $fio_job $tmp_detach_job + vm_dir=$VM_BASE_DIR/$vm_num + vm_check_scsi_location $vm_num + for disk in $SCSI_DISK; do + echo "[nvme-host$disk]" >> $tmp_detach_job + echo "filename=/dev/$disk" >> $tmp_detach_job + done + vm_scp "$vm_num" $tmp_detach_job 127.0.0.1:/root/default_integrity_3discs.job + rm $tmp_detach_job + done + run_fio="$fio_bin --eta=never " + for vm_num in $used_vms; do + if [ $vm_num == 2 ]; then + run_fio+="--client=127.0.0.1,$(vm_fio_socket $vm_num) --remote-config /root/default_integrity_3discs.job " + continue + fi + run_fio+="--client=127.0.0.1,$(vm_fio_socket $vm_num) --remote-config /root/default_integrity_4discs.job " + done +} + +function prepare_fio_cmd_tc2_iter1() { + print_test_fio_header + + run_fio="$fio_bin --eta=never " + for vm_num in $1; do + cp $fio_job $tmp_detach_job + vm_dir=$VM_BASE_DIR/$vm_num + vm_check_scsi_location $vm_num + disk_array=($SCSI_DISK) + disk=${disk_array[0]} + echo "[nvme-host$disk]" >> $tmp_detach_job + echo "filename=/dev/$disk" >> $tmp_detach_job + vm_scp "$vm_num" $tmp_detach_job 127.0.0.1:/root/default_integrity.job + 
run_fio+="--client=127.0.0.1,$(vm_fio_socket $vm_num) --remote-config /root/default_integrity.job " + rm $tmp_detach_job + done +} + +function prepare_fio_cmd_tc2_iter2() { + print_test_fio_header + + run_fio="$fio_bin --eta=never " + for vm_num in $1; do + cp $fio_job $tmp_detach_job + if [ $vm_num == 2 ]; then + vm_job_name=default_integrity_3discs.job + else + vm_job_name=default_integrity_4discs.job + fi + vm_dir=$VM_BASE_DIR/$vm_num + vm_check_scsi_location $vm_num + for disk in $SCSI_DISK; do + echo "[nvme-host$disk]" >> $tmp_detach_job + echo "filename=/dev/$disk" >> $tmp_detach_job + done + vm_scp "$vm_num" $tmp_detach_job 127.0.0.1:/root/$vm_job_name + run_fio+="--client=127.0.0.1,$(vm_fio_socket $vm_num) --remote-config /root/${vm_job_name} " + rm $tmp_detach_job + done +} + + +function prepare_fio_cmd_tc3_iter1() { + print_test_fio_header + + run_fio="$fio_bin --eta=never " + for vm_num in $1; do + cp $fio_job $tmp_detach_job + if [ $vm_num == 2 ]; then + vm_job_name=default_integrity_3discs.job + else + vm_job_name=default_integrity_4discs.job + fi + vm_dir=$VM_BASE_DIR/$vm_num + vm_check_scsi_location $vm_num + j=1 + for disk in $SCSI_DISK; do + if [ $vm_num == 2 ]; then + if [ $j == 1 ]; then + (( j++ )) + continue + fi + fi + echo "[nvme-host$disk]" >> $tmp_detach_job + echo "filename=/dev/$disk" >> $tmp_detach_job + (( j++ )) + done + vm_scp "$vm_num" $tmp_detach_job 127.0.0.1:/root/$vm_job_name + run_fio+="--client=127.0.0.1,$(vm_fio_socket $vm_num) --remote-config /root/$vm_job_name " + rm $tmp_detach_job + done +} + +# During fio test for all devices remove first device from fifth controller and check if fio fails. +# Also check if disc has been removed from VM. +function hotdetach_tc1() { + notice "Hotdetach test case 1" + first_disk="" + get_first_disk "2" first_disk + prepare_fio_cmd_tc1_iter1 "2 3" + $run_fio & + last_pid=$! 
+ sleep 3 + $rpc_py remove_vhost_scsi_target naa.Nvme0n1p4.2 0 + set +xe + wait $last_pid + check_fio_retcode "Hotdetach test case 1: Iteration 1." 1 $? + set -xe + second_disk="" + get_first_disk "2" second_disk + check_disks $first_disk $second_disk + clear_after_tests +} + +# During fio test for device from third VM remove first device from fifth controller and check if fio fails. +# Also check if disc has been removed from VM. +function hotdetach_tc2() { + notice "Hotdetach test case 2" + sleep 2 + first_disk="" + get_first_disk "2" first_disk + prepare_fio_cmd_tc2_iter1 "2" + $run_fio & + last_pid=$! + sleep 3 + $rpc_py remove_vhost_scsi_target naa.Nvme0n1p4.2 0 + set +xe + wait $last_pid + check_fio_retcode "Hotdetach test case 2: Iteration 1." 1 $? + set -xe + second_disk="" + get_first_disk "2" second_disk + check_disks $first_disk $second_disk + clear_after_tests +} + +# Run fio test for all devices except one, then remove this device and check if fio passes. +# Also check if disc has been removed from VM. +function hotdetach_tc3() { + notice "Hotdetach test case 3" + sleep 2 + first_disk="" + get_first_disk "2" first_disk + prepare_fio_cmd_tc3_iter1 "2 3" + $run_fio & + last_pid=$! + sleep 3 + $rpc_py remove_vhost_scsi_target naa.Nvme0n1p4.2 0 + wait $last_pid + check_fio_retcode "Hotdetach test case 3: Iteration 1." 0 $? + second_disk="" + get_first_disk "2" second_disk + check_disks $first_disk $second_disk + clear_after_tests +} + +# Run fio test for all devices except one and run separate fio test for this device. +# Check if first fio test passes and second fio test fails. +# Also check if disc has been removed from VM. +# After reboot run fio test for remaining devices and check if fio passes. +function hotdetach_tc4() { + notice "Hotdetach test case 4" + sleep 2 + first_disk="" + get_first_disk "2" first_disk + prepare_fio_cmd_tc2_iter1 "2" + $run_fio & + first_fio_pid=$! + prepare_fio_cmd_tc3_iter1 "2 3" + $run_fio & + second_fio_pid=$! 
+ sleep 3 + $rpc_py remove_vhost_scsi_target naa.Nvme0n1p4.2 0 + set +xe + wait $first_fio_pid + check_fio_retcode "Hotdetach test case 4: Iteration 1." 1 $? + set -xe + wait $second_fio_pid + check_fio_retcode "Hotdetach test case 4: Iteration 2." 0 $? + second_disk="" + get_first_disk "2" second_disk + check_disks $first_disk $second_disk + + reboot_all_and_prepare "2 3" + sleep 2 + prepare_fio_cmd_tc2_iter2 "2 3" + $run_fio + check_fio_retcode "Hotdetach test case 4: Iteration 3." 0 $? + clear_after_tests +} + +function clear_after_tests() { + $rpc_py add_vhost_scsi_lun naa.Nvme0n1p4.2 0 Nvme0n1p8 +} + +hotdetach_tc1 +hotdetach_tc2 +hotdetach_tc3 +hotdetach_tc4 diff --git a/src/spdk/test/vhost/hotplug/scsi_hotplug.sh b/src/spdk/test/vhost/hotplug/scsi_hotplug.sh new file mode 100755 index 00000000..ab429c1e --- /dev/null +++ b/src/spdk/test/vhost/hotplug/scsi_hotplug.sh @@ -0,0 +1,90 @@ +#!/usr/bin/env bash +set -e +BASE_DIR=$(readlink -f $(dirname $0)) +. $BASE_DIR/common.sh + +if [[ $scsi_hot_remove_test == 1 ]] && [[ $blk_hot_remove_test == 1 ]]; then + notice "Vhost-scsi and vhost-blk hotremove tests cannot be run together" +fi + +# Add split section into vhost config +function gen_config() { + cp $BASE_DIR/vhost.conf.base $BASE_DIR/vhost.conf.in + cat << END_OF_CONFIG >> $BASE_DIR/vhost.conf.in +[Split] + Split Nvme0n1 16 + Split Nvme1n1 20 + Split HotInNvme0n1 2 + Split HotInNvme1n1 2 + Split HotInNvme2n1 2 + Split HotInNvme3n1 2 +END_OF_CONFIG +} + +# Run spdk by calling run_vhost from hotplug/common.sh. +# Then prepare vhost with rpc calls and setup and run 4 VMs. 
+function pre_hot_attach_detach_test_case() { + used_vms="" + $rpc_py construct_vhost_scsi_controller naa.Nvme0n1p0.0 + $rpc_py construct_vhost_scsi_controller naa.Nvme0n1p1.0 + $rpc_py construct_vhost_scsi_controller naa.Nvme0n1p2.1 + $rpc_py construct_vhost_scsi_controller naa.Nvme0n1p3.1 + $rpc_py construct_vhost_scsi_controller naa.Nvme0n1p4.2 + $rpc_py construct_vhost_scsi_controller naa.Nvme0n1p5.2 + $rpc_py construct_vhost_scsi_controller naa.Nvme0n1p6.3 + $rpc_py construct_vhost_scsi_controller naa.Nvme0n1p7.3 + $rpc_py add_vhost_scsi_lun naa.Nvme0n1p4.2 0 Nvme0n1p8 + $rpc_py add_vhost_scsi_lun naa.Nvme0n1p4.2 1 Nvme0n1p9 + $rpc_py add_vhost_scsi_lun naa.Nvme0n1p5.2 0 Nvme0n1p10 + $rpc_py add_vhost_scsi_lun naa.Nvme0n1p5.2 1 Nvme0n1p11 + $rpc_py add_vhost_scsi_lun naa.Nvme0n1p6.3 0 Nvme0n1p12 + $rpc_py add_vhost_scsi_lun naa.Nvme0n1p6.3 1 Nvme0n1p13 + $rpc_py add_vhost_scsi_lun naa.Nvme0n1p7.3 0 Nvme0n1p14 + $rpc_py add_vhost_scsi_lun naa.Nvme0n1p7.3 1 Nvme0n1p15 + vms_setup_and_run "0 1 2 3" + vms_prepare "0 1 2 3" +} + +function clear_vhost_config() { + $rpc_py remove_vhost_scsi_target naa.Nvme0n1p4.2 0 + $rpc_py remove_vhost_scsi_target naa.Nvme0n1p4.2 1 + $rpc_py remove_vhost_scsi_target naa.Nvme0n1p5.2 0 + $rpc_py remove_vhost_scsi_target naa.Nvme0n1p5.2 1 + $rpc_py remove_vhost_scsi_target naa.Nvme0n1p6.3 0 + $rpc_py remove_vhost_scsi_target naa.Nvme0n1p6.3 1 + $rpc_py remove_vhost_scsi_target naa.Nvme0n1p7.3 0 + $rpc_py remove_vhost_scsi_target naa.Nvme0n1p7.3 1 + $rpc_py remove_vhost_controller naa.Nvme0n1p0.0 + $rpc_py remove_vhost_controller naa.Nvme0n1p1.0 + $rpc_py remove_vhost_controller naa.Nvme0n1p2.1 + $rpc_py remove_vhost_controller naa.Nvme0n1p3.1 + $rpc_py remove_vhost_controller naa.Nvme0n1p4.2 + $rpc_py remove_vhost_controller naa.Nvme0n1p5.2 + $rpc_py remove_vhost_controller naa.Nvme0n1p6.3 + $rpc_py remove_vhost_controller naa.Nvme0n1p7.3 +} + +trap 'error_exit "${FUNCNAME}" "${LINENO}"' ERR +gen_config +# 
Hotremove/hotattach/hotdetach test cases prerequisites +# 1. Run vhost with 2 NVMe disks. +run_vhost +rm $BASE_DIR/vhost.conf.in +if [[ $scsi_hot_remove_test == 0 ]] && [[ $blk_hot_remove_test == 0 ]]; then + pre_hot_attach_detach_test_case + $BASE_DIR/scsi_hotattach.sh --fio-bin=$fio_bin & + first_script=$! + $BASE_DIR/scsi_hotdetach.sh --fio-bin=$fio_bin & + second_script=$! + wait $first_script + wait $second_script + vm_shutdown_all + clear_vhost_config +fi +if [[ $scsi_hot_remove_test == 1 ]]; then + source $BASE_DIR/scsi_hotremove.sh +fi +if [[ $blk_hot_remove_test == 1 ]]; then + source $BASE_DIR/blk_hotremove.sh +fi +post_test_case diff --git a/src/spdk/test/vhost/hotplug/scsi_hotremove.sh b/src/spdk/test/vhost/hotplug/scsi_hotremove.sh new file mode 100644 index 00000000..829eb3f6 --- /dev/null +++ b/src/spdk/test/vhost/hotplug/scsi_hotremove.sh @@ -0,0 +1,232 @@ +set -xe + +# Vhost SCSI hotremove tests +# +# # Objective +# The purpose of these tests is to verify that SPDK vhost remains stable during +# hot-remove operations performed on SCSI controllers devices. +# Hot-remove is a scenario where a NVMe device is removed when already in use. +# Tests consist of 4 test cases. +# +# # Test cases description +# 1. FIO I/O traffic is run during hot-remove operations. +# By default FIO uses default_integrity*.job config files located in +# test/vhost/hotplug/fio_jobs directory. +# 2. FIO mode of operation is random write (randwrite) with verification enabled +# which results in also performing read operations. 
+ +function prepare_fio_cmd_tc1() { + print_test_fio_header + + run_fio="$fio_bin --eta=never " + for vm_num in $1; do + cp $fio_job $tmp_detach_job + vm_dir=$VM_BASE_DIR/$vm_num + vm_check_scsi_location $vm_num + for disk in $SCSI_DISK; do + echo "[nvme-host$disk]" >> $tmp_detach_job + echo "filename=/dev/$disk" >> $tmp_detach_job + echo "size=100%" >> $tmp_detach_job + done + vm_scp "$vm_num" $tmp_detach_job 127.0.0.1:/root/default_integrity_2discs.job + run_fio+="--client=127.0.0.1,$(vm_fio_socket $vm_num) --remote-config /root/default_integrity_2discs.job " + rm $tmp_detach_job + done +} + +# Vhost SCSI hot-remove test cases. + +# Test Case 1 +function scsi_hotremove_tc1() { + echo "Scsi hotremove test case 1" + traddr="" + get_traddr "Nvme0" + # 1. Run the command to hot remove NVMe disk. + delete_nvme "Nvme0" + # 2. If vhost had crashed then tests would stop running + sleep 1 + add_nvme "HotInNvme0" "$traddr" +} + +# Test Case 2 +function scsi_hotremove_tc2() { + echo "Scsi hotremove test case 2" + # 1. Attach split NVMe bdevs to scsi controller. + $rpc_py add_vhost_scsi_lun naa.Nvme0n1p0.0 0 HotInNvme0n1p0 + $rpc_py add_vhost_scsi_lun naa.Nvme0n1p1.0 0 Nvme1n1p0 + $rpc_py add_vhost_scsi_lun naa.Nvme0n1p2.1 0 HotInNvme0n1p1 + $rpc_py add_vhost_scsi_lun naa.Nvme0n1p3.1 0 Nvme1n1p1 + + # 2. Run two VMs, attached to scsi controllers. + vms_setup + vm_run_with_arg 0 1 + vms_prepare "0 1" + + vm_check_scsi_location "0" + local disks="$SCSI_DISK" + + traddr="" + get_traddr "Nvme0" + prepare_fio_cmd_tc1 "0 1" + # 3. Run FIO I/O traffic with verification enabled on on both NVMe disks in VM. + $run_fio & + local last_pid=$! + sleep 3 + # 4. Run the command to hot remove NVMe disk. + delete_nvme "HotInNvme0" + + # 5. Check that fio job run on hot-remove device stopped on VM. + # Expected: Fio should return error message and return code != 0. + wait_for_finish $last_pid || retcode=$? + check_fio_retcode "Scsi hotremove test case 2: Iteration 1." 1 $retcode + + # 6. 
Check if removed devices are gone from VM. + vm_check_scsi_location "0" + local new_disks="$SCSI_DISK" + check_disks "$disks" "$new_disks" + # 7. Reboot both VMs. + reboot_all_and_prepare "0 1" + # 8. Run FIO I/O traffic with verification enabled on on both VMs. + local retcode=0 + $run_fio & + wait_for_finish $! || retcode=$? + # 9. Check that fio job run on hot-remove device stopped on both VMs. + # Expected: Fio should return error message and return code != 0. + check_fio_retcode "Scsi hotremove test case 2: Iteration 2." 1 $retcode + vm_shutdown_all + add_nvme "HotInNvme1" "$traddr" + sleep 1 +} + +# Test Case 3 +function scsi_hotremove_tc3() { + echo "Scsi hotremove test case 3" + # 1. Attach added NVMe bdev to scsi controller. + $rpc_py add_vhost_scsi_lun naa.Nvme0n1p0.0 0 HotInNvme1n1p0 + # 2. Run two VM, attached to scsi controllers. + vm_run_with_arg 0 1 + vms_prepare "0 1" + vm_check_scsi_location "0" + local disks="$SCSI_DISK" + traddr="" + get_traddr "Nvme0" + # 3. Run FIO I/O traffic with verification enabled on on both NVMe disks in VMs. + prepare_fio_cmd_tc1 "0" + $run_fio & + local last_pid=$! + sleep 3 + # 4. Run the command to hot remove NVMe disk. + delete_nvme "HotInNvme1" + # 5. Check that fio job run on hot-remove device stopped on first VM. + # Expected: Fio should return error message and return code != 0. + wait_for_finish $last_pid || retcode=$? + check_fio_retcode "Scsi hotremove test case 3: Iteration 1." 1 $retcode + # 6. Check if removed devices are gone from lsblk. + vm_check_scsi_location "0" + local new_disks="$SCSI_DISK" + check_disks "$disks" "$new_disks" + # 7. Reboot both VMs. + reboot_all_and_prepare "0 1" + # 8. Run FIO I/O traffic with verification enabled on on both VMs. + local retcode=0 + $run_fio & + wait_for_finish $! || retcode=$? + # 9. Check that fio job run on hot-remove device stopped on both VMs. + # Expected: Fio should return error message and return code != 0. 
+ check_fio_retcode "Scsi hotremove test case 3: Iteration 2." 1 $retcode + vm_shutdown_all + add_nvme "HotInNvme2" "$traddr" + sleep 1 +} + +# Test Case 4 +function scsi_hotremove_tc4() { + echo "Scsi hotremove test case 4" + # 1. Attach NVMe bdevs to scsi controllers. + $rpc_py add_vhost_scsi_lun naa.Nvme0n1p0.0 0 HotInNvme2n1p0 + $rpc_py add_vhost_scsi_lun naa.Nvme0n1p2.1 0 HotInNvme2n1p1 + # 2. Run two VMs, attach to scsi controller. + vm_run_with_arg 0 1 + vms_prepare "0 1" + + # 3. Run FIO I/O traffic with verification enabled on first VM. + vm_check_scsi_location "0" + local disks_vm0="$SCSI_DISK" + # 4. Run FIO I/O traffic with verification enabled on second VM. + prepare_fio_cmd_tc1 "0" + $run_fio & + last_pid_vm0=$! + + vm_check_scsi_location "1" + local disks_vm1="$SCSI_DISK" + prepare_fio_cmd_tc1 "1" + $run_fio & + local last_pid_vm1=$! + prepare_fio_cmd_tc1 "0 1" + sleep 3 + # 5. Run the command to hot remove NVMe disk. + traddr="" + get_traddr "Nvme0" + delete_nvme "HotInNvme2" + # 6. Check that fio job run on hot-removed devices stopped. + # Expected: Fio should return error message and return code != 0. + local retcode_vm0=0 + wait_for_finish $last_pid_vm0 || retcode_vm0=$? + local retcode_vm1=0 + wait_for_finish $last_pid_vm1 || retcode_vm1=$? + check_fio_retcode "Scsi hotremove test case 4: Iteration 1." 1 $retcode_vm0 + check_fio_retcode "Scsi hotremove test case 4: Iteration 2." 1 $retcode_vm1 + + # 7. Check if removed devices are gone from lsblk. + vm_check_scsi_location "0" + local new_disks_vm0="$SCSI_DISK" + check_disks "$disks_vm0" "$new_disks_vm0" + vm_check_scsi_location "1" + local new_disks_vm1="$SCSI_DISK" + check_disks "$disks_vm1" "$new_disks_vm1" + + # 8. Reboot both VMs. + reboot_all_and_prepare "0 1" + # 9. Run FIO I/O traffic with verification enabled on on not-removed NVMe disk. + local retcode=0 + $run_fio & + wait_for_finish $! || retcode=$? + # 10. Check that fio job run on hot-removed device stopped. 
+ # Expect: Fio should return error message and return code != 0. + check_fio_retcode "Scsi hotremove test case 4: Iteration 3." 1 $retcode + prepare_fio_cmd_tc1 "0 1" + # 11. Run FIO I/O traffic with verification enabled on on not-removed NVMe disk. + local retcode=0 + $run_fio & + wait_for_finish $! || retcode=$? + # 12. Check finished status FIO. Write and read in the not-removed. + # NVMe disk should be successful. + # Expected: Fio should return return code == 0. + check_fio_retcode "Scsi hotremove test case 4: Iteration 4." 0 $retcode + vm_shutdown_all + add_nvme "HotInNvme3" "$traddr" + sleep 1 + $rpc_py remove_vhost_scsi_target naa.Nvme0n1p1.0 0 + $rpc_py remove_vhost_scsi_target naa.Nvme0n1p3.1 0 +} + +function pre_scsi_hotremove_test_case() { + $rpc_py construct_vhost_scsi_controller naa.Nvme0n1p0.0 + $rpc_py construct_vhost_scsi_controller naa.Nvme0n1p1.0 + $rpc_py construct_vhost_scsi_controller naa.Nvme0n1p2.1 + $rpc_py construct_vhost_scsi_controller naa.Nvme0n1p3.1 +} + +function post_scsi_hotremove_test_case() { + $rpc_py remove_vhost_controller naa.Nvme0n1p0.0 + $rpc_py remove_vhost_controller naa.Nvme0n1p1.0 + $rpc_py remove_vhost_controller naa.Nvme0n1p2.1 + $rpc_py remove_vhost_controller naa.Nvme0n1p3.1 +} + +pre_scsi_hotremove_test_case +scsi_hotremove_tc1 +scsi_hotremove_tc2 +scsi_hotremove_tc3 +scsi_hotremove_tc4 +post_scsi_hotremove_test_case diff --git a/src/spdk/test/vhost/hotplug/test_plan.md b/src/spdk/test/vhost/hotplug/test_plan.md new file mode 100644 index 00000000..0cbc5042 --- /dev/null +++ b/src/spdk/test/vhost/hotplug/test_plan.md @@ -0,0 +1,86 @@ +#Vhost hotattach and hotdetach test plan + +## Objective +The purpose of these tests is to verify that SPDK vhost remains stable during +hot-attach and hot-detach operations performed on SCSI controllers devices. +Hot-attach is a scenario where a device is added to controller already in use by +guest VM, while in hot-detach device is removed from controller when already in use. 
+ +## Test Cases Description +1. FIO I/O traffic is run during hot-attach and detach operations. +By default FIO uses default_integrity*.job config files located in +test/vhost/hotfeatures/fio_jobs directory. +2. FIO mode of operation in random write (randwrite) with verification enabled +which results in also performing read operations. +3. Test case descriptions below contain manual steps for testing. +Automated tests are located in test/vhost/hotfeatures. + +### Hotattach, Hotdetach Test Cases prerequisites +1. Run vhost with 8 empty controllers. Prepare 16 nvme disks. +If you don't have 16 disks use split. +2. In test cases fio status is checked after every run if there are any errors. + +### Hotattach Test Cases prerequisites +1. Run vms, first with ctrlr-1 and ctrlr-2 and second one with ctrlr-3 and ctrlr-4. + +## Test Case 1 +1. Attach NVMe to Ctrlr 1 +2. Run fio integrity on attached device + +## Test Case 2 +1. Run fio integrity on attached device from test case 1 +2. During fio attach another NVMe to Ctrlr 1 +3. Run fio integrity on both devices + +## Test Case 3 +1. Run fio integrity on attached devices from previous test cases +2. During fio attach NVMe to Ctrl2 +3. Run fio integrity on all devices + +## Test Case 4 +2. Run fio integrity on attached device from previous test cases +3. During fio attach NVMe to Ctrl3/VM2 +4. Run fio integrity on all devices +5. Reboot VMs +6. Run fio integrity again on all devices + + +### Hotdetach Test Cases prerequisites +1. Run vms, first with ctrlr-5 and ctrlr-6 and second with ctrlr-7 and ctrlr-8. + +## Test Case 1 +1. Run fio on all devices +2. Detatch NVMe from Ctrl5 during fio +3. Check vhost or VMs did not crash +4. Check that detatched device is gone from VM +5. Check that fio job run on detached device stopped and failed + +## Test Case 2 +1. Attach NVMe to Ctrlr 5 +2. Run fio on 1 device from Ctrl 5 +3. Detatch NVMe from Ctrl5 during fio traffic +4. Check vhost or VMs did not crash +5. 
Check that fio job run on detached device stopped and failed +6. Check that detatched device is gone from VM + +## Test Case 3 +1. Attach NVMe to Ctrlr 5 +2. Run fio with integrity on all devices, except one +3. Detatch NVMe without traffic during fio running on other devices +4. Check vhost or VMs did not crash +5. Check that fio jobs did not fail +6. Check that detatched device is gone from VM + +## Test Case 4 +1. Attach NVMe to Ctrlr 5 +2. Run fio on 1 device from Ctrl 5 +3. Run separate fio with integrity on all other devices (all VMs) +4. Detatch NVMe from Ctrl1 during fio traffic +5. Check vhost or VMs did not crash +6. Check that fio job run on detached device stopped and failed +7. Check that other fio jobs did not fail +8. Check that detatched device is gone from VM +9. Reboot VMs +10. Check that detatched device is gone from VM +11. Check that all other devices are in place +12. Run fio integrity on all remianing devices diff --git a/src/spdk/test/vhost/hotplug/vhost.conf.base b/src/spdk/test/vhost/hotplug/vhost.conf.base new file mode 100644 index 00000000..4fa801d9 --- /dev/null +++ b/src/spdk/test/vhost/hotplug/vhost.conf.base @@ -0,0 +1,4 @@ +[Global] + +[Nvme] + HotplugEnable Yes diff --git a/src/spdk/test/vhost/initiator/autotest.config b/src/spdk/test/vhost/initiator/autotest.config new file mode 100644 index 00000000..61a1a242 --- /dev/null +++ b/src/spdk/test/vhost/initiator/autotest.config @@ -0,0 +1,5 @@ +vhost_0_reactor_mask=["0"] +vhost_0_master_core=0 + +VM_0_qemu_mask=1-10 +VM_0_qemu_numa_node=0 diff --git a/src/spdk/test/vhost/initiator/bdev.conf b/src/spdk/test/vhost/initiator/bdev.conf new file mode 100644 index 00000000..7ea01a82 --- /dev/null +++ b/src/spdk/test/vhost/initiator/bdev.conf @@ -0,0 +1,21 @@ +[VirtioUser0] + Path naa.Nvme0n1_scsi0.0 + Queues 8 + +[VirtioUser1] + Path naa.Malloc0.0 + Queues 8 + +[VirtioUser2] + Path naa.Malloc1.0 + Queues 8 + +[VirtioUser3] + Path naa.Nvme0n1_blk0.0 + Type Blk + Queues 8 + +[VirtioUser4] + 
Path naa.Nvme0n1_blk1.0 + Type Blk + Queues 8 diff --git a/src/spdk/test/vhost/initiator/bdev.fio b/src/spdk/test/vhost/initiator/bdev.fio new file mode 100644 index 00000000..40520228 --- /dev/null +++ b/src/spdk/test/vhost/initiator/bdev.fio @@ -0,0 +1,51 @@ +[global] +thread=1 +group_reporting=1 +direct=1 +norandommap=1 +time_based=1 +do_verify=1 +verify=md5 +verify_backlog=1024 +iodepth=128 +bs=4K +runtime=10 +size=13% + +[job_randwrite] +rw=randwrite +name=randwrite + +[job_randrw] +offset=13% +rw=randrw +name=randrw + +[job_write] +offset=26% +rw=write +name=write + +[job_rw] +offset=39% +rw=rw +name=rw + +[job_unmap_trim_sequential] +offset=52% +rw=trim +trim_verify_zero=1 +name=unmap_trim_sequential + +[job_unmap_trim_random] +offset=65% +rw=randtrim +trim_verify_zero=1 +name=unmap_trim_random + +[job_unmap_write] +stonewall +offset=52% +size=26% +rw=randwrite +name=unmap_write diff --git a/src/spdk/test/vhost/initiator/bdev_pci.conf b/src/spdk/test/vhost/initiator/bdev_pci.conf new file mode 100644 index 00000000..0e47e88a --- /dev/null +++ b/src/spdk/test/vhost/initiator/bdev_pci.conf @@ -0,0 +1,2 @@ +[VirtioPci] + Enable Yes diff --git a/src/spdk/test/vhost/initiator/blockdev.sh b/src/spdk/test/vhost/initiator/blockdev.sh new file mode 100755 index 00000000..b5ec3015 --- /dev/null +++ b/src/spdk/test/vhost/initiator/blockdev.sh @@ -0,0 +1,200 @@ +#!/usr/bin/env bash + +set -e +INITIATOR_DIR=$(readlink -f $(dirname $0)) +[[ -z "$COMMON_DIR" ]] && COMMON_DIR="$(cd $INITIATOR_DIR/../common && pwd)" +ROOT_DIR=$(readlink -f $INITIATOR_DIR/../../..) + +PLUGIN_DIR=$ROOT_DIR/examples/bdev/fio_plugin +FIO_PATH="/usr/src/fio" +virtio_bdevs="" +virtio_with_unmap="" +os_image="/home/sys_sgsw/vhost_vm_image.qcow2" +#different linux distributions have different versions of targetcli that have different names for ramdisk option +targetcli_rd_name="" +kernel_vhost_disk="naa.5012345678901234" + +function usage() +{ + [[ ! 
-z $2 ]] && ( echo "$2"; echo ""; ) + echo "Script for running vhost initiator tests." + echo "Usage: $(basename $1) [-h|--help] [--fiobin=PATH]" + echo "-h, --help Print help and exit" + echo " --vm_image=PATH Path to VM image used in these tests [default=$os_image]" + echo " --fiopath=PATH Path to fio directory on host [default=$FIO_PATH]" +} + +while getopts 'h-:' optchar; do + case "$optchar" in + -) + case "$OPTARG" in + help) usage $0 && exit 0 ;; + fiopath=*) FIO_PATH="${OPTARG#*=}" ;; + vm_image=*) os_image="${OPTARG#*=}" ;; + *) usage $0 echo "Invalid argument '$OPTARG'" && exit 1 ;; + esac + ;; + h) usage $0 && exit 0 ;; + *) usage $0 "Invalid argument '$optchar'" && exit 1 ;; + esac +done + +source $COMMON_DIR/common.sh +source $INITIATOR_DIR/autotest.config +PLUGIN_DIR=$ROOT_DIR/examples/bdev/fio_plugin +RPC_PY="$ROOT_DIR/scripts/rpc.py -s $(get_vhost_dir)/rpc.sock" + +if [ ! -x $FIO_PATH ]; then + error "Invalid path of fio binary" +fi + +if [[ $EUID -ne 0 ]]; then + echo "INFO: Go away user come back as root" + exit 1 +fi + +if targetcli ls backstores | grep ramdisk ; then + targetcli_rd_name="ramdisk" +elif targetcli ls backstores | grep rd_mcp ; then + targetcli_rd_name="rd_mcp" +else + error "targetcli: cannot create a ramdisk.\ + Neither backstores/ramdisk nor backstores/rd_mcp is available" +fi + +function remove_kernel_vhost() +{ + targetcli "/vhost delete $kernel_vhost_disk" + targetcli "/backstores/$targetcli_rd_name delete ramdisk" +} + +trap 'rm -f *.state $ROOT_DIR/spdk.tar.gz $ROOT_DIR/fio.tar.gz $(get_vhost_dir)/Virtio0;\ + error_exit "${FUNCNAME}""${LINENO}"' ERR SIGTERM SIGABRT +function run_spdk_fio() { + LD_PRELOAD=$PLUGIN_DIR/fio_plugin $FIO_PATH/fio --ioengine=spdk_bdev\ + "$@" --spdk_mem=1024 --spdk_single_seg=1 +} + +function create_bdev_config() +{ + local vbdevs + + if [ -z "$($RPC_PY get_bdevs | jq '.[] | select(.name=="Nvme0n1")')" ]; then + error "Nvme0n1 bdev not found!" 
+ fi + + $RPC_PY construct_split_vbdev Nvme0n1 6 + + $RPC_PY construct_vhost_scsi_controller naa.Nvme0n1_scsi0.0 + $RPC_PY add_vhost_scsi_lun naa.Nvme0n1_scsi0.0 0 Nvme0n1p0 + $RPC_PY add_vhost_scsi_lun naa.Nvme0n1_scsi0.0 1 Nvme0n1p1 + $RPC_PY add_vhost_scsi_lun naa.Nvme0n1_scsi0.0 2 Nvme0n1p2 + $RPC_PY add_vhost_scsi_lun naa.Nvme0n1_scsi0.0 3 Nvme0n1p3 + + $RPC_PY construct_vhost_blk_controller naa.Nvme0n1_blk0.0 Nvme0n1p4 + $RPC_PY construct_vhost_blk_controller naa.Nvme0n1_blk1.0 Nvme0n1p5 + + $RPC_PY construct_malloc_bdev 128 512 --name Malloc0 + $RPC_PY construct_vhost_scsi_controller naa.Malloc0.0 + $RPC_PY add_vhost_scsi_lun naa.Malloc0.0 0 Malloc0 + + $RPC_PY construct_malloc_bdev 128 4096 --name Malloc1 + $RPC_PY construct_vhost_scsi_controller naa.Malloc1.0 + $RPC_PY add_vhost_scsi_lun naa.Malloc1.0 0 Malloc1 + + vbdevs=$(discover_bdevs $ROOT_DIR $INITIATOR_DIR/bdev.conf) + virtio_bdevs=$(jq -r '[.[].name] | join(":")' <<< $vbdevs) + virtio_with_unmap=$(jq -r '[.[] | select(.supported_io_types.unmap==true).name] + | join(":")' <<< $vbdevs) +} + +timing_enter spdk_vhost_run +spdk_vhost_run +timing_exit spdk_vhost_run + +timing_enter create_bdev_config +create_bdev_config +timing_exit create_bdev_config + +timing_enter run_spdk_fio +run_spdk_fio $INITIATOR_DIR/bdev.fio --filename=$virtio_bdevs --section=job_randwrite --section=job_randrw \ + --section=job_write --section=job_rw --spdk_conf=$INITIATOR_DIR/bdev.conf +report_test_completion "vhost_run_spdk_fio" +timing_exit run_spdk_fio + +timing_enter run_spdk_fio_unmap +run_spdk_fio $INITIATOR_DIR/bdev.fio --filename=$virtio_with_unmap --spdk_conf=$INITIATOR_DIR/bdev.conf \ + --spdk_conf=$INITIATOR_DIR/bdev.conf +timing_exit run_spdk_fio_unmap + +timing_enter create_kernel_vhost +targetcli "/backstores/$targetcli_rd_name create name=ramdisk size=1GB" +targetcli "/vhost create $kernel_vhost_disk" +targetcli "/vhost/$kernel_vhost_disk/tpg1/luns create /backstores/$targetcli_rd_name/ramdisk" +timing_exit 
create_kernel_vhost + +timing_enter setup_vm +vm_no="0" +vm_setup --disk-type=spdk_vhost_scsi --force=$vm_no --os=$os_image \ + --disks="Nvme0n1_scsi0:Malloc0:Malloc1:$kernel_vhost_disk,kernel_vhost:Virtio0,virtio:\ + Nvme0n1_blk0,spdk_vhost_blk:Nvme0n1_blk1,spdk_vhost_blk" \ + --queue_num=8 --memory=6144 +vm_run $vm_no + +timing_enter vm_wait_for_boot +vm_wait_for_boot 600 $vm_no +timing_exit vm_wait_for_boot + +timing_enter vm_scp_spdk +touch $ROOT_DIR/spdk.tar.gz +tar --exclude="spdk.tar.gz" --exclude="*.o" --exclude="*.d" --exclude=".git" -C $ROOT_DIR -zcf $ROOT_DIR/spdk.tar.gz . +vm_scp $vm_no $ROOT_DIR/spdk.tar.gz "127.0.0.1:/root" +vm_ssh $vm_no "mkdir -p /root/spdk; tar -zxf /root/spdk.tar.gz -C /root/spdk --strip-components=1" + +touch $ROOT_DIR/fio.tar.gz +tar --exclude="fio.tar.gz" --exclude="*.o" --exclude="*.d" --exclude=".git" -C $FIO_PATH -zcf $ROOT_DIR/fio.tar.gz . +vm_scp $vm_no $ROOT_DIR/fio.tar.gz "127.0.0.1:/root" +vm_ssh $vm_no "rm -rf /root/fio_src; mkdir -p /root/fio_src; tar -zxf /root/fio.tar.gz -C /root/fio_src --strip-components=1" +timing_exit vm_scp_spdk + +timing_enter vm_build_spdk +nproc=$(vm_ssh $vm_no "nproc") +vm_ssh $vm_no " cd /root/fio_src ; make clean ; make -j${nproc} ; make install" +vm_ssh $vm_no " cd spdk ; ./configure --with-fio=/root/fio_src ; make clean ; make -j${nproc}" +timing_exit vm_build_spdk + +vm_ssh $vm_no "/root/spdk/scripts/setup.sh" +vbdevs=$(vm_ssh $vm_no ". 
/root/spdk/test/common/autotest_common.sh && discover_bdevs /root/spdk \ + /root/spdk/test/vhost/initiator/bdev_pci.conf") +virtio_bdevs=$(jq -r '[.[].name] | join(":")' <<< $vbdevs) +virtio_with_unmap=$(jq -r '[.[] | select(.supported_io_types.unmap==true).name] + | join(":")' <<< $vbdevs) +timing_exit setup_vm + +timing_enter run_spdk_fio_pci +vm_ssh $vm_no "LD_PRELOAD=/root/spdk/examples/bdev/fio_plugin/fio_plugin /root/fio_src/fio --ioengine=spdk_bdev \ + /root/spdk/test/vhost/initiator/bdev.fio --filename=$virtio_bdevs --section=job_randwrite \ + --section=job_randrw --section=job_write --section=job_rw \ + --spdk_conf=/root/spdk/test/vhost/initiator/bdev_pci.conf --spdk_mem=1024 --spdk_single_seg=1" +timing_exit run_spdk_fio_pci + +timing_enter run_spdk_fio_pci_unmap +vm_ssh $vm_no "LD_PRELOAD=/root/spdk/examples/bdev/fio_plugin/fio_plugin /root/fio_src/fio --ioengine=spdk_bdev \ + /root/spdk/test/vhost/initiator/bdev.fio --filename=$virtio_with_unmap \ + --spdk_conf=/root/spdk/test/vhost/initiator/bdev_pci.conf --spdk_mem=1024 --spdk_single_seg=1" +timing_exit run_spdk_fio_pci_unmap + +timing_enter vm_shutdown_all +vm_shutdown_all +timing_exit vm_shutdown_all + +rm -f *.state $ROOT_DIR/spdk.tar.gz $ROOT_DIR/fio.tar.gz $(get_vhost_dir)/Virtio0 +timing_enter remove_kernel_vhost +remove_kernel_vhost +timing_exit remove_kernel_vhost + +$RPC_PY delete_nvme_controller Nvme0 + +timing_enter spdk_vhost_kill +spdk_vhost_kill +timing_exit spdk_vhost_kill diff --git a/src/spdk/test/vhost/initiator/json_config.sh b/src/spdk/test/vhost/initiator/json_config.sh new file mode 100755 index 00000000..86078c9a --- /dev/null +++ b/src/spdk/test/vhost/initiator/json_config.sh @@ -0,0 +1,64 @@ +#!/usr/bin/env bash +set -ex +INITIATOR_JSON_DIR=$(readlink -f $(dirname $0)) +. 
$INITIATOR_JSON_DIR/../../json_config/common.sh + +# Load spdk_tgt with controllers used by virtio initiator +# Test also virtio_pci bdevs +function construct_vhost_devices() { + $rpc_py construct_split_vbdev Nvme0n1 4 + $rpc_py construct_vhost_scsi_controller naa.Nvme0n1p0.0 + $rpc_py construct_vhost_scsi_controller naa.Nvme0n1p1.1 + $rpc_py add_vhost_scsi_lun naa.Nvme0n1p0.0 0 Nvme0n1p0 + $rpc_py add_vhost_scsi_lun naa.Nvme0n1p1.1 0 Nvme0n1p1 + $rpc_py construct_vhost_blk_controller naa.Nvme0n1p2.0 Nvme0n1p2 + $rpc_py construct_vhost_blk_controller naa.Nvme0n1p3.1 Nvme0n1p3 + pci_scsi=$(lspci -nn -D | grep '1af4:1004' | head -1 | awk '{print $1;}') + pci_blk=$(lspci -nn -D | grep '1af4:1001' | head -1 | awk '{print $1;}') + if [ ! -z $pci_scsi ]; then + $rpc_py construct_virtio_dev -t pci -a $pci_scsi -d scsi Virtio0 + fi + if [ ! -z $pci_blk ]; then + $rpc_py construct_virtio_dev -t pci -a $pci_blk -d blk Virtio1 + fi +} + +# Load virtio initiator with bdevs +function connect_to_vhost_devices_from_initiator() { + $rpc_py construct_virtio_dev -t user -a naa.Nvme0n1p0.0 -d scsi Nvme0n1p0 + $rpc_py construct_virtio_dev -t user -a naa.Nvme0n1p2.0 -d blk Nvme0n1p2 +} + +function disconnect_and_clear_vhost_devices() { + $clear_config_py clear_config +} + +function test_subsystems() { + run_spdk_tgt + rootdir=$(readlink -f $INITIATOR_JSON_DIR/../../..) 
+ + rpc_py="$spdk_rpc_py" + clear_config_py="$spdk_clear_config_py" + load_nvme + + construct_vhost_devices + test_json_config + run_initiator + rpc_py="$initiator_rpc_py" + clear_config_py="$initiator_clear_config_py" + $rpc_py start_subsystem_init + connect_to_vhost_devices_from_initiator + test_json_config + disconnect_and_clear_vhost_devices + test_global_params "virtio_initiator" + clear_config_py="$spdk_clear_config_py" + $clear_config_py clear_config + kill_targets +} + +trap 'on_error_exit "${FUNCNAME}" "${LINENO}"' ERR +timing_enter json_config_virtio_initiator + +test_subsystems +timing_exit json_config_virtio_initiator +report_test_completion json_config_virtio_initiator diff --git a/src/spdk/test/vhost/integrity/integrity_start.sh b/src/spdk/test/vhost/integrity/integrity_start.sh new file mode 100755 index 00000000..a9899e9f --- /dev/null +++ b/src/spdk/test/vhost/integrity/integrity_start.sh @@ -0,0 +1,97 @@ +#!/usr/bin/env bash +set -e + +INTEGRITY_BASE_DIR=$(readlink -f $(dirname $0)) +ctrl_type="spdk_vhost_scsi" +vm_fs="ext4" + +function usage() +{ + [[ ! 
-z $2 ]] && ( echo "$2"; echo ""; ) + echo "Shortcut script for doing automated test" + echo "Usage: $(basename $1) [OPTIONS]" + echo + echo "-h, --help Print help and exit" + echo " --work-dir=WORK_DIR Workspace for the test to run" + echo " --ctrl-type=TYPE Controller type to use for test:" + echo " spdk_vhost_scsi - use spdk vhost scsi" + echo " --fs=FS_LIST Filesystems to use for test in VM:" + echo " Example: --fs=\"ext4 ntfs ext2\"" + echo " Default: ext4" + echo " spdk_vhost_blk - use spdk vhost block" + echo "-x set -x for script debug" + exit 0 +} + +function clean_lvol_cfg() +{ + notice "Removing lvol bdev and lvol store" + $rpc_py destroy_lvol_bdev lvol_store/lvol_bdev + $rpc_py destroy_lvol_store -l lvol_store +} + +while getopts 'xh-:' optchar; do + case "$optchar" in + -) + case "$OPTARG" in + help) usage $0 ;; + ctrl-type=*) ctrl_type="${OPTARG#*=}" ;; + fs=*) vm_fs="${OPTARG#*=}" ;; + *) usage $0 "Invalid argument '$OPTARG'" ;; + esac + ;; + h) usage $0 ;; + x) set -x + x="-x" ;; + *) usage $0 "Invalid argument '$OPTARG'" + esac +done + +. $(readlink -e "$(dirname $0)/../common/common.sh") || exit 1 +rpc_py="$SPDK_BUILD_DIR/scripts/rpc.py -s $(get_vhost_dir)/rpc.sock" + +trap 'error_exit "${FUNCNAME}" "${LINENO}"' SIGTERM SIGABRT ERR + +# Try to kill if any VM remains from previous runs +vm_kill_all + +notice "Starting SPDK vhost" +spdk_vhost_run +notice "..." 
+ +# Set up lvols and vhost controllers +trap 'clean_lvol_cfg; error_exit "${FUNCNAME}" "${LINENO}"' SIGTERM SIGABRT ERR +notice "Constructing lvol store and lvol bdev on top of Nvme0n1" +lvs_uuid=$($rpc_py construct_lvol_store Nvme0n1 lvol_store) +$rpc_py construct_lvol_bdev lvol_bdev 10000 -l lvol_store + +if [[ "$ctrl_type" == "spdk_vhost_scsi" ]]; then + $rpc_py construct_vhost_scsi_controller naa.Nvme0n1.0 + $rpc_py add_vhost_scsi_lun naa.Nvme0n1.0 0 lvol_store/lvol_bdev +elif [[ "$ctrl_type" == "spdk_vhost_blk" ]]; then + $rpc_py construct_vhost_blk_controller naa.Nvme0n1.0 lvol_store/lvol_bdev +fi + +# Set up and run VM +setup_cmd="vm_setup --disk-type=$ctrl_type --force=0" +setup_cmd+=" --os=/home/sys_sgsw/vhost_vm_image.qcow2" +setup_cmd+=" --disks=Nvme0n1" +$setup_cmd + +# Run VM +vm_run 0 +vm_wait_for_boot 600 0 + +# Run tests on VM +vm_scp 0 $INTEGRITY_BASE_DIR/integrity_vm.sh root@127.0.0.1:/root/integrity_vm.sh +vm_ssh 0 "~/integrity_vm.sh $ctrl_type \"$vm_fs\"" + +notice "Shutting down virtual machine..." +vm_shutdown_all + +clean_lvol_cfg + +$rpc_py delete_nvme_controller Nvme0 + +notice "Shutting down SPDK vhost app..." 
+spdk_vhost_kill diff --git a/src/spdk/test/vhost/integrity/integrity_vm.sh b/src/spdk/test/vhost/integrity/integrity_vm.sh new file mode 100755 index 00000000..ccb01cea --- /dev/null +++ b/src/spdk/test/vhost/integrity/integrity_vm.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash +set -xe + +basedir=$(readlink -f $(dirname $0)) +MAKE="make -j$(( $(nproc) * 2 ))" + +if [[ $1 == "spdk_vhost_scsi" ]]; then + devs="" + for entry in /sys/block/sd*; do + if grep -Eq '(INTEL|RAWSCSI|LIO-ORG)' $entry/device/vendor; then + devs+="$(basename $entry) " + fi + done +elif [[ $1 == "spdk_vhost_blk" ]]; then + devs=$(cd /sys/block; echo vd*) +fi + +fs=$2 + +trap "exit 1" SIGINT SIGTERM EXIT + +for fs in $fs; do + for dev in $devs; do + parted_cmd="parted -s /dev/${dev}" + + echo "INFO: Creating partition table on disk using: $parted_cmd mklabel gpt" + $parted_cmd mklabel gpt + $parted_cmd mkpart primary 2048s 100% + sleep 2 + + mkfs_cmd="mkfs.$fs" + if [[ $fs == "ntfs" ]]; then + mkfs_cmd+=" -f" + fi + mkfs_cmd+=" /dev/${dev}1" + echo "INFO: Creating filesystem using: $mkfs_cmd" + wipefs -a /dev/${dev}1 + $mkfs_cmd + + mkdir -p /mnt/${dev}dir + mount -o sync /dev/${dev}1 /mnt/${dev}dir + + fio --name="integrity" --bsrange=4k-512k --iodepth=128 --numjobs=1 --direct=1 \ + --thread=1 --group_reporting=1 --rw=randrw --rwmixread=70 \ + --filename=/mnt/${dev}dir/test_file --verify=md5 --do_verify=1 \ + --verify_backlog=1024 --fsync_on_close=1 --runtime=20 --time_based=1 --size=512m + + # Print out space consumed on target device + df -h /dev/$dev + done + + for dev in $devs; do + umount /mnt/${dev}dir + rm -rf /mnt/${dev}dir + + stats=( $(cat /sys/block/$dev/stat) ) + echo "" + echo "$dev stats" + printf "READ IO cnt: % 8u merges: % 8u sectors: % 8u ticks: % 8u\n" \ + ${stats[0]} ${stats[1]} ${stats[2]} ${stats[3]} + printf "WRITE IO cnt: % 8u merges: % 8u sectors: % 8u ticks: % 8u\n" \ + ${stats[4]} ${stats[5]} ${stats[6]} ${stats[7]} + printf "in flight: % 8u io ticks: % 8u time in queue: % 
8u\n" \ + ${stats[8]} ${stats[9]} ${stats[10]} + echo "" + done +done + +trap - SIGINT SIGTERM EXIT diff --git a/src/spdk/test/vhost/json_config/json_config.sh b/src/spdk/test/vhost/json_config/json_config.sh new file mode 100755 index 00000000..d5683f1d --- /dev/null +++ b/src/spdk/test/vhost/json_config/json_config.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +set -ex +VHOST_JSON_DIR=$(readlink -f $(dirname $0)) +. $VHOST_JSON_DIR/../../json_config/common.sh + +function test_subsystems() { + run_spdk_tgt + + rpc_py="$spdk_rpc_py" + clear_config_py="$spdk_clear_config_py" + load_nvme + + upload_vhost + test_json_config + $clear_config_py clear_config + + kill_targets +} + +trap 'on_error_exit "${FUNCNAME}" "${LINENO}"' ERR +timing_enter json_config_vhost + +test_subsystems +timing_exit json_config_vhost +report_test_completion json_config_vhost diff --git a/src/spdk/test/vhost/lvol/autotest.config b/src/spdk/test/vhost/lvol/autotest.config new file mode 100644 index 00000000..9b653cd7 --- /dev/null +++ b/src/spdk/test/vhost/lvol/autotest.config @@ -0,0 +1,74 @@ +vhost_0_reactor_mask="[0-31]" +vhost_0_master_core=0 + +VM_0_qemu_mask=1 +VM_0_qemu_numa_node=0 + +VM_1_qemu_mask=2 +VM_1_qemu_numa_node=0 + +VM_2_qemu_mask=3 +VM_2_qemu_numa_node=0 + +VM_3_qemu_mask=4 +VM_3_qemu_numa_node=0 + +VM_4_qemu_mask=5 +VM_4_qemu_numa_node=0 + +VM_5_qemu_mask=6 +VM_5_qemu_numa_node=0 + +VM_6_qemu_mask=7 +VM_6_qemu_numa_node=0 + +VM_7_qemu_mask=8 +VM_7_qemu_numa_node=0 + +VM_8_qemu_mask=9 +VM_8_qemu_numa_node=0 + +VM_9_qemu_mask=10 +VM_9_qemu_numa_node=0 + +VM_10_qemu_mask=11 +VM_10_qemu_numa_node=0 + +VM_11_qemu_mask=12 +VM_11_qemu_numa_node=0 + +VM_12_qemu_mask=13 +VM_12_qemu_numa_node=1 + +VM_13_qemu_mask=14 +VM_13_qemu_numa_node=1 + +VM_14_qemu_mask=15 +VM_14_qemu_numa_node=1 + +VM_15_qemu_mask=16 +VM_15_qemu_numa_node=1 + +VM_16_qemu_mask=17 +VM_16_qemu_numa_node=1 + +VM_17_qemu_mask=18 +VM_17_qemu_numa_node=1 + +VM_18_qemu_mask=19 +VM_18_qemu_numa_node=1 + +VM_19_qemu_mask=20 
+VM_19_qemu_numa_node=1 + +VM_20_qemu_mask=21 +VM_20_qemu_numa_node=1 + +VM_21_qemu_mask=22 +VM_21_qemu_numa_node=1 + +VM_22_qemu_mask=23 +VM_22_qemu_numa_node=1 + +VM_23_qemu_mask=24 +VM_23_qemu_numa_node=1 diff --git a/src/spdk/test/vhost/lvol/lvol_test.sh b/src/spdk/test/vhost/lvol/lvol_test.sh new file mode 100755 index 00000000..5190b5f2 --- /dev/null +++ b/src/spdk/test/vhost/lvol/lvol_test.sh @@ -0,0 +1,286 @@ +#!/usr/bin/env bash +set -e + +rootdir=$(readlink -f $(dirname $0))/../../.. +source "$rootdir/scripts/common.sh" + +LVOL_TEST_DIR=$(readlink -f $(dirname $0)) +[[ -z "$TEST_DIR" ]] && TEST_DIR="$(cd $LVOL_TEST_DIR/../../../../ && pwd)" +[[ -z "$COMMON_DIR" ]] && COMMON_DIR="$(cd $LVOL_TEST_DIR/../common && pwd)" + +. $COMMON_DIR/common.sh +rpc_py="$SPDK_BUILD_DIR/scripts/rpc.py -s $(get_vhost_dir)/rpc.sock" + +vm_count=1 +max_disks="" +ctrl_type="spdk_vhost_scsi" +use_fs=false +nested_lvol=false +distribute_cores=false + +function usage() +{ + [[ ! -z $2 ]] && ( echo "$2"; echo ""; ) + echo "Shortcut script for doing automated test" + echo "Usage: $(basename $1) [OPTIONS]" + echo + echo "-h, --help Print help and exit" + echo " --fio-bin=PATH Path to FIO binary.;" + echo " --vm-count=INT Virtual machines to use in test;" + echo " Each VM will get one lvol bdev on each NVMe." + echo " Default: 1" + echo " --max-disks=INT Maximum number of NVMe drives to use in test." + echo " Default: will use all available NVMes." + echo " --ctrl-type=TYPE Controller type to use for test:" + echo " spdk_vhost_scsi - use spdk vhost scsi" + echo " spdk_vhost_blk - use spdk vhost block" + echo " --nested-lvol If enabled will create additional lvol bdev" + echo " on each NVMe for use as base device for next" + echo " lvol store and lvol bdevs." 
+ echo " (NVMe->lvol_store->lvol_bdev->lvol_store->lvol_bdev)" + echo " Default: False" + echo " --thin-provisioning Create lvol bdevs thin provisioned instead of" + echo " allocating space up front" + echo " --distribute-cores Use custom config file and run vhost controllers" + echo " on different CPU cores instead of single core." + echo " Default: False" + echo "-x set -x for script debug" + echo " --multi-os Run tests on different os types in VMs" + echo " Default: False" + exit 0 +} + +function clean_lvol_cfg() +{ + notice "Removing nested lvol bdevs" + for lvol_bdev in "${nest_lvol_bdevs[@]}"; do + $rpc_py destroy_lvol_bdev $lvol_bdev + notice "nested lvol bdev $lvol_bdev removed" + done + + notice "Removing nested lvol stores" + for lvol_store in "${nest_lvol_stores[@]}"; do + $rpc_py destroy_lvol_store -u $lvol_store + notice "nested lvol store $lvol_store removed" + done + + notice "Removing lvol bdevs" + for lvol_bdev in "${lvol_bdevs[@]}"; do + $rpc_py destroy_lvol_bdev $lvol_bdev + notice "lvol bdev $lvol_bdev removed" + done + + notice "Removing lvol stores" + for lvol_store in "${lvol_stores[@]}"; do + $rpc_py destroy_lvol_store -u $lvol_store + notice "lvol store $lvol_store removed" + done +} + +while getopts 'xh-:' optchar; do + case "$optchar" in + -) + case "$OPTARG" in + help) usage $0 ;; + fio-bin=*) fio_bin="--fio-bin=${OPTARG#*=}" ;; + vm-count=*) vm_count="${OPTARG#*=}" ;; + max-disks=*) max_disks="${OPTARG#*=}" ;; + ctrl-type=*) ctrl_type="${OPTARG#*=}" ;; + nested-lvol) nested_lvol=true ;; + distribute-cores) distribute_cores=true ;; + thin-provisioning) thin=" -t " ;; + multi-os) multi_os=true ;; + *) usage $0 "Invalid argument '$OPTARG'" ;; + esac + ;; + h) usage $0 ;; + x) set -x + x="-x" ;; + *) usage $0 "Invalid argument '$OPTARG'" + esac +done + +notice "Get NVMe disks:" +nvmes=($(iter_pci_class_code 01 08 02)) + +if [[ -z $max_disks ]]; then + max_disks=${#nvmes[@]} +fi + +if [[ ${#nvmes[@]} -lt max_disks ]]; then + fail "Number of 
NVMe drives (${#nvmes[@]}) is lower than number of requested disks for test ($max_disks)" +fi + +if $distribute_cores; then + # FIXME: this need to be handled entirely in common.sh + source $LVOL_TEST_DIR/autotest.config +fi + +trap 'error_exit "${FUNCNAME}" "${LINENO}"' SIGTERM SIGABRT ERR + +vm_kill_all + +notice "running SPDK vhost" +spdk_vhost_run +notice "..." + +trap 'clean_lvol_cfg; error_exit "${FUNCNAME}" "${LINENO}"' SIGTERM SIGABRT ERR + +lvol_stores=() +lvol_bdevs=() +nest_lvol_stores=() +nest_lvol_bdevs=() +used_vms="" + +# On each NVMe create one lvol store +for (( i=0; i<$max_disks; i++ ));do + + # Create base lvol store on NVMe + notice "Creating lvol store on device Nvme${i}n1" + ls_guid=$($rpc_py construct_lvol_store Nvme${i}n1 lvs_$i -c 4194304) + lvol_stores+=("$ls_guid") + + if $nested_lvol; then + free_mb=$(get_lvs_free_mb "$ls_guid") + size=$((free_mb / (vm_count+1) )) + + notice "Creating lvol bdev on lvol store: $ls_guid" + lb_name=$($rpc_py construct_lvol_bdev -u $ls_guid lbd_nest $size $thin) + + notice "Creating nested lvol store on lvol bdev: $lb_name" + nest_ls_guid=$($rpc_py construct_lvol_store $lb_name lvs_n_$i -c 4194304) + nest_lvol_stores+=("$nest_ls_guid") + + for (( j=0; j<$vm_count; j++)); do + notice "Creating nested lvol bdev for VM $i on lvol store $nest_ls_guid" + free_mb=$(get_lvs_free_mb "$nest_ls_guid") + nest_size=$((free_mb / (vm_count-j) )) + lb_name=$($rpc_py construct_lvol_bdev -u $nest_ls_guid lbd_vm_$j $nest_size $thin) + nest_lvol_bdevs+=("$lb_name") + done + fi + + # Create base lvol bdevs + for (( j=0; j<$vm_count; j++)); do + notice "Creating lvol bdev for VM $i on lvol store $ls_guid" + free_mb=$(get_lvs_free_mb "$ls_guid") + size=$((free_mb / (vm_count-j) )) + lb_name=$($rpc_py construct_lvol_bdev -u $ls_guid lbd_vm_$j $size $thin) + lvol_bdevs+=("$lb_name") + done +done + +bdev_info=$($rpc_py get_bdevs) +notice "Configuration after initial set-up:" +$rpc_py get_lvol_stores +echo "$bdev_info" + +# Set up 
VMs +for (( i=0; i<$vm_count; i++)); do + vm="vm_$i" + + # Get all lvol bdevs associated with this VM number + bdevs=$(jq -r "map(select(.aliases[] | contains(\"$vm\")) | \ + .aliases[]) | join(\" \")" <<< "$bdev_info") + bdevs=($bdevs) + + setup_cmd="vm_setup --disk-type=$ctrl_type --force=$i" + if [[ $i%2 -ne 0 ]] && [[ $multi_os ]]; then + setup_cmd+=" --os=/home/sys_sgsw/spdk_vhost_CentOS_vm_image.qcow2" + else + setup_cmd+=" --os=/home/sys_sgsw/vhost_vm_image.qcow2" + fi + + # Create single SCSI controller or multiple BLK controllers for this VM + if $distribute_cores; then + mask="VM_${i}_qemu_mask" + mask_arg="--cpumask ${!mask}" + fi + + if [[ "$ctrl_type" == "spdk_vhost_scsi" ]]; then + $rpc_py construct_vhost_scsi_controller naa.0.$i $mask_arg + for (( j=0; j<${#bdevs[@]}; j++)); do + $rpc_py add_vhost_scsi_lun naa.0.$i $j ${bdevs[$j]} + done + setup_cmd+=" --disks=0" + elif [[ "$ctrl_type" == "spdk_vhost_blk" ]]; then + disk="" + for (( j=0; j<${#bdevs[@]}; j++)); do + $rpc_py construct_vhost_blk_controller naa.$j.$i ${bdevs[$j]} $mask_arg + disk+="${j}:" + done + disk="${disk::-1}" + setup_cmd+=" --disks=$disk" + fi + + $setup_cmd + used_vms+=" $i" +done + +$rpc_py get_vhost_controllers + +# Run VMs +vm_run $used_vms +vm_wait_for_boot 600 $used_vms + +# Get disk names from VMs and run FIO traffic + +fio_disks="" +for vm_num in $used_vms; do + vm_dir=$VM_BASE_DIR/$vm_num + qemu_mask_param="VM_${vm_num}_qemu_mask" + + host_name="VM-$vm_num-${!qemu_mask_param}" + vm_ssh $vm_num "hostname $host_name" + vm_start_fio_server $fio_bin $vm_num + + if [[ "$ctrl_type" == "spdk_vhost_scsi" ]]; then + vm_check_scsi_location $vm_num + elif [[ "$ctrl_type" == "spdk_vhost_blk" ]]; then + vm_check_blk_location $vm_num + fi + + fio_disks+=" --vm=${vm_num}$(printf ':/dev/%s' $SCSI_DISK)" +done + +if [[ $RUN_NIGHTLY -eq 1 ]]; then + job_file="default_integrity_nightly.job" +else + job_file="default_integrity.job" +fi +# Run FIO traffic +run_fio $fio_bin 
--job-file=$COMMON_DIR/fio_jobs/$job_file --out="$TEST_DIR/fio_results" $fio_disks + +notice "Shutting down virtual machines..." +vm_shutdown_all +sleep 2 + +notice "Cleaning up vhost - remove LUNs, controllers, lvol bdevs and lvol stores" +if [[ "$ctrl_type" == "spdk_vhost_scsi" ]]; then + for (( i=0; i<$vm_count; i++)); do + notice "Removing devices from vhost SCSI controller naa.0.$i" + for (( j=0; j<${#bdevs[@]}; j++)); do + $rpc_py remove_vhost_scsi_target naa.0.$i $j + notice "Removed device $j" + done + notice "Removing vhost SCSI controller naa.0.$i" + $rpc_py remove_vhost_controller naa.0.$i + done +elif [[ "$ctrl_type" == "spdk_vhost_blk" ]]; then + for (( i=0; i<$vm_count; i++)); do + for (( j=0; j<${#bdevs[@]}; j++)); do + notice "Removing vhost BLK controller naa.$j.$i" + $rpc_py remove_vhost_controller naa.$j.$i + notice "Removed naa.$j.$i" + done + done +fi + +clean_lvol_cfg + +$rpc_py get_lvol_stores +$rpc_py get_bdevs +$rpc_py get_vhost_controllers + +notice "Shutting down SPDK vhost app..." 
+spdk_vhost_kill diff --git a/src/spdk/test/vhost/migration/autotest.config b/src/spdk/test/vhost/migration/autotest.config new file mode 100644 index 00000000..ccda306e --- /dev/null +++ b/src/spdk/test/vhost/migration/autotest.config @@ -0,0 +1,14 @@ +vhost_0_reactor_mask=["0"] +vhost_0_master_core=0 + +vhost_1_reactor_mask=["0"] +vhost_1_master_core=0 + +VM_0_qemu_mask=1 +VM_0_qemu_numa_node=0 + +VM_1_qemu_mask=1 +VM_1_qemu_numa_node=0 + +VM_2_qemu_mask=1 +VM_2_qemu_numa_node=0 diff --git a/src/spdk/test/vhost/migration/migration-tc1.job b/src/spdk/test/vhost/migration/migration-tc1.job new file mode 100644 index 00000000..5383b243 --- /dev/null +++ b/src/spdk/test/vhost/migration/migration-tc1.job @@ -0,0 +1,25 @@ +[global] +blocksize_range=4k-512k +#bs=512k +iodepth=128 +ioengine=libaio +filename= +group_reporting +thread +numjobs=1 +direct=1 +do_verify=1 +verify=md5 +verify_fatal=1 +verify_dump=1 +size=100% + +[write] +rw=write +stonewall + +[randread] +rw=randread +runtime=10 +time_based +stonewall diff --git a/src/spdk/test/vhost/migration/migration-tc1.sh b/src/spdk/test/vhost/migration/migration-tc1.sh new file mode 100644 index 00000000..ec89545d --- /dev/null +++ b/src/spdk/test/vhost/migration/migration-tc1.sh @@ -0,0 +1,123 @@ +function migration_tc1_clean_vhost_config() +{ + # Restore trap + trap 'error_exit "${FUNCNAME}" "${LINENO}"' INT ERR EXIT + + notice "Removing vhost devices & controllers via RPC ..." 
+ # Delete bdev first to remove all LUNs and SCSI targets + $rpc delete_malloc_bdev Malloc0 + + # Delete controllers + $rpc remove_vhost_controller $incoming_vm_ctrlr + $rpc remove_vhost_controller $target_vm_ctrlr + + unset -v incoming_vm target_vm incoming_vm_ctrlr target_vm_ctrlr rpc +} + +function migration_tc1_configure_vhost() +{ + # Those are global intentionally - they will be unset in cleanup handler + incoming_vm=0 + target_vm=1 + incoming_vm_ctrlr=naa.Malloc0.$incoming_vm + target_vm_ctrlr=naa.Malloc0.$target_vm + rpc="$SPDK_BUILD_DIR/scripts/rpc.py -s $(get_vhost_dir)/rpc.sock" + + trap 'migration_tc1_error_handler; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR EXIT + + # Construct shared Malloc Bdev + $rpc construct_malloc_bdev -b Malloc0 128 4096 + + # And two controllers - one for each VM. Both are using the same Malloc Bdev as LUN 0 + $rpc construct_vhost_scsi_controller $incoming_vm_ctrlr + $rpc add_vhost_scsi_lun $incoming_vm_ctrlr 0 Malloc0 + + $rpc construct_vhost_scsi_controller $target_vm_ctrlr + $rpc add_vhost_scsi_lun $target_vm_ctrlr 0 Malloc0 +} + +function migration_tc1_error_handler() +{ + trap - SIGINT ERR EXIT + warning "Migration TC1 ERROR HANDLER" + print_backtrace + set -x + + vm_kill_all + migration_tc1_clean_vhost_config + + warning "Migration TC1 FAILED" +} + +function migration_tc1() +{ + # Use 2 VMs: + # incoming VM - the one we want to migrate + # targe VM - the one which will accept migration + local job_file="$MIGRATION_DIR/migration-tc1.job" + + # Run vhost + spdk_vhost_run + migration_tc1_configure_vhost + + notice "Setting up VMs" + vm_setup --os="$os_image" --force=$incoming_vm --disk-type=spdk_vhost_scsi --disks=Malloc0 --migrate-to=$target_vm + vm_setup --force=$target_vm --disk-type=spdk_vhost_scsi --disks=Malloc0 --incoming=$incoming_vm + + # Run everything + vm_run $incoming_vm $target_vm + + # Wait only for incoming VM, as target is waiting for migration + vm_wait_for_boot 600 $incoming_vm + + # Run fio before 
migration + notice "Starting FIO" + + vm_check_scsi_location $incoming_vm + run_fio $fio_bin --job-file="$job_file" --local --vm="${incoming_vm}$(printf ':/dev/%s' $SCSI_DISK)" + + # Wait a while to let the FIO time to issue some IO + sleep 5 + + # Check if fio is still running before migration + if ! is_fio_running $incoming_vm; then + vm_ssh $incoming_vm "cat /root/$(basename ${job_file}).out" + error "FIO is not running before migration: process crashed or finished too early" + fi + + vm_migrate $incoming_vm + sleep 3 + + # Check if fio is still running after migration + if ! is_fio_running $target_vm; then + vm_ssh $target_vm "cat /root/$(basename ${job_file}).out" + error "FIO is not running after migration: process crashed or finished too early" + fi + + notice "Waiting for fio to finish" + local timeout=40 + while is_fio_running $target_vm; do + sleep 1 + echo -n "." + if (( timeout-- == 0 )); then + error "timeout while waiting for FIO!" + fi + done + + notice "Fio result is:" + vm_ssh $target_vm "cat /root/$(basename ${job_file}).out" + + notice "Migration DONE" + + notice "Shutting down all VMs" + vm_shutdown_all + + migration_tc1_clean_vhost_config + + notice "killing vhost app" + spdk_vhost_kill + + notice "Migration TC1 SUCCESS" +} + +migration_tc1 diff --git a/src/spdk/test/vhost/migration/migration-tc2.job b/src/spdk/test/vhost/migration/migration-tc2.job new file mode 100644 index 00000000..df78a3cd --- /dev/null +++ b/src/spdk/test/vhost/migration/migration-tc2.job @@ -0,0 +1,20 @@ +[global] +blocksize_range=4k-512k +iodepth=128 +ioengine=libaio +filename= +group_reporting +thread +numjobs=1 +direct=1 +do_verify=1 +verify=md5 +verify_fatal=1 +verify_dump=1 +verify_backlog=8 + +[randwrite] +rw=randwrite +runtime=15 +time_based +stonewall diff --git a/src/spdk/test/vhost/migration/migration-tc2.sh b/src/spdk/test/vhost/migration/migration-tc2.sh new file mode 100644 index 00000000..bc4a0f53 --- /dev/null +++ 
b/src/spdk/test/vhost/migration/migration-tc2.sh @@ -0,0 +1,209 @@ +source $SPDK_BUILD_DIR/test/nvmf/common.sh + +function migration_tc2_cleanup_nvmf_tgt() +{ + local i + + if [[ ! -r "$nvmf_dir/nvmf_tgt.pid" ]]; then + warning "Pid file '$nvmf_dir/nvmf_tgt.pid' does not exist. " + return + fi + + if [[ ! -z "$1" ]]; then + trap 'error_exit "${FUNCNAME}" "${LINENO}"' INT ERR EXIT + pkill --signal $1 -F $nvmf_dir/nvmf_tgt.pid || true + sleep 5 + if ! pkill -F $nvmf_dir/nvmf_tgt.pid; then + fail "failed to kill nvmf_tgt app" + fi + else + pkill --signal SIGTERM -F $nvmf_dir/nvmf_tgt.pid || true + for (( i=0; i<20; i++ )); do + if ! pkill --signal 0 -F $nvmf_dir/nvmf_tgt.pid; then + break + fi + sleep 0.5 + done + + if pkill --signal 0 -F $nvmf_dir/nvmf_tgt.pid; then + error "nvmf_tgt failed to shutdown" + fi + fi + + rm $nvmf_dir/nvmf_tgt.pid + unset -v nvmf_dir rpc_nvmf +} + +function migration_tc2_cleanup_vhost_config() +{ + timing_enter migration_tc2_cleanup_vhost_config + + trap 'migration_tc2_cleanup_nvmf_tgt SIGKILL; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR EXIT + + notice "Shutting down all VMs" + vm_shutdown_all + + notice "Removing vhost devices & controllers via RPC ..." 
+ # Delete bdev first to remove all LUNs and SCSI targets + $rpc_0 delete_nvme_controller Nvme0 + $rpc_0 remove_vhost_controller $incoming_vm_ctrlr + + $rpc_1 delete_nvme_controller Nvme0 + $rpc_1 remove_vhost_controller $target_vm_ctrlr + + notice "killing vhost app" + spdk_vhost_kill 0 + spdk_vhost_kill 1 + + unset -v incoming_vm target_vm incoming_vm_ctrlr target_vm_ctrlr + unset -v rpc_0 rpc_1 + + trap 'error_exit "${FUNCNAME}" "${LINENO}"' INT ERR EXIT + migration_tc2_cleanup_nvmf_tgt + + timing_exit migration_tc2_cleanup_vhost_config +} + +function migration_tc2_configure_vhost() +{ + timing_enter migration_tc2_configure_vhost + + # Those are global intentionally - they will be unset in cleanup handler + nvmf_dir="$TEST_DIR/nvmf_tgt" + + incoming_vm=1 + target_vm=2 + incoming_vm_ctrlr=naa.VhostScsi0.$incoming_vm + target_vm_ctrlr=naa.VhostScsi0.$target_vm + + rpc_nvmf="$SPDK_BUILD_DIR/scripts/rpc.py -s $nvmf_dir/rpc.sock" + rpc_0="$SPDK_BUILD_DIR/scripts/rpc.py -s $(get_vhost_dir 0)/rpc.sock" + rpc_1="$SPDK_BUILD_DIR/scripts/rpc.py -s $(get_vhost_dir 1)/rpc.sock" + + # Default cleanup/error handlers will not shutdown nvmf_tgt app so setup it + # here to teardown in cleanup function + trap 'migration_tc2_error_cleanup; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR EXIT + + # Run nvmf_tgt and two vhost instances: + # nvmf_tgt uses core id 2 (-m 0x4) + # First uses core id 0 (vhost_0_reactor_mask=0x1) + # Second uses core id 1 (vhost_1_reactor_mask=0x2) + # This force to use VM 1 and 2. + timing_enter start_nvmf_tgt + notice "Running nvmf_tgt..." + mkdir -p $nvmf_dir + rm -f $nvmf_dir/* + $SPDK_BUILD_DIR/app/nvmf_tgt/nvmf_tgt -s 512 -m 0x4 -r $nvmf_dir/rpc.sock --wait-for-rpc & + local nvmf_tgt_pid=$! 
+ echo $nvmf_tgt_pid > $nvmf_dir/nvmf_tgt.pid + waitforlisten "$nvmf_tgt_pid" "$nvmf_dir/rpc.sock" + $rpc_nvmf start_subsystem_init + $rpc_nvmf nvmf_create_transport -t RDMA -u 8192 -p 4 + $SPDK_BUILD_DIR/scripts/gen_nvme.sh --json | $rpc_nvmf load_subsystem_config + timing_exit start_nvmf_tgt + + spdk_vhost_run --memory=512 --vhost-num=0 --no-pci + # Those are global intentionally + vhost_1_reactor_mask=0x2 + vhost_1_master_core=1 + spdk_vhost_run --memory=512 --vhost-num=1 --no-pci + + local rdma_ip_list=$(get_available_rdma_ips) + local nvmf_target_ip=$(echo "$rdma_ip_list" | head -n 1) + + if [[ -z "$nvmf_target_ip" ]]; then + fail "no NIC for nvmf target" + fi + + notice "Configuring nvmf_tgt, vhost devices & controllers via RPC ..." + + # Construct shared bdevs and controllers + $rpc_nvmf nvmf_subsystem_create nqn.2016-06.io.spdk:cnode1 -a -s SPDK00000000000001 + $rpc_nvmf nvmf_subsystem_add_ns nqn.2016-06.io.spdk:cnode1 Nvme0n1 + $rpc_nvmf nvmf_subsystem_add_listener nqn.2016-06.io.spdk:cnode1 -t rdma -a $nvmf_target_ip -s 4420 + + $rpc_0 construct_nvme_bdev -b Nvme0 -t rdma -f ipv4 -a $nvmf_target_ip -s 4420 -n "nqn.2016-06.io.spdk:cnode1" + $rpc_0 construct_vhost_scsi_controller $incoming_vm_ctrlr + $rpc_0 add_vhost_scsi_lun $incoming_vm_ctrlr 0 Nvme0n1 + + $rpc_1 construct_nvme_bdev -b Nvme0 -t rdma -f ipv4 -a $nvmf_target_ip -s 4420 -n "nqn.2016-06.io.spdk:cnode1" + $rpc_1 construct_vhost_scsi_controller $target_vm_ctrlr + $rpc_1 add_vhost_scsi_lun $target_vm_ctrlr 0 Nvme0n1 + + notice "Setting up VMs" + vm_setup --os="$os_image" --force=$incoming_vm --disk-type=spdk_vhost_scsi --disks=VhostScsi0 \ + --migrate-to=$target_vm --memory=1024 --vhost-num=0 + vm_setup --force=$target_vm --disk-type=spdk_vhost_scsi --disks=VhostScsi0 --incoming=$incoming_vm --memory=1024 \ + --vhost-num=1 + + # Run everything + vm_run $incoming_vm $target_vm + + # Wait only for incoming VM, as target is waiting for migration + vm_wait_for_boot 600 $incoming_vm + + notice 
"Configuration done" + + timing_exit migration_tc2_configure_vhost +} + +function migration_tc2_error_cleanup() +{ + trap - SIGINT ERR EXIT + set -x + + vm_kill_all + migration_tc2_cleanup_vhost_config + notice "Migration TC2 FAILED" +} + +function migration_tc2() +{ + # Use 2 VMs: + # incoming VM - the one we want to migrate + # targe VM - the one which will accept migration + local job_file="$MIGRATION_DIR/migration-tc2.job" + + migration_tc2_configure_vhost + + # Run fio before migration + notice "Starting FIO" + vm_check_scsi_location $incoming_vm + run_fio $fio_bin --job-file="$job_file" --local --vm="${incoming_vm}$(printf ':/dev/%s' $SCSI_DISK)" + + # Wait a while to let the FIO time to issue some IO + sleep 5 + + # Check if fio is still running before migration + if ! is_fio_running $incoming_vm; then + vm_ssh $incoming_vm "cat /root/$(basename ${job_file}).out" + error "FIO is not running before migration: process crashed or finished too early" + fi + + vm_migrate $incoming_vm + sleep 3 + + # Check if fio is still running after migration + if ! is_fio_running $target_vm; then + vm_ssh $target_vm "cat /root/$(basename ${job_file}).out" + error "FIO is not running after migration: process crashed or finished too early" + fi + + notice "Waiting for fio to finish" + local timeout=40 + while is_fio_running $target_vm; do + sleep 1 + echo -n "." + if (( timeout-- == 0 )); then + error "timeout while waiting for FIO!" 
+ fi + done + + notice "Fio result is:" + vm_ssh $target_vm "cat /root/$(basename ${job_file}).out" + + migration_tc2_cleanup_vhost_config + notice "Migration TC2 SUCCESS" +} + +migration_tc2 diff --git a/src/spdk/test/vhost/migration/migration-tc3.job b/src/spdk/test/vhost/migration/migration-tc3.job new file mode 100644 index 00000000..fe192966 --- /dev/null +++ b/src/spdk/test/vhost/migration/migration-tc3.job @@ -0,0 +1,20 @@ +[global] +blocksize=4k-512k +iodepth=128 +ioengine=libaio +filename= +group_reporting +thread +numjobs=1 +direct=1 +do_verify=1 +verify=md5 +verify_fatal=1 +verify_dump=1 +verify_backlog=8 + +[randwrite] +rw=randwrite +runtime=15 +time_based +stonewall diff --git a/src/spdk/test/vhost/migration/migration-tc3a.sh b/src/spdk/test/vhost/migration/migration-tc3a.sh new file mode 100644 index 00000000..0f20b994 --- /dev/null +++ b/src/spdk/test/vhost/migration/migration-tc3a.sh @@ -0,0 +1,227 @@ +source $SPDK_BUILD_DIR/test/nvmf/common.sh +source $MIGRATION_DIR/autotest.config + +incoming_vm=1 +target_vm=2 +incoming_vm_ctrlr=naa.VhostScsi0.$incoming_vm +target_vm_ctrlr=naa.VhostScsi0.$target_vm +share_dir=$TEST_DIR/share +spdk_repo_share_dir=$TEST_DIR/share_spdk +job_file=$MIGRATION_DIR/migration-tc3.job + +if [ -z "$MGMT_TARGET_IP" ]; then + error "No IP address of target is given" +fi + +if [ -z "$MGMT_INITIATOR_IP" ]; then + error "No IP address of initiator is given" +fi + +if [ -z "$RDMA_TARGET_IP" ]; then + error "No IP address of targets RDMA capable NIC is given" +fi + +if [ -z "$RDMA_INITIATOR_IP" ]; then + error "No IP address of initiators RDMA capable NIC is given" +fi + +function ssh_remote() +{ + local ssh_cmd="ssh -i $SPDK_VHOST_SSH_KEY_FILE \ + -o UserKnownHostsFile=/dev/null \ + -o StrictHostKeyChecking=no -o ControlMaster=auto \ + root@$1" + + shift + $ssh_cmd "$@" +} + +function wait_for_remote() +{ + local timeout=40 + set +x + while [[ ! -f $share_dir/DONE ]]; do + echo -n "." 
+ if (( timeout-- == 0 )); then + error "timeout while waiting for FIO!" + fi + sleep 1 + done + set -x + rm -f $share_dir/DONE +} + +function check_rdma_connection() +{ + local nic_name=$(ip -4 -o addr show to $RDMA_TARGET_IP up | cut -d' ' -f2) + if [[ -z $nic_name ]]; then + error "There is no NIC with IP address $RDMA_TARGET_IP configured" + fi + + if ! ls /sys/class/infiniband/*/device/net/$nic_name &> /dev/null; then + error "$nic_name with IP $RDMA_TARGET_IP is not a RDMA capable NIC" + fi + +} + +function host1_cleanup_nvmf() +{ + notice "Shutting down nvmf_tgt on local server" + if [[ ! -z "$1" ]]; then + pkill --signal $1 -F $nvmf_dir/nvmf_tgt.pid + else + pkill -F $nvmf_dir/nvmf_tgt.pid + fi + rm -f $nvmf_dir/nvmf_tgt.pid +} + +function host1_cleanup_vhost() +{ + trap 'host1_cleanup_nvmf SIGKILL; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR EXIT + notice "Shutting down VM $incoming_vm" + vm_kill $incoming_vm + + notice "Removing bdev & controller from vhost on local server" + $rpc_0 delete_nvme_controller Nvme0 + $rpc_0 remove_vhost_controller $incoming_vm_ctrlr + + notice "Shutting down vhost app" + spdk_vhost_kill 0 + + host1_cleanup_nvmf +} + +function host1_start_nvmf() +{ + nvmf_dir="$TEST_DIR/nvmf_tgt" + rpc_nvmf="$SPDK_BUILD_DIR/scripts/rpc.py -s $nvmf_dir/nvmf_rpc.sock" + + notice "Starting nvmf_tgt instance on local server" + mkdir -p $nvmf_dir + rm -rf $nvmf_dir/* + + trap 'host1_cleanup_nvmf SIGKILL; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR EXIT + $SPDK_BUILD_DIR/app/nvmf_tgt/nvmf_tgt -s 512 -m 0xF -r $nvmf_dir/nvmf_rpc.sock --wait-for-rpc & + nvmf_tgt_pid=$! 
+ echo $nvmf_tgt_pid > $nvmf_dir/nvmf_tgt.pid + waitforlisten "$nvmf_tgt_pid" "$nvmf_dir/nvmf_rpc.sock" + $rpc_nvmf start_subsystem_init + $rpc_nvmf nvmf_create_transport -t RDMA -u 8192 -p 4 + $SPDK_BUILD_DIR/scripts/gen_nvme.sh --json | $rpc_nvmf load_subsystem_config + + $rpc_nvmf nvmf_subsystem_create nqn.2018-02.io.spdk:cnode1 -a -s SPDK01 + $rpc_nvmf nvmf_subsystem_add_ns nqn.2018-02.io.spdk:cnode1 Nvme0n1 + $rpc_nvmf nvmf_subsystem_add_listener nqn.2018-02.io.spdk:cnode1 -t rdma -a $RDMA_TARGET_IP -s 4420 +} + +function host1_start_vhost() +{ + rpc_0="$SPDK_BUILD_DIR/scripts/rpc.py -s $(get_vhost_dir 0)/rpc.sock" + + notice "Starting vhost0 instance on local server" + trap 'host1_cleanup_vhost; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR EXIT + spdk_vhost_run --vhost-num=0 --no-pci + $rpc_0 construct_nvme_bdev -b Nvme0 -t rdma -f ipv4 -a $RDMA_TARGET_IP -s 4420 -n "nqn.2018-02.io.spdk:cnode1" + $rpc_0 construct_vhost_scsi_controller $incoming_vm_ctrlr + $rpc_0 add_vhost_scsi_lun $incoming_vm_ctrlr 0 Nvme0n1 + + vm_setup --os="$share_dir/migration.qcow2" --force=$incoming_vm --disk-type=spdk_vhost_scsi --disks=VhostScsi0 \ + --migrate-to=$target_vm --memory=512 --queue_num=1 + + # TODO: Fix loop calculating cpu_num in common.sh + # We need -smp 1 and -queue_num 1 for this test to work, and this loop + # in some cases calculates wrong cpu_num. 
+ sed -i "s#smp 2#smp 1#g" $VM_BASE_DIR/$incoming_vm/run.sh
+ vm_run $incoming_vm
+ vm_wait_for_boot 300 $incoming_vm
+}
+
+function cleanup_share()
+{
+ set +e
+ notice "Cleaning up share directory on remote and local server"
+ ssh_remote $MGMT_INITIATOR_IP "umount $VM_BASE_DIR"
+ ssh_remote $MGMT_INITIATOR_IP "umount $share_dir; rm -f $share_dir/*; rm -rf $spdk_repo_share_dir"
+ rm -f $share_dir/migration.qcow2
+ rm -f $share_dir/spdk.tar.gz
+ set -e
+}
+
+function host_1_create_share()
+{
+ notice "Creating share directory on local server to re-use on remote"
+ mkdir -p $share_dir
+ mkdir -p $VM_BASE_DIR # This dir would've been created later but we need it now
+ rm -rf $share_dir/spdk.tar.gz $share_dir/spdk || true
+ cp $os_image $share_dir/migration.qcow2
+ tar --exclude="*.o" --exclude="*.d" --exclude="*.git" -C $SPDK_BUILD_DIR -zcf $share_dir/spdk.tar.gz .
+}
+
+function host_2_create_share()
+{
+ # Copy & compile the sources for later use on remote server.
+ ssh_remote $MGMT_INITIATOR_IP "uname -a"
+ ssh_remote $MGMT_INITIATOR_IP "mkdir -p $share_dir"
+ ssh_remote $MGMT_INITIATOR_IP "mkdir -p $spdk_repo_share_dir"
+ ssh_remote $MGMT_INITIATOR_IP "mkdir -p $VM_BASE_DIR"
+ ssh_remote $MGMT_INITIATOR_IP "sshfs -o\
+ ssh_command=\"ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o ControlMaster=auto\
+ -i $SPDK_VHOST_SSH_KEY_FILE\" root@$MGMT_TARGET_IP:$VM_BASE_DIR $VM_BASE_DIR"
+ ssh_remote $MGMT_INITIATOR_IP "sshfs -o\
+ ssh_command=\"ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o ControlMaster=auto\
+ -i $SPDK_VHOST_SSH_KEY_FILE\" root@$MGMT_TARGET_IP:$share_dir $share_dir"
+ ssh_remote $MGMT_INITIATOR_IP "mkdir -p $spdk_repo_share_dir/spdk"
+ ssh_remote $MGMT_INITIATOR_IP "tar -zxf $share_dir/spdk.tar.gz -C $spdk_repo_share_dir/spdk --strip-components=1"
+ ssh_remote $MGMT_INITIATOR_IP "cd $spdk_repo_share_dir/spdk; make clean; ./configure --with-rdma --enable-debug; make -j40"
+}
+
+function host_2_start_vhost()
+{
+ 
ssh_remote $MGMT_INITIATOR_IP "nohup $spdk_repo_share_dir/spdk/test/vhost/migration/migration.sh\
+ --test-cases=3b --work-dir=$TEST_DIR --os=$share_dir/migration.qcow2\
+ --rdma-tgt-ip=$RDMA_TARGET_IP &>$share_dir/output.log &"
+ notice "Waiting for remote to be done with vhost & VM setup..."
+ wait_for_remote
+}
+
+function setup_share()
+{
+ trap 'cleanup_share; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR EXIT
+ host_1_create_share
+ host_2_create_share
+}
+
+function migration_tc3()
+{
+ check_rdma_connection
+ setup_share
+ host1_start_nvmf
+ host1_start_vhost
+ host_2_start_vhost
+
+ # Do migration
+ notice "Starting fio on local VM"
+ vm_check_scsi_location $incoming_vm
+
+ run_fio $fio_bin --job-file="$job_file" --local --vm="${incoming_vm}$(printf ':/dev/%s' $SCSI_DISK)"
+ sleep 5
+
+ if ! is_fio_running $incoming_vm; then
+ vm_ssh $incoming_vm "cat /root/$(basename ${job_file}).out"
+ error "Fio not running on local VM before starting migration!"
+ fi
+
+ vm_migrate $incoming_vm $RDMA_INITIATOR_IP
+ sleep 1
+
+ # Verify migration on remote host and clean up vhost
+ ssh_remote $MGMT_INITIATOR_IP "pkill -CONT -F $TEST_DIR/tc3b.pid"
+ notice "Waiting for remote to finish FIO on VM and clean up..."
+ wait_for_remote
+
+ # Clean up local stuff
+ host1_cleanup_vhost
+ cleanup_share
+}
+
+migration_tc3
diff --git a/src/spdk/test/vhost/migration/migration-tc3b.sh b/src/spdk/test/vhost/migration/migration-tc3b.sh
new file mode 100644
index 00000000..babba0dc
--- /dev/null
+++ b/src/spdk/test/vhost/migration/migration-tc3b.sh
@@ -0,0 +1,79 @@
+# Set -m option is needed to be able to use "suspend" command
+# as we are using a non-interactive session to connect to the remote.
+# Without -m it would not be possible to suspend the process. 
+set -m +source $MIGRATION_DIR/autotest.config + +incoming_vm=1 +target_vm=2 +target_vm_ctrl=naa.VhostScsi0.$target_vm +rpc="$SPDK_BUILD_DIR/scripts/rpc.py -s $(get_vhost_dir 1)/rpc.sock" +share_dir=$TEST_DIR/share + +function host_2_cleanup_vhost() +{ + notice "Shutting down VM $target_vm" + vm_kill $target_vm + + notice "Removing bdev & controller from vhost 1 on remote server" + $rpc delete_nvme_controller Nvme0 + $rpc remove_vhost_controller $target_vm_ctrl + + notice "Shutting down vhost app" + spdk_vhost_kill 1 + sleep 1 +} + +function host_2_start_vhost() +{ + echo "BASE DIR $TEST_DIR" + vhost_work_dir=$TEST_DIR/vhost1 + mkdir -p $vhost_work_dir + rm -f $vhost_work_dir/* + + notice "Starting vhost 1 instance on remote server" + trap 'host_2_cleanup_vhost; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR EXIT + spdk_vhost_run --vhost-num=1 --no-pci + + $rpc construct_nvme_bdev -b Nvme0 -t rdma -f ipv4 -a $RDMA_TARGET_IP -s 4420 -n "nqn.2018-02.io.spdk:cnode1" + $rpc construct_vhost_scsi_controller $target_vm_ctrl + $rpc add_vhost_scsi_lun $target_vm_ctrl 0 Nvme0n1 + + vm_setup --os="$os_image" --force=$target_vm --disk-type=spdk_vhost_scsi --disks=VhostScsi0 \ + --memory=512 --vhost-num=1 --incoming=$incoming_vm + vm_run $target_vm + sleep 1 + + # Use this file as a flag to notify main script + # that setup on remote server is done + echo "DONE" > $share_dir/DONE +} + +echo $$ > $TEST_DIR/tc3b.pid +host_2_start_vhost +suspend -f + +if ! vm_os_booted $target_vm; then + fail "VM$target_vm is not running!" +fi + +if ! is_fio_running $target_vm; then + vm_ssh $target_vm "cat /root/migration-tc3.job.out" + error "FIO is not running on remote server after migration!" +fi + +notice "Waiting for FIO to finish on remote server VM" +timeout=40 +while is_fio_running $target_vm; do + sleep 1 + echo -n "." + if (( timeout-- == 0 )); then + error "timeout while waiting for FIO!" 
+ fi +done + +notice "FIO result after migration:" +vm_ssh $target_vm "cat /root/migration-tc3.job.out" + +host_2_cleanup_vhost +echo "DONE" > $share_dir/DONE diff --git a/src/spdk/test/vhost/migration/migration.sh b/src/spdk/test/vhost/migration/migration.sh new file mode 100755 index 00000000..bdfcd845 --- /dev/null +++ b/src/spdk/test/vhost/migration/migration.sh @@ -0,0 +1,153 @@ +#!/usr/bin/env bash + +set -e + +vms=() +declare -A vms_os +declare -A vms_raw_disks +declare -A vms_ctrlrs +declare -A vms_ctrlrs_disks + +# By default use Guest fio +fio_bin="" +test_cases="" +MGMT_TARGET_IP="" +MGMT_INITIATOR_IP="" +RDMA_TARGET_IP="" +RDMA_INITIATOR_IP="" +function usage() +{ + [[ ! -z $2 ]] && ( echo "$2"; echo ""; ) + echo "Shortcut script for doing automated test of live migration." + echo "Usage: $(basename $1) [OPTIONS]" + echo + echo " --work-dir=WORK_DIR Where to find build file. Must exist. [default: $TEST_DIR]" + echo " --os ARGS VM configuration. This parameter might be used more than once:" + echo " --fio-bin=FIO Use specific fio binary (will be uploaded to VM)" + echo " --test-cases=TESTS Coma-separated list of tests to run. Implemented test cases are: 1" + echo " See test/vhost/test_plan.md for more info." + echo " --mgmt-tgt-ip=IP IP address of target." + echo " --mgmt-init-ip=IP IP address of initiator." + echo " --rdma-tgt-ip=IP IP address of targets rdma capable NIC." + echo " --rdma-init-ip=IP IP address of initiators rdma capable NIC." 
+ echo "-x set -x for script debug" +} + +for param in "$@"; do + case "$param" in + --help|-h) + usage $0 + exit 0 + ;; + --work-dir=*) TEST_DIR="${param#*=}" ;; + --os=*) os_image="${param#*=}" ;; + --fio-bin=*) fio_bin="${param}" ;; + --test-cases=*) test_cases="${param#*=}" ;; + --mgmt-tgt-ip=*) MGMT_TARGET_IP="${param#*=}" ;; + --mgmt-init-ip=*) MGMT_INITIATOR_IP="${param#*=}" ;; + --rdma-tgt-ip=*) RDMA_TARGET_IP="${param#*=}" ;; + --rdma-init-ip=*) RDMA_INITIATOR_IP="${param#*=}" ;; + -x) set -x ;; + -v) SPDK_VHOST_VERBOSE=true ;; + *) + usage $0 "Invalid argument '$param'" + exit 1;; + esac +done + +. $(readlink -e "$(dirname $0)/../common/common.sh") || exit 1 +MIGRATION_DIR=$(readlink -f $(dirname $0)) + +[[ ! -z "$test_cases" ]] || fail "Need '--test-cases=' parameter" + +trap 'error_exit "${FUNCNAME}" "${LINENO}"' INT ERR EXIT + +function vm_monitor_send() +{ + local vm_num=$1 + local cmd_result_file="$2" + local vm_dir="$VM_BASE_DIR/$1" + local vm_monitor_port=$(cat $vm_dir/monitor_port) + + [[ ! -z "$vm_monitor_port" ]] || fail "No monitor port!" 
+ + shift 2 + nc 127.0.0.1 $vm_monitor_port "$@" > $cmd_result_file +} + +# Migrate VM $1 +function vm_migrate() +{ + local from_vm_dir="$VM_BASE_DIR/$1" + local target_vm_dir="$(readlink -e $from_vm_dir/vm_migrate_to)" + local target_vm="$(basename $target_vm_dir)" + local target_vm_migration_port="$(cat $target_vm_dir/migration_port)" + if [[ -n "$2" ]]; then + local target_ip=$2 + else + local target_ip="127.0.0.1" + fi + + # Sanity check if target VM (QEMU) is configured to accept source VM (QEMU) migration + if [[ "$(readlink -e ${target_vm_dir}/vm_incoming)" != "$(readlink -e ${from_vm_dir})" ]]; then + fail "source VM $1 or destination VM is not properly configured for live migration" + fi + + timing_enter vm_migrate + notice "Migrating VM $1 to VM "$(basename $target_vm_dir) + echo -e \ + "migrate_set_speed 1g\n" \ + "migrate tcp:$target_ip:$target_vm_migration_port\n" \ + "info migrate\n" \ + "quit" | vm_monitor_send $1 "$from_vm_dir/migration_result" + + # Post migration checks: + if ! grep "Migration status: completed" $from_vm_dir/migration_result -q; then + cat $from_vm_dir/migration_result + fail "Migration failed:\n" + fi + + # Don't perform the following check if target VM is on remote server + # as we won't have access to it. + # If you need this check then perform it on your own. + if [[ "$target_ip" == "127.0.0.1" ]]; then + if ! 
vm_os_booted $target_vm; then + fail "VM$target_vm is not running" + cat $target_vm $target_vm_dir/cont_result + fi + fi + + notice "Migration complete" + timing_exit vm_migrate +} + +function is_fio_running() +{ + local shell_restore_x="$( [[ "$-" =~ x ]] && echo 'set -x' )" + set +x + + if vm_ssh $1 'kill -0 $(cat /root/fio.pid)'; then + local ret=0 + else + local ret=1 + fi + + $shell_restore_x + return $ret +} + +for test_case in ${test_cases//,/ }; do + assert_number "$test_case" + notice "===============================" + notice "Running Migration test case ${test_case}" + notice "===============================" + + timing_enter migration-tc${test_case} + source $MIGRATION_DIR/migration-tc${test_case}.sh + timing_exit migration-tc${test_case} +done + +notice "Migration Test SUCCESS" +notice "===============" + +trap - SIGINT ERR EXIT diff --git a/src/spdk/test/vhost/other/conf.json b/src/spdk/test/vhost/other/conf.json new file mode 100644 index 00000000..7a60c68c --- /dev/null +++ b/src/spdk/test/vhost/other/conf.json @@ -0,0 +1,43 @@ +{ + "subsystems": [ + { + "subsystem": "copy", + "config": null + }, + { + "subsystem": "interface", + "config": null + }, + { + "subsystem": "net_framework", + "config": null + }, + { + "subsystem": "bdev", + "config": [ + { + "params": { + "block_size": 4096, + "num_blocks": 32768 + }, + "method": "construct_malloc_bdev" + }, + { + "params": { + "block_size": 4096, + "num_blocks": 32768 + }, + "method": "construct_malloc_bdev" + } + ] + }, + { + "subsystem": "nbd", + "config": [] + }, + { + "subsystem": "scsi", + "config": null + } + ] +} diff --git a/src/spdk/test/vhost/other/negative.sh b/src/spdk/test/vhost/other/negative.sh new file mode 100755 index 00000000..5728a283 --- /dev/null +++ b/src/spdk/test/vhost/other/negative.sh @@ -0,0 +1,144 @@ +#!/usr/bin/env bash + +NEGATIVE_BASE_DIR=$(readlink -f $(dirname $0)) +[[ -z "$COMMON_DIR" ]] && COMMON_DIR="$(cd $NEGATIVE_BASE_DIR/../common && pwd)" +[[ -z "$TEST_DIR" ]] && 
TEST_DIR="$(cd $NEGATIVE_BASE_DIR/../../../../ && pwd)" + +function usage() +{ + [[ ! -z $2 ]] && ( echo "$2"; echo ""; ) + echo "Shortcut script for running vhost app." + echo "Usage: $(basename $1) [-x] [-h|--help] [--clean-build] [--work-dir=PATH]" + echo "-h, --help print help and exit" + echo "-x Set -x for script debug" + echo " --work-dir=PATH Where to find source/project. [default=$TEST_DIR]" + + exit 0 +} + +run_in_background=false +while getopts 'xh-:' optchar; do + case "$optchar" in + -) + case "$OPTARG" in + help) usage $0 ;; + work-dir=*) TEST_DIR="${OPTARG#*=}" ;; + conf-dir=*) CONF_DIR="${OPTARG#*=}" ;; + *) usage $0 echo "Invalid argument '$OPTARG'" ;; + esac + ;; + h) usage $0 ;; + x) set -x ;; + *) usage $0 "Invalid argument '$optchar'" ;; + esac +done + + +. $COMMON_DIR/common.sh + +trap error_exit ERR + +VHOST_APP="$SPDK_BUILD_DIR/app/vhost/vhost" + +notice "Testing vhost command line arguments" +# Printing help will force vhost to exit without error +$VHOST_APP -c /path/to/non_existing_file/conf -S $NEGATIVE_BASE_DIR -e 0x0 -s 1024 -d -h --silence-noticelog + +# Testing vhost create pid file option. Vhost will exit with error as invalid config path is given +if $VHOST_APP -c /path/to/non_existing_file/conf -f $SPDK_VHOST_SCSI_TEST_DIR/vhost.pid; then + fail "vhost started when specifying invalid config file" +fi + +# Expecting vhost to fail if an incorrect argument is given +if $VHOST_APP -x -h; then + fail "vhost started with invalid -x command line option" +fi + +# Passing trace flags if spdk is build without CONFIG_DEBUG=y option make vhost exit with error +if ! 
$VHOST_APP -t vhost_scsi -h; then + warning "vhost did not started with trace flags enabled but ignoring this as it might not be a debug build" +fi + +if [[ $RUN_NIGHTLY -eq 1 ]]; then + # Run with valid config and try some negative rpc calls + notice "===============" + notice "" + notice "running SPDK" + notice "" + spdk_vhost_run --json-path=$NEGATIVE_BASE_DIR + notice "" + + rpc_py="$SPDK_BUILD_DIR/scripts/rpc.py -s $(get_vhost_dir)/rpc.sock" + + # General commands + notice "Trying to remove nonexistent controller" + if $rpc_py remove_vhost_controller unk0 > /dev/null; then + error "Removing nonexistent controller succeeded, but it shouldn't" + fi + + # SCSI + notice "Trying to create scsi controller with incorrect cpumask" + if $rpc_py construct_vhost_scsi_controller vhost.invalid.cpumask --cpumask 0x2; then + error "Creating scsi controller with incorrect cpumask succeeded, but it shouldn't" + fi + + notice "Trying to remove device from nonexistent scsi controller" + if $rpc_py remove_vhost_scsi_target vhost.nonexistent.name 0; then + error "Removing device from nonexistent scsi controller succeeded, but it shouldn't" + fi + + notice "Trying to add device to nonexistent scsi controller" + if $rpc_py add_vhost_scsi_lun vhost.nonexistent.name 0 Malloc0; then + error "Adding device to nonexistent scsi controller succeeded, but it shouldn't" + fi + + notice "Trying to create scsi controller with incorrect name" + if $rpc_py construct_vhost_scsi_controller .; then + error "Creating scsi controller with incorrect name succeeded, but it shouldn't" + fi + + notice "Creating controller naa.0" + $rpc_py construct_vhost_scsi_controller naa.0 + + notice "Adding initial device (0) to naa.0" + $rpc_py add_vhost_scsi_lun naa.0 0 Malloc0 + + notice "Trying to remove nonexistent device on existing controller" + if $rpc_py remove_vhost_scsi_target naa.0 1 > /dev/null; then + error "Removing nonexistent device (1) from controller naa.0 succeeded, but it shouldn't" + fi + + 
notice "Trying to remove existing device from a controller" + $rpc_py remove_vhost_scsi_target naa.0 0 + + notice "Trying to remove a just-deleted device from a controller again" + if $rpc_py remove_vhost_scsi_target naa.0 0 > /dev/null; then + error "Removing device 0 from controller naa.0 succeeded, but it shouldn't" + fi + + notice "Re-adding device 0 to naa.0" + $rpc_py add_vhost_scsi_lun naa.0 0 Malloc0 + + # BLK + notice "Trying to create block controller with incorrect cpumask" + if $rpc_py construct_vhost_blk_controller vhost.invalid.cpumask Malloc0 --cpumask 0x2; then + error "Creating block controller with incorrect cpumask succeeded, but it shouldn't" + fi + + notice "Trying to remove nonexistent block controller" + if $rpc_py remove_vhost_controller vhost.nonexistent.name; then + error "Removing nonexistent block controller succeeded, but it shouldn't" + fi + + notice "Trying to create block controller with incorrect name" + if $rpc_py construct_vhost_blk_controller . Malloc0; then + error "Creating block controller with incorrect name succeeded, but it shouldn't" + fi + + notice "Testing done -> shutting down" + notice "killing vhost app" + spdk_vhost_kill + + notice "EXIT DONE" + notice "===============" +fi diff --git a/src/spdk/test/vhost/perf_bench/vhost_perf.sh b/src/spdk/test/vhost/perf_bench/vhost_perf.sh new file mode 100755 index 00000000..3789c8f1 --- /dev/null +++ b/src/spdk/test/vhost/perf_bench/vhost_perf.sh @@ -0,0 +1,229 @@ +#!/usr/bin/env bash +set -e + +vm_count=1 +vm_memory=2048 +vm_image="/home/sys_sgsw/vhost_vm_image.qcow2" +max_disks="" +ctrl_type="spdk_vhost_scsi" +use_split=false +throttle=false + +lvol_stores=() +lvol_bdevs=() +used_vms="" + +fio_bin="--fio-bin=/home/sys_sgsw/fio_ubuntu" + +function usage() +{ + [[ ! 
-z $2 ]] && ( echo "$2"; echo ""; ) + echo "Shortcut script for doing automated test" + echo "Usage: $(basename $1) [OPTIONS]" + echo + echo "-h, --help Print help and exit" + echo " --fio-bin=PATH Path to FIO binary on host.;" + echo " Binary will be copied to VM, static compilation" + echo " of binary is recommended." + echo " --fio-job=PATH Fio config to use for test." + echo " --vm-count=INT Total number of virtual machines to launch in this test;" + echo " Each VM will get one bdev (lvol or split vbdev)" + echo " to run FIO test." + echo " Default: 1" + echo " --vm-memory=INT Amount of RAM memory (in MB) to pass to a single VM." + echo " Default: 2048 MB" + echo " --vm-image=PATH OS image to use for running the VMs." + echo " Default: /home/sys_sgsw/vhost_vm_image.qcow2" + echo " --max-disks=INT Maximum number of NVMe drives to use in test." + echo " Default: will use all available NVMes." + echo " --ctrl-type=TYPE Controller type to use for test:" + echo " spdk_vhost_scsi - use spdk vhost scsi" + echo " spdk_vhost_blk - use spdk vhost block" + echo " Default: spdk_vhost_scsi" + echo " --use-split Use split vbdevs instead of Logical Volumes" + echo " --throttle=INT I/Os throttle rate in IOPS for each device on the VMs." + echo " --custom-cpu-cfg=PATH Custom CPU config for test." 
+ echo " Default: spdk/test/vhost/common/autotest.config" + echo "-x set -x for script debug" + exit 0 +} + +function cleanup_lvol_cfg() +{ + notice "Removing lvol bdevs" + for lvol_bdev in "${lvol_bdevs[@]}"; do + $rpc_py destroy_lvol_bdev $lvol_bdev + notice "lvol bdev $lvol_bdev removed" + done + + notice "Removing lvol stores" + for lvol_store in "${lvol_stores[@]}"; do + $rpc_py destroy_lvol_store -u $lvol_store + notice "lvol store $lvol_store removed" + done +} + +function cleanup_split_cfg() +{ + notice "Removing split vbdevs" + for (( i=0; i<$max_disks; i++ ));do + $rpc_py destruct_split_vbdev Nvme${i}n1 + done +} + +while getopts 'xh-:' optchar; do + case "$optchar" in + -) + case "$OPTARG" in + help) usage $0 ;; + fio-bin=*) fio_bin="--fio-bin=${OPTARG#*=}" ;; + fio-job=*) fio_job="${OPTARG#*=}" ;; + vm-count=*) vm_count="${OPTARG#*=}" ;; + vm-memory=*) vm_memory="${OPTARG#*=}" ;; + vm-image=*) vm_image="${OPTARG#*=}" ;; + max-disks=*) max_disks="${OPTARG#*=}" ;; + ctrl-type=*) ctrl_type="${OPTARG#*=}" ;; + use-split) use_split=true ;; + throttle) throttle=true ;; + custom-cpu-cfg=*) custom_cpu_cfg="${OPTARG#*=}" ;; + thin-provisioning) thin=" -t " ;; + multi-os) multi_os=true ;; + *) usage $0 "Invalid argument '$OPTARG'" ;; + esac + ;; + h) usage $0 ;; + x) set -x + x="-x" ;; + *) usage $0 "Invalid argument '$OPTARG'" + esac +done + +. $(readlink -e "$(dirname $0)/../common/common.sh") || exit 1 +. $(readlink -e "$(dirname $0)/../../../scripts/common.sh") || exit 1 +COMMON_DIR="$(cd $(readlink -f $(dirname $0))/../common && pwd)" +rpc_py="$SPDK_BUILD_DIR/scripts/rpc.py -s $(get_vhost_dir)/rpc.sock" + +if [[ -n $custom_cpu_cfg ]]; then + source $custom_cpu_cfg +fi + +if [[ -z $fio_job ]]; then + warning "No FIO job specified! Will use default from common directory." 
+ fio_job="$COMMON_DIR/fio_jobs/default_integrity.job" +fi + +trap 'error_exit "${FUNCNAME}" "${LINENO}"' INT ERR +notice "Get NVMe disks:" +nvmes=($(iter_pci_class_code 01 08 02)) + +if [[ -z $max_disks ]]; then + max_disks=${#nvmes[@]} +fi + +if [[ ${#nvmes[@]} -lt max_disks ]]; then + fail "Number of NVMe drives (${#nvmes[@]}) is lower than number of requested disks for test ($max_disks)" +fi + +notice "running SPDK vhost" +spdk_vhost_run +notice "..." + +# Calculate number of needed splits per NVMe +# so that each VM gets it's own bdev during test +splits=() + +#Calculate least minimum number of splits on each disks +for i in `seq 0 $((max_disks - 1))`; do + splits+=( $((vm_count / max_disks)) ) +done + +# Split up the remainder +for i in `seq 0 $((vm_count % max_disks - 1))`; do + (( splits[i]++ )) +done + +notice "Preparing NVMe setup..." +notice "Using $max_disks physical NVMe drives" +notice "Nvme split list: ${splits[@]}" +# Prepare NVMes - Lvols or Splits +if [[ $use_split == true ]]; then + notice "Using split vbdevs" + trap 'cleanup_split_cfg; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR + split_bdevs=() + for (( i=0; i<$max_disks; i++ ));do + out=$($rpc_py construct_split_vbdev Nvme${i}n1 ${splits[$i]}) + for s in $out; do + split_bdevs+=("$s") + done + done + bdevs=("${split_bdevs[@]}") +else + notice "Using logical volumes" + trap 'cleanup_lvol_cfg; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR + for (( i=0; i<$max_disks; i++ ));do + ls_guid=$($rpc_py construct_lvol_store Nvme${i}n1 lvs_$i) + lvol_stores+=("$ls_guid") + for (( j=0; j<${splits[$i]}; j++)); do + free_mb=$(get_lvs_free_mb "$ls_guid") + size=$((free_mb / (${splits[$i]}-j) )) + lb_name=$($rpc_py construct_lvol_bdev -u $ls_guid lbd_$j $size) + lvol_bdevs+=("$lb_name") + done + done + bdevs=("${lvol_bdevs[@]}") +fi + +# Prepare VMs and controllers +for (( i=0; i<$vm_count; i++)); do + vm="vm_$i" + + setup_cmd="vm_setup --disk-type=$ctrl_type --force=$i" + setup_cmd+=" --os=$vm_image" + + 
if [[ "$ctrl_type" == "spdk_vhost_scsi" ]]; then + $rpc_py construct_vhost_scsi_controller naa.0.$i + $rpc_py add_vhost_scsi_lun naa.0.$i 0 ${bdevs[$i]} + setup_cmd+=" --disks=0" + elif [[ "$ctrl_type" == "spdk_vhost_blk" ]]; then + $rpc_py construct_vhost_blk_controller naa.$i.$i ${bdevs[$i]} + setup_cmd+=" --disks=$i" + fi + $setup_cmd + used_vms+=" $i" +done + +# Start VMs +# Run VMs +vm_run $used_vms +vm_wait_for_boot 300 $used_vms + +# Run FIO +fio_disks="" +for vm_num in $used_vms; do + vm_dir=$VM_BASE_DIR/$vm_num + host_name="VM-$vm_num" + vm_ssh $vm_num "hostname $host_name" + vm_start_fio_server $fio_bin $vm_num + + if [[ "$ctrl_type" == "spdk_vhost_scsi" ]]; then + vm_check_scsi_location $vm_num + elif [[ "$ctrl_type" == "spdk_vhost_blk" ]]; then + vm_check_blk_location $vm_num + fi + + fio_disks+=" --vm=${vm_num}$(printf ':/dev/%s' $SCSI_DISK)" +done + +# Run FIO traffic +run_fio $fio_bin --job-file="$fio_job" --out="$TEST_DIR/fio_results" --json $fio_disks + +notice "Shutting down virtual machines..." +vm_shutdown_all + +#notice "Shutting down SPDK vhost app..." +if [[ $use_split == true ]]; then + cleanup_split_cfg +else + cleanup_lvol_cfg +fi +spdk_vhost_kill diff --git a/src/spdk/test/vhost/readonly/delete_partition_vm.sh b/src/spdk/test/vhost/readonly/delete_partition_vm.sh new file mode 100755 index 00000000..18230896 --- /dev/null +++ b/src/spdk/test/vhost/readonly/delete_partition_vm.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash + +set -xe +BASE_DIR=$(readlink -f $(dirname $0)) + +disk_name="vda" +test_folder_name="readonly_test" +test_file_name="some_test_file" + +function error() +{ + echo "===========" + echo -e "ERROR: $@" + echo "===========" + trap - ERR + set +e + umount "$test_folder_name" + rm -rf "$BASE_DIR/$test_folder_name" + exit 1 +} + +trap 'error "In delete_partition_vm.sh, line:" "${LINENO}"' ERR + +if [[ ! -d "/sys/block/$disk_name" ]]; then + error "No vhost-blk disk found!" 
+fi + +if (( $(lsblk -r -n -o RO -d "/dev/$disk_name") == 1 )); then + error "Vhost-blk disk is set as readonly!" +fi + +mkdir -p $test_folder_name + +echo "INFO: Mounting disk" +mount /dev/$disk_name"1" $test_folder_name + +echo "INFO: Removing folder and unmounting $test_folder_name" +umount "$test_folder_name" +rm -rf "$BASE_DIR/$test_folder_name" + +echo "INFO: Deleting partition" +echo -e "d\n1\nw" | fdisk /dev/$disk_name diff --git a/src/spdk/test/vhost/readonly/disabled_readonly_vm.sh b/src/spdk/test/vhost/readonly/disabled_readonly_vm.sh new file mode 100755 index 00000000..bd202433 --- /dev/null +++ b/src/spdk/test/vhost/readonly/disabled_readonly_vm.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash + +set -xe +BASE_DIR=$(readlink -f $(dirname $0)) + +disk_name="vda" +test_folder_name="readonly_test" +test_file_name="some_test_file" + +function error() +{ + echo "===========" + echo -e "ERROR: $@" + echo "===========" + trap - ERR + set +e + umount "$test_folder_name" + rm -rf "$BASE_DIR/$test_folder_name" + exit 1 +} + +trap 'error "In disabled_readonly_vm.sh, line:" "${LINENO}"' ERR + +if [[ ! -d "/sys/block/$disk_name" ]]; then + error "No vhost-blk disk found!" +fi + +if (( $(lsblk -r -n -o RO -d "/dev/$disk_name") == 1 )); then + error "Vhost-blk disk is set as readonly!" 
+fi + +parted -s /dev/$disk_name mklabel gpt +parted -s /dev/$disk_name mkpart primary 2048s 100% +partprobe +sleep 0.1 + +echo "INFO: Creating file system" +mkfs.ext4 -F /dev/$disk_name"1" + +echo "INFO: Mounting disk" +mkdir -p $test_folder_name +mount /dev/$disk_name"1" $test_folder_name + +echo "INFO: Creating a test file $test_file_name" +truncate -s "200M" $test_folder_name/$test_file_name +umount "$test_folder_name" +rm -rf "$BASE_DIR/$test_folder_name" diff --git a/src/spdk/test/vhost/readonly/enabled_readonly_vm.sh b/src/spdk/test/vhost/readonly/enabled_readonly_vm.sh new file mode 100755 index 00000000..79cf1ae5 --- /dev/null +++ b/src/spdk/test/vhost/readonly/enabled_readonly_vm.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash + +set -x +BASE_DIR=$(readlink -f $(dirname $0)) + +disk_name="vda" +test_folder_name="readonly_test" +test_file_name="some_test_file" + +function error() +{ + echo "===========" + echo -e "ERROR: $@" + echo "===========" + umount "$test_folder_name" + rm -rf "$BASE_DIR/$test_folder_name" + exit 1 +} + +if [[ ! -d "/sys/block/$disk_name" ]]; then + error "No vhost-blk disk found!" +fi + +if (( $(lsblk -r -n -o RO -d "/dev/$disk_name") == 0 )); then + error "Vhost-blk disk is not set as readonly!" +fi + +echo "INFO: Found vhost-blk disk with readonly flag" +if [[ ! -b "/dev/$disk_name"1"" ]]; then + error "Partition not found!" +fi + +mkdir $BASE_DIR/$test_folder_name +if [[ $? != 0 ]]; then + error "Failed to create test folder $test_folder_name" +fi + +echo "INFO: Mounting partition" +mount /dev/$disk_name"1" $BASE_DIR/$test_folder_name +if [[ $? != 0 ]]; then + error "Failed to mount partition $disk_name""1" +fi + +echo "INFO: Trying to create file on readonly disk" +truncate -s "200M" $test_folder_name/$test_file_name"_on_readonly" +if [[ $? == 0 ]]; then + error "Created a file on a readonly disk!" 
+fi + +if [[ -f $test_folder_name/$test_file_name ]]; then + echo "INFO: Trying to delete previously created file" + rm $test_folder_name/$test_file_name + if [[ $? == 0 ]]; then + error "Deleted a file from a readonly disk!" + fi +else + error "Previously created file not found!" +fi + +echo "INFO: Copying file from readonly disk" +cp $test_folder_name/$test_file_name $BASE_DIR +if ! rm $BASE_DIR/$test_file_name; then + error "Copied file from a readonly disk was not found!" +fi + +umount "$test_folder_name" +rm -rf "$BASE_DIR/$test_folder_name" +echo "INFO: Trying to create file system on a readonly disk" +if mkfs.ext4 -F /dev/$disk_name"1"; then + error "Created file system on a readonly disk!" +fi + +echo "INFO: Trying to delete partition from readonly disk" +if echo -e "d\n1\nw" | fdisk /dev/$disk_name; then + error "Deleted partition from readonly disk!" +fi diff --git a/src/spdk/test/vhost/readonly/readonly.sh b/src/spdk/test/vhost/readonly/readonly.sh new file mode 100755 index 00000000..d0b7968f --- /dev/null +++ b/src/spdk/test/vhost/readonly/readonly.sh @@ -0,0 +1,132 @@ +#!/usr/bin/env bash + +set -e +READONLY_BASE_DIR=$(readlink -f $(dirname $0)) +[[ -z "$TEST_DIR" ]] && TEST_DIR="$(cd $READONLY_BASE_DIR/../../../../ && pwd)" +[[ -z "$COMMON_DIR" ]] && COMMON_DIR="$(cd $READONLY_BASE_DIR/../common && pwd)" +source $COMMON_DIR/common.sh + +rpc_py="$READONLY_BASE_DIR/../../../scripts/rpc.py -s $(get_vhost_dir)/rpc.sock" + +vm_img="" +disk="Nvme0n1" +x="" + +function usage() +{ + [[ ! -z $2 ]] && ( echo "$2"; echo ""; ) + echo "Shortcut script for automated readonly test for vhost-block" + echo "For test details check test_plan.md" + echo + echo "Usage: $(basename $1) [OPTIONS]" + echo + echo "-h, --help Print help and exit" + echo " --vm_image= Path to VM image" + echo " --disk= Disk name." + echo " If disk=malloc, then creates malloc disk. For malloc disks, size is always 512M," + echo " e.g. --disk=malloc. 
(Default: Nvme0n1)" + echo "-x set -x for script debug" +} + +while getopts 'xh-:' optchar; do + case "$optchar" in + -) + case "$OPTARG" in + help) usage $0 && exit 0;; + vm_image=*) vm_img="${OPTARG#*=}" ;; + disk=*) disk="${OPTARG#*=}" ;; + *) usage $0 "Invalid argument '$OPTARG'" && exit 1 + esac + ;; + h) usage $0 && exit 0 ;; + x) set -x + x="-x" ;; + *) usage $0 "Invalid argument '$OPTARG'" && exit 1 + esac +done + +trap 'error_exit "${FUNCNAME}" "${LINENO}"' ERR + +if [[ $EUID -ne 0 ]]; then + fail "Go away user come back as root" +fi + +function print_tc_name() +{ + notice "" + notice "===============================================================" + notice "Now running: $1" + notice "===============================================================" +} + +function blk_ro_tc1() +{ + print_tc_name ${FUNCNAME[0]} + local vm_no="0" + local disk_name=$disk + local vhost_blk_name="" + local vm_dir="$TEST_DIR/vms/$vm_no" + + if [[ $disk =~ .*malloc.* ]]; then + disk_name=$($rpc_py construct_malloc_bdev 512 4096) + if [ $? != 0 ]; then + fail "Failed to create malloc bdev" + fi + + disk=$disk_name + else + disk_name=${disk%%_*} + if ! $rpc_py get_bdevs | jq -r '.[] .name' | grep -qi $disk_name$; then + fail "$disk_name bdev not found!" 
+ fi + fi + +#Create controller and create file on disk for later test + notice "Creating vhost_blk controller" + vhost_blk_name="naa.$disk_name.$vm_no" + $rpc_py construct_vhost_blk_controller $vhost_blk_name $disk_name + vm_setup --disk-type=spdk_vhost_blk --force=$vm_no --os=$vm_img --disks=$disk --read-only=true + + vm_run $vm_no + vm_wait_for_boot 600 $vm_no + notice "Preparing partition and file on guest VM" + vm_ssh $vm_no "bash -s" < $READONLY_BASE_DIR/disabled_readonly_vm.sh + sleep 1 + + vm_shutdown_all +#Create readonly controller and test readonly feature + notice "Removing controller and creating new one with readonly flag" + $rpc_py remove_vhost_controller $vhost_blk_name + $rpc_py construct_vhost_blk_controller -r $vhost_blk_name $disk_name + + vm_run $vm_no + vm_wait_for_boot 600 $vm_no + notice "Testing readonly feature on guest VM" + vm_ssh $vm_no "bash -s" < $READONLY_BASE_DIR/enabled_readonly_vm.sh + sleep 3 + + vm_shutdown_all +#Delete file from disk and delete partition + echo "INFO: Removing controller and creating new one" + $rpc_py remove_vhost_controller $vhost_blk_name + $rpc_py construct_vhost_blk_controller $vhost_blk_name $disk_name + + vm_run $vm_no + vm_wait_for_boot 600 $vm_no + notice "Removing partition and file from test disk on guest VM" + vm_ssh $vm_no "bash -s" < $READONLY_BASE_DIR/delete_partition_vm.sh + sleep 1 + + vm_shutdown_all +} + +spdk_vhost_run +if [[ -z $x ]]; then + set +x +fi + +blk_ro_tc1 + +$rpc_py delete_nvme_controller Nvme0 + +spdk_vhost_kill diff --git a/src/spdk/test/vhost/readonly/test_plan.md b/src/spdk/test/vhost/readonly/test_plan.md new file mode 100644 index 00000000..957000e8 --- /dev/null +++ b/src/spdk/test/vhost/readonly/test_plan.md @@ -0,0 +1,30 @@ +# vhost-block readonly feature test plan + +## Objective +Vhost block controllers can be created with readonly feature which prevents any write operations on this device. +The purpose of this test is to verify proper operation of this feature. 
+ +## Test cases description +To test readonly feature, this test will create normal vhost-blk controller with NVMe device and on a VM it will +create and mount a partition to which it will copy a file. Next it will poweroff a VM, remove vhost controller and +create new readonly vhost-blk controller with the same device. + +## Test cases + +### blk_ro_tc1 +1. Start vhost +2. Create vhost-blk controller with NVMe device and readonly feature disabled using RPC +3. Run VM with attached vhost-blk controller +4. Check visibility of readonly flag using lsblk, fdisk +5. Create new partition +6. Create new file on new partition +7. Shutdown VM, remove vhost controller +8. Create vhost-blk with previously used NVMe device and readonly feature now enabled using RPC +9. Run VM with attached vhost-blk controller +10. Check visibility of readonly flag using lsblk, fdisk +11. Try to delete previous file +12. Try to create new file +13. Try to remove partition +14. Repeat steps 2 to 4 +15. Remove file from disk, delete partition +16. Shutdown VM, exit vhost diff --git a/src/spdk/test/vhost/spdk_vhost.sh b/src/spdk/test/vhost/spdk_vhost.sh new file mode 100755 index 00000000..a7c0a6ba --- /dev/null +++ b/src/spdk/test/vhost/spdk_vhost.sh @@ -0,0 +1,210 @@ +#!/usr/bin/env bash +rootdir=$(readlink -f $(dirname $0))/../.. 
+source "$rootdir/test/common/autotest_common.sh" + +set -e + +DEFAULT_VM_IMAGE="/home/sys_sgsw/vhost_vm_image.qcow2" +CENTOS_VM_IMAGE="/home/sys_sgsw/spdk_vhost_CentOS_vm_image.qcow2" +DEFAULT_FIO_BIN="/home/sys_sgsw/fio_ubuntu" +CENTOS_FIO_BIN="/home/sys_sgsw/fio_ubuntu_bak" + +case $1 in + -h|--help) + echo "usage: $(basename $0) TEST_TYPE" + echo "Test type can be:" + echo " -i |--integrity for running an integrity test with vhost scsi" + echo " -fs|--fs-integrity-scsi for running an integrity test with filesystem" + echo " -fb|--fs-integrity-blk for running an integrity test with filesystem" + echo " -p |--performance for running a performance test with vhost scsi" + echo " -ib|--integrity-blk for running an integrity test with vhost blk" + echo " -pb|--performance-blk for running a performance test with vhost blk" + echo " -ils|--integrity-lvol-scsi for running an integrity test with vhost scsi and lvol backends" + echo " -ilb|--integrity-lvol-blk for running an integrity test with vhost blk and lvol backends" + echo " -ilsn|--integrity-lvol-scsi-nightly for running an nightly integrity test with vhost scsi and lvol backends" + echo " -ilbn|--integrity-lvol-blk-nightly for running an nightly integrity test with vhost blk and lvol backends" + echo " -hp|--hotplug for running hotplug tests" + echo " -shr|--scsi-hot-remove for running scsi hot remove tests" + echo " -bhr|--blk-hot-remove for running blk hot remove tests" + echo " -ro|--readonly for running readonly test for vhost blk" + echo " -b|--boot for booting vm from vhost controller" + echo " -h |--help prints this message" + echo "" + echo "Environment:" + echo " VM_IMAGE path to QCOW2 VM image used during test (default: $DEFAULT_VM_IMAGE)" + echo "" + echo "Tests are performed only on Linux machine. For other OS no action is performed." + echo "" + exit 0; + ;; +esac + +echo "Running SPDK vhost fio autotest..." 
+if [[ $(uname -s) != Linux ]]; then + echo "" + echo "INFO: Vhost tests are only for Linux machine." + echo "" + exit 0 +fi + +: ${VM_IMAGE="$DEFAULT_VM_IMAGE"} +: ${FIO_BIN="$DEFAULT_FIO_BIN"} + +if [[ ! -r "${VM_IMAGE}" ]]; then + echo "" + echo "ERROR: VM image '${VM_IMAGE}' does not exist." + echo "" + exit 1 +fi + +DISKS_NUMBER=`lspci -mm -n | grep 0108 | tr -d '"' | awk -F " " '{print "0000:"$1}'| wc -l` + +WORKDIR=$(readlink -f $(dirname $0)) + +case $1 in + -n|--negative) + echo 'Negative tests suite...' + run_test case $WORKDIR/other/negative.sh + report_test_completion "vhost_negative" + ;; + -p|--performance) + echo 'Running performance suite...' + run_test case $WORKDIR/fiotest/autotest.sh --fio-bin=$FIO_BIN \ + --vm=0,$VM_IMAGE,Nvme0n1p0 \ + --test-type=spdk_vhost_scsi \ + --fio-job=$WORKDIR/common/fio_jobs/default_performance.job + report_test_completion "vhost_perf" + ;; + -pb|--performance-blk) + echo 'Running blk performance suite...' + run_test case $WORKDIR/fiotest/autotest.sh --fio-bin=$FIO_BIN \ + --vm=0,$VM_IMAGE,Nvme0n1p0 \ + --test-type=spdk_vhost_blk \ + --fio-job=$WORKDIR/common/fio_jobs/default_performance.job + report_test_completion "vhost_perf_blk" + ;; + -m|--migration) + echo 'Running migration suite...' + run_test case $WORKDIR/migration/migration.sh -x \ + --fio-bin=$FIO_BIN --os=$VM_IMAGE --test-cases=1,2 + ;; + -i|--integrity) + echo 'Running SCSI integrity suite...' + run_test case $WORKDIR/fiotest/autotest.sh -x --fio-bin=$FIO_BIN \ + --vm=0,$VM_IMAGE,Nvme0n1p0:Nvme0n1p1:Nvme0n1p2:Nvme0n1p3 \ + --test-type=spdk_vhost_scsi \ + --fio-job=$WORKDIR/common/fio_jobs/default_integrity.job + report_test_completion "nightly_vhost_integrity" + ;; + -ib|--integrity-blk) + echo 'Running blk integrity suite...' 
+ run_test case $WORKDIR/fiotest/autotest.sh -x --fio-bin=$FIO_BIN \ + --vm=0,$VM_IMAGE,Nvme0n1p0:Nvme0n1p1:Nvme0n1p2:Nvme0n1p3 \ + --test-type=spdk_vhost_blk \ + --fio-job=$WORKDIR/common/fio_jobs/default_integrity.job + report_test_completion "nightly_vhost_integrity_blk" + ;; + -fs|--fs-integrity-scsi) + echo 'Running filesystem integrity suite with SCSI...' + run_test case $WORKDIR/integrity/integrity_start.sh --ctrl-type=spdk_vhost_scsi --fs="xfs ntfs btrfs ext4" + report_test_completion "vhost_fs_integrity_scsi" + ;; + -fb|--fs-integrity-blk) + echo 'Running filesystem integrity suite with BLK...' + run_test case $WORKDIR/integrity/integrity_start.sh --ctrl-type=spdk_vhost_blk --fs="xfs ntfs btrfs ext4" + report_test_completion "vhost_fs_integrity_blk" + ;; + -ils|--integrity-lvol-scsi) + echo 'Running lvol integrity suite...' + run_test case $WORKDIR/lvol/lvol_test.sh -x --fio-bin=$FIO_BIN \ + --ctrl-type=spdk_vhost_scsi --thin-provisioning + report_test_completion "vhost_integrity_lvol_scsi" + ;; + -ilb|--integrity-lvol-blk) + echo 'Running lvol integrity suite...' 
+ run_test case $WORKDIR/lvol/lvol_test.sh -x --fio-bin=$FIO_BIN \ + --ctrl-type=spdk_vhost_blk + report_test_completion "vhost_integrity_lvol_blk" + ;; + -ilsn|--integrity-lvol-scsi-nightly) + if [[ $DISKS_NUMBER -ge 2 ]]; then + echo 'Running lvol integrity nightly suite with two cores and two controllers' + run_test case $WORKDIR/lvol/lvol_test.sh --fio-bin=$FIO_BIN \ + --ctrl-type=spdk_vhost_scsi --max-disks=2 --distribute-cores --vm-count=2 + + echo 'Running lvol integrity nightly suite with one core and two controllers' + run_test case $WORKDIR/lvol/lvol_test.sh --fio-bin=$FIO_BIN \ + --ctrl-type=spdk_vhost_scsi --max-disks=2 --vm-count=2 + fi + if [[ -e $CENTOS_VM_IMAGE ]]; then + echo 'Running lvol integrity nightly suite with different os types' + run_test case $WORKDIR/lvol/lvol_test.sh --fio-bin=$CENTOS_FIO_BIN \ + --ctrl-type=spdk_vhost_scsi --vm-count=2 --multi-os + fi + echo 'Running lvol integrity nightly suite with one core and one controller' + run_test case $WORKDIR/lvol/lvol_test.sh --fio-bin=$FIO_BIN \ + --ctrl-type=spdk_vhost_scsi --max-disks=1 + ;; + -ilbn|--integrity-lvol-blk-nightly) + if [[ $DISKS_NUMBER -ge 2 ]]; then + echo 'Running lvol integrity nightly suite with two cores and two controllers' + run_test case $WORKDIR/lvol/lvol_test.sh --fio-bin=$FIO_BIN \ + --ctrl-type=spdk_vhost_blk --max-disks=2 --distribute-cores --vm-count=2 + + echo 'Running lvol integrity nightly suite with one core and two controllers' + run_test case $WORKDIR/lvol/lvol_test.sh --fio-bin=$FIO_BIN \ + --ctrl-type=spdk_vhost_blk --max-disks=2 --vm-count=2 + fi + if [[ -e $CENTOS_VM_IMAGE ]]; then + echo 'Running lvol integrity nightly suite with different os types' + run_test case $WORKDIR/lvol/lvol_test.sh --fio-bin=$CENTOS_FIO_BIN \ + --ctrl-type=spdk_vhost_blk --vm-count=2 --multi-os + fi + echo 'Running lvol integrity nightly suite with one core and one controller' + run_test case $WORKDIR/lvol/lvol_test.sh --fio-bin=$FIO_BIN \ + --ctrl-type=spdk_vhost_blk 
--max-disks=1 + ;; + -hp|--hotplug) + echo 'Running hotplug tests suite...' + run_test case $WORKDIR/hotplug/scsi_hotplug.sh --fio-bin=$FIO_BIN \ + --vm=0,$VM_IMAGE,Nvme0n1p0:Nvme0n1p1 \ + --vm=1,$VM_IMAGE,Nvme0n1p2:Nvme0n1p3 \ + --vm=2,$VM_IMAGE,Nvme0n1p4:Nvme0n1p5 \ + --vm=3,$VM_IMAGE,Nvme0n1p6:Nvme0n1p7 \ + --test-type=spdk_vhost_scsi \ + --fio-jobs=$WORKDIR/hotplug/fio_jobs/default_integrity.job -x + report_test_completion "vhost_hotplug" + ;; + -shr|--scsi-hot-remove) + echo 'Running scsi hotremove tests suite...' + run_test case $WORKDIR/hotplug/scsi_hotplug.sh --fio-bin=$FIO_BIN \ + --vm=0,$VM_IMAGE,Nvme0n1p0:Nvme0n1p1 \ + --vm=1,$VM_IMAGE,Nvme0n1p2:Nvme0n1p3 \ + --test-type=spdk_vhost_scsi \ + --scsi-hotremove-test \ + --fio-jobs=$WORKDIR/hotplug/fio_jobs/default_integrity.job + ;; + -bhr|--blk-hot-remove) + echo 'Running blk hotremove tests suite...' + run_test case $WORKDIR/hotplug/scsi_hotplug.sh --fio-bin=$FIO_BIN \ + --vm=0,$VM_IMAGE,Nvme0n1p0:Nvme0n1p1 \ + --vm=1,$VM_IMAGE,Nvme0n1p2:Nvme0n1p3 \ + --test-type=spdk_vhost_blk \ + --blk-hotremove-test \ + --fio-jobs=$WORKDIR/hotplug/fio_jobs/default_integrity.job + ;; + -ro|--readonly) + echo 'Running readonly tests suite...' + run_test case $WORKDIR/readonly/readonly.sh --vm_image=$VM_IMAGE --disk=Nvme0n1 -x + report_test_completion "vhost_readonly" + ;; + -b|--boot) + echo 'Running os boot from vhost controller...' 
+ $WORKDIR/vhost_boot/vhost_boot.sh --vm_image=$VM_IMAGE + report_test_completion "vhost_boot" + ;; + *) + echo "unknown test type: $1" + exit 1 + ;; +esac diff --git a/src/spdk/test/vhost/test_plan.md b/src/spdk/test/vhost/test_plan.md new file mode 100644 index 00000000..b412436a --- /dev/null +++ b/src/spdk/test/vhost/test_plan.md @@ -0,0 +1,252 @@ +# SPDK vhost Test Plan + +## Current Tests + +### Integrity tests + +#### vhost self test +- compiles SPDK and Qemu +- launches SPDK Vhost +- starts VM with 1 NVMe device attached to it +- issues controller "reset" command using sg3_utils on guest system +- performs data integrity check using dd to write and read data from the device +- runs on 3 host systems (Ubuntu 16.04, Centos 7.3 and Fedora 25) + and 1 guest system (Ubuntu 16.04) +- runs against vhost scsi and vhost blk + +#### FIO Integrity tests +- NVMe device is split into 4 LUNs, each is attached to separate vhost controller +- FIO uses job configuration with randwrite mode to verify if random pattern was + written to and read from correctly on each LUN +- runs on Fedora 25 and Ubuntu 16.04 guest systems +- runs against vhost scsi and vhost blk + +#### Lvol tests +- starts vhost with at least 1 NVMe device +- starts 1 VM or multiple VMs +- lvol store is constructed on each NVMe device +- on each lvol store 1 lvol bdev will be constructed for each running VM +- Logical volume block device is used as backend instead of using + NVMe device backend directly +- after set up, data integrity check will be performed by FIO randwrite + operation with verify flag enabled +- optionally nested lvols can be tested with use of appropriate flag; + On each base lvol store additional lvol bdev will be created which will + serve as a base for nested lvol stores. + On each of the nested lvol stores there will be 1 lvol bdev created for each + VM running. Nested lvol bdevs will be used along with base lvol bdevs for + data integrity check. 
+- runs against vhost scsi and vhost blk
+
+#### Filesystem integrity
+- runs SPDK with 1 VM with 1 NVMe device attached.
+- creates a partition table and filesystem on passed device, and mounts it
+- 1GB test file is created on mounted file system and FIO randrw traffic
+ (with enabled verification) is run
+- Tested file systems: ext4, btrfs, ntfs, xfs
+- runs against vhost scsi and vhost blk
+
+#### Windows HCK SCSI Compliance Test 2.0.
+- Runs SPDK with 1 VM with Windows Server 2012 R2 operating system
+- 4 devices are passed into the VM: NVMe, Split NVMe, Malloc and Split Malloc
+- On each device Windows HCK SCSI Compliance Test 2.0 is run
+
+#### MultiOS test
+- start 3 VMs with guest systems: Ubuntu 16.04, Fedora 25 and Windows Server 2012 R2
+- 3 physical NVMe devices are split into 9 LUNs
+- each guest uses 3 LUNs from 3 different physical NVMe devices
+- Linux guests run FIO integrity jobs to verify read/write operations,
+ while Windows HCK SCSI Compliance Test 2.0 is running on Windows guest
+
+#### vhost hot-remove tests
+- removing NVMe device (unbind from driver) which is already claimed
+ by controller in vhost
+- hotremove tests performed with and without I/O traffic to device
+- I/O traffic, if present in test, has verification enabled
+- checks that vhost and/or VMs do not crash
+- checks that other devices are unaffected by hot-remove of a NVMe device
+- performed against vhost blk and vhost scsi
+
+#### vhost scsi hot-attach and hot-detach tests
+- adding and removing devices via RPC to a controller which is already in use by a VM
+- I/O traffic generated with FIO read/write operations, verification enabled
+- checks that vhost and/or VMs do not crash
+- checks that other devices in the same controller are unaffected by hot-attach
+ and hot-detach operations
+
+#### virtio initiator tests
+- virtio user mode: connect to vhost-scsi controller sockets directly on host
+- virtio pci mode: connect to virtual pci devices on guest virtual machine
+- 6 
concurrent jobs are run simultaneously on 7 devices, each with 8 virtqueues + +##### kernel virtio-scsi-pci device +- test support for kernel vhost-scsi device +- create 1GB ramdisk using targetcli +- create target and add ramdisk to it using targetcli +- add created device to virtio pci tests + +##### emulated virtio-scsi-pci device +- test support for QEMU emulated virtio-scsi-pci device +- add emulated virtio device "Virtio0" to virtio pci tests + +##### Test configuration +- SPDK vhost application is used for testing +- FIO using spdk fio_plugin: rw, randrw, randwrite, write with verification enabled. +- trim sequential and trim random then write on trimmed areas with verification enabled + only on unmap supporting devices +- FIO job configuration: iodepth=128, block size=4k, runtime=10s +- all test cases run jobs in parallel on multiple bdevs +- 8 queues per device + +##### vhost configuration +- scsi controller with 4 NVMe splits +- 2 block controllers, each with 1 NVMe split +- scsi controller with malloc with 512 block size +- scsi controller with malloc with 4096 block size + +##### Test case 1 +- virtio user on host +- perform FIO rw, randwrite, randrw, write, parallel jobs on all devices + +##### Test case 2 +- virtio user on host +- perform FIO trim, randtrim, rw, randwrite, randrw, write, - parallel jobs + then write on trimmed areas on unmap supporting devices + +##### Test case 3 +- virtio pci on vm +- same config as in TC#1 + +##### Test case 4 +- virtio pci on vm +- same config as in TC#2 + +### Live migration +Live migration feature allows to move running virtual machines between SPDK vhost +instances. +Following tests include scenarios with SPDK vhost instances running on both the same +physical server and between remote servers. +Additional configuration of utilities like SSHFS share, NIC IP address adjustment, +etc., might be necessary. + +#### Test case 1 - single vhost migration +- Start SPDK Vhost application. 
+ - Construct a single Malloc bdev.
+ - Construct two SCSI controllers and add previously created Malloc bdev to it.
+- Start first VM (VM_1) and connect to Vhost_1 controller.
+ Verify if attached disk is visible in the system.
+- Start second VM (VM_2) but with "-incoming" option enabled.
+ Connect to Vhost_2 controller. Use the same VM image as VM_1.
+- On VM_1 start FIO write job with verification enabled to connected Malloc bdev.
+- Start VM migration from VM_1 to VM_2 while FIO is still running on VM_1.
+- Once migration is complete check the result using Qemu monitor. Migration info
+ on VM_1 should return "Migration status: completed".
+- VM_2 should be up and running after migration. Via SSH log in and check FIO
+ job result - exit code should be 0 and there should be no data verification errors.
+- Cleanup:
+ - Shutdown both VMs.
+ - Gracefully shutdown Vhost instance.
+
+#### Test case 2 - single server migration
+- Detect RDMA NICs; At least 1 RDMA NIC is needed to run the test.
+ If there is no physical NIC available then emulated Soft Roce NIC will
+ be used instead.
+- Create /tmp/share directory and put a test VM image in there.
+- Start SPDK NVMeOF Target application.
+ - Construct a single NVMe bdev from available bound NVMe drives.
+ - Create NVMeoF subsystem with NVMe bdev as single namespace.
+- Start first SPDK Vhost application instance (later referred to as "Vhost_1").
+ - Use different shared memory ID and CPU mask than NVMeOF Target.
+ - Construct a NVMe bdev by connecting to NVMeOF Target
+ (using trtype: rdma).
+ - Construct a single SCSI controller and add NVMe bdev to it.
+- Start first VM (VM_1) and connect to Vhost_1 controller. Verify if attached disk
+ is visible in the system.
+- Start second SPDK Vhost application instance (later referred to as "Vhost_2").
+ - Use different shared memory ID and CPU mask than previous SPDK instances.
+ - Construct a NVMe bdev by connecting to NVMeOF Target. 
Connect to the same
+ subsystem as Vhost_1, multiconnection is allowed.
+ - Construct a single SCSI controller and add NVMe bdev to it.
+- Start second VM (VM_2) but with "-incoming" option enabled.
+- Check states of both VMs using Qemu monitor utility.
+ VM_1 should be in running state.
+ VM_2 should be in paused (inmigrate) state.
+- Run FIO I/O traffic with verification enabled on to attached NVME on VM_1.
+- While FIO is running issue a command for VM_1 to migrate.
+- When the migrate call returns check the states of VMs again.
+ VM_1 should be in paused (postmigrate) state. "info migrate" should report
+ "Migration status: completed".
+ VM_2 should be in running state.
+- Verify that FIO task completed successfully on VM_2 after migrating.
+ There should be no I/O failures, no verification failures, etc.
+- Cleanup:
+ - Shutdown both VMs.
+ - Gracefully shutdown Vhost instances and NVMeOF Target instance.
+ - Remove /tmp/share directory and its contents.
+ - Clean RDMA NIC / Soft RoCE configuration.
+
+#### Test case 3 - remote server migration
+- Detect RDMA NICs on physical hosts. At least 1 RDMA NIC per host is needed
+ to run the test.
+- On Host 1 create /tmp/share directory and put a test VM image in there.
+- On Host 2 create /tmp/share directory. Using SSHFS mount /tmp/share from Host 1
+ so that the same VM image can be used on both hosts.
+- Start SPDK NVMeOF Target application on Host 1.
+ - Construct a single NVMe bdev from available bound NVMe drives.
+ - Create NVMeoF subsystem with NVMe bdev as single namespace.
+- Start first SPDK Vhost application instance on Host 1 (later referred to as "Vhost_1").
+ - Use different shared memory ID and CPU mask than NVMeOF Target.
+ - Construct a NVMe bdev by connecting to NVMeOF Target
+ (using trtype: rdma).
+ - Construct a single SCSI controller and add NVMe bdev to it.
+- Start first VM (VM_1) and connect to Vhost_1 controller. Verify if attached disk
+ is visible in the system. 
+- Start second SPDK Vhost application instance on Host 2 (later referred to as "Vhost_2").
+ - Construct a NVMe bdev by connecting to NVMeOF Target. Connect to the same
+ subsystem as Vhost_1, multiconnection is allowed.
+ - Construct a single SCSI controller and add NVMe bdev to it.
+- Start second VM (VM_2) but with "-incoming" option enabled.
+- Check states of both VMs using Qemu monitor utility.
+ VM_1 should be in running state.
+ VM_2 should be in paused (inmigrate) state.
+- Run FIO I/O traffic with verification enabled on to attached NVME on VM_1.
+- While FIO is running issue a command for VM_1 to migrate.
+- When the migrate call returns check the states of VMs again.
+ VM_1 should be in paused (postmigrate) state. "info migrate" should report
+ "Migration status: completed".
+ VM_2 should be in running state.
+- Verify that FIO task completed successfully on VM_2 after migrating.
+ There should be no I/O failures, no verification failures, etc.
+- Cleanup:
+ - Shutdown both VMs.
+ - Gracefully shutdown Vhost instances and NVMeOF Target instance.
+ - Remove /tmp/share directory and its contents.
+ - Clean RDMA NIC configuration.
+
+### Performance tests
+Tests verifying the performance and efficiency of the module.
+
+#### FIO Performance 6 NVMes
+- SPDK and created controllers run on 2 CPU cores.
+- Each NVMe drive is split into 2 Split NVMe bdevs, which gives a total of 12
+ in test setup.
+- 12 vhost controllers are created, one for each Split NVMe bdev. All controllers
+ use the same CPU mask as used for running Vhost instance.
+- 12 virtual machines are run as guest systems (with Ubuntu 16.04.2); Each VM
+ connects to a single corresponding vhost controller.
+ Per VM configuration is: 2 pass-through host CPU's, 1 GB RAM, 2 IO controller queues.
+- NVMe drives are pre-conditioned before the test starts. Pre-conditioning is done by
+ writing over whole disk sequentially at least 2 times. 
+- FIO configurations used for tests: + - IO depths: 1, 8, 128 + - Blocksize: 4k + - RW modes: read, randread, write, randwrite, rw, randrw + - Write modes are additionally run with 15 minute ramp-up time to allow better + measurements. Randwrite mode uses longer ramp-up preconditioning of 90 minutes per run. +- Each FIO job result is compared with baseline results to allow detecting performance drops. + +## Future tests and improvements + +### Stress tests +- Add stability and stress tests (long duration tests, long looped start/stop tests, etc.) +to test pool diff --git a/src/spdk/test/vhost/vhost_boot/vhost_boot.sh b/src/spdk/test/vhost/vhost_boot/vhost_boot.sh new file mode 100755 index 00000000..42bd7f22 --- /dev/null +++ b/src/spdk/test/vhost/vhost_boot/vhost_boot.sh @@ -0,0 +1,111 @@ +#!/usr/bin/env bash +set -xe + +basedir=$(readlink -f $(dirname $0)) +. $basedir/../common/common.sh +rpc_py="python $SPDK_BUILD_DIR/scripts/rpc.py -s $(get_vhost_dir)/rpc.sock" +vm_no="0" + +function err_clean +{ + trap - ERR + print_backtrace + set +e + error "Error on $1 $2" + vm_kill_all + $rpc_py remove_vhost_scsi_target naa.vhost_vm.$vm_no 0 + $rpc_py remove_vhost_controller naa.vhost_vm.$vm_no + $rpc_py destroy_lvol_bdev $lvb_u + $rpc_py destroy_lvol_store -u $lvs_u + spdk_vhost_kill + exit 1 +} + +function usage() +{ + [[ ! 
-z $2 ]] && ( echo "$2"; echo ""; ) + echo "Usage: $(basename $1) vm_image=PATH [-h|--help]" + echo "-h, --help Print help and exit" + echo " --vm_image=PATH Path to VM image used in these tests" +} + +while getopts 'h-:' optchar; do + case "$optchar" in + -) + case "$OPTARG" in + vm_image=*) os_image="${OPTARG#*=}" ;; + *) usage $0 echo "Invalid argument '$OPTARG'" && exit 1 ;; + esac + ;; + h) usage $0 && exit 0 ;; + *) usage $0 "Invalid argument '$optchar'" && exit 1 ;; + esac +done + +if [[ $EUID -ne 0 ]]; then + echo "INFO: Go away user come back as root" + exit 1 +fi + +if [[ -z $os_image ]]; then + echo "No path to os image is given" + exit 1 +fi + +timing_enter vhost_boot +trap 'err_clean "${FUNCNAME}" "${LINENO}"' ERR +timing_enter start_vhost +spdk_vhost_run +timing_exit start_vhost + +timing_enter create_lvol +lvs_u=$($rpc_py construct_lvol_store Nvme0n1 lvs0) +lvb_u=$($rpc_py construct_lvol_bdev -u $lvs_u lvb0 20000) +timing_exit create_lvol + +timing_enter convert_vm_image +modprobe nbd +trap '$rpc_py stop_nbd_disk /dev/nbd0; rmmod nbd; err_clean "${FUNCNAME}" "${LINENO}"' ERR +$rpc_py start_nbd_disk $lvb_u /dev/nbd0 +$QEMU_PREFIX/bin/qemu-img convert $os_image -O raw /dev/nbd0 +sync +$rpc_py stop_nbd_disk /dev/nbd0 +sleep 1 +rmmod nbd +timing_exit convert_vm_image + +trap 'err_clean "${FUNCNAME}" "${LINENO}"' ERR +timing_enter create_vhost_controller +$rpc_py construct_vhost_scsi_controller naa.vhost_vm.$vm_no +$rpc_py add_vhost_scsi_lun naa.vhost_vm.$vm_no 0 $lvb_u +timing_exit create_vhost_controller + +timing_enter setup_vm +vm_setup --disk-type=spdk_vhost_scsi --force=$vm_no --disks="vhost_vm" --spdk-boot="vhost_vm" +vm_run $vm_no +vm_wait_for_boot 600 $vm_no +timing_exit setup_vm + +timing_enter run_vm_cmd +vm_ssh $vm_no "parted -s /dev/sda mkpart primary 10GB 100%; partprobe; sleep 0.1;" +vm_ssh $vm_no "mkfs.ext4 -F /dev/sda2; mkdir -p /mnt/sda2test; mount /dev/sda2 /mnt/sda2test;" +vm_ssh $vm_no "fio --name=integrity --bsrange=4k-512k 
--iodepth=128 --numjobs=1 --direct=1 \ + --thread=1 --group_reporting=1 --rw=randrw --rwmixread=70 --filename=/mnt/sda2test/test_file \ + --verify=md5 --do_verify=1 --verify_backlog=1024 --fsync_on_close=1 --runtime=20 \ + --time_based=1 --size=1024m" +vm_ssh $vm_no "umount /mnt/sda2test; rm -rf /mnt/sda2test" +alignment_offset=$(vm_ssh $vm_no "cat /sys/block/sda/sda1/alignment_offset") +echo "alignment_offset: $alignment_offset" +timing_exit run_vm_cmd + +vm_shutdown_all + +timing_enter clean_vhost +$rpc_py remove_vhost_scsi_target naa.vhost_vm.$vm_no 0 +$rpc_py remove_vhost_controller naa.vhost_vm.$vm_no +$rpc_py destroy_lvol_bdev $lvb_u +$rpc_py destroy_lvol_store -u $lvs_u +spdk_vhost_kill +timing_exit clean_vhost + +timing_exit vhost_boot |