From 19fcec84d8d7d21e796c7624e521b60d28ee21ed Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Sun, 7 Apr 2024 20:45:59 +0200
Subject: Adding upstream version 16.2.11+ds.

Signed-off-by: Daniel Baumann
---
 src/spdk/test/vhost/migration/autotest.config   |  14 ++
 src/spdk/test/vhost/migration/migration-tc1.job |  25 +++
 src/spdk/test/vhost/migration/migration-tc1.sh  | 119 +++++++++++++
 src/spdk/test/vhost/migration/migration-tc2.job |  20 +++
 src/spdk/test/vhost/migration/migration-tc2.sh  | 203 ++++++++++++++++++++++
 src/spdk/test/vhost/migration/migration-tc3.job |  20 +++
 src/spdk/test/vhost/migration/migration-tc3a.sh | 218 ++++++++++++++++++++++++
 src/spdk/test/vhost/migration/migration-tc3b.sh |  77 +++++++++
 src/spdk/test/vhost/migration/migration.sh      | 143 ++++++++++++++++
 9 files changed, 839 insertions(+)
 create mode 100644 src/spdk/test/vhost/migration/autotest.config
 create mode 100644 src/spdk/test/vhost/migration/migration-tc1.job
 create mode 100644 src/spdk/test/vhost/migration/migration-tc1.sh
 create mode 100644 src/spdk/test/vhost/migration/migration-tc2.job
 create mode 100644 src/spdk/test/vhost/migration/migration-tc2.sh
 create mode 100644 src/spdk/test/vhost/migration/migration-tc3.job
 create mode 100644 src/spdk/test/vhost/migration/migration-tc3a.sh
 create mode 100644 src/spdk/test/vhost/migration/migration-tc3b.sh
 create mode 100755 src/spdk/test/vhost/migration/migration.sh

diff --git a/src/spdk/test/vhost/migration/autotest.config b/src/spdk/test/vhost/migration/autotest.config
new file mode 100644
index 000000000..ccda306ea
--- /dev/null
+++ b/src/spdk/test/vhost/migration/autotest.config
@@ -0,0 +1,14 @@
+vhost_0_reactor_mask=["0"]
+vhost_0_master_core=0
+
+vhost_1_reactor_mask=["0"]
+vhost_1_master_core=0
+
+VM_0_qemu_mask=1
+VM_0_qemu_numa_node=0
+
+VM_1_qemu_mask=1
+VM_1_qemu_numa_node=0
+
+VM_2_qemu_mask=1
+VM_2_qemu_numa_node=0
diff --git a/src/spdk/test/vhost/migration/migration-tc1.job b/src/spdk/test/vhost/migration/migration-tc1.job
new file mode 100644
index 000000000..5383b243f
--- /dev/null
+++ b/src/spdk/test/vhost/migration/migration-tc1.job
@@ -0,0 +1,25 @@
+[global]
+blocksize_range=4k-512k
+#bs=512k
+iodepth=128
+ioengine=libaio
+filename=
+group_reporting
+thread
+numjobs=1
+direct=1
+do_verify=1
+verify=md5
+verify_fatal=1
+verify_dump=1
+size=100%
+
+[write]
+rw=write
+stonewall
+
+[randread]
+rw=randread
+runtime=10
+time_based
+stonewall
diff --git a/src/spdk/test/vhost/migration/migration-tc1.sh b/src/spdk/test/vhost/migration/migration-tc1.sh
new file mode 100644
index 000000000..6d5a436ef
--- /dev/null
+++ b/src/spdk/test/vhost/migration/migration-tc1.sh
@@ -0,0 +1,119 @@
+function migration_tc1_clean_vhost_config() {
+    # Restore trap
+    trap 'error_exit "${FUNCNAME}" "${LINENO}"' INT ERR EXIT
+
+    notice "Removing vhost devices & controllers via RPC ..."
+    # Delete bdev first to remove all LUNs and SCSI targets
+    $rpc bdev_malloc_delete Malloc0
+
+    # Delete controllers
+    $rpc vhost_delete_controller $incoming_vm_ctrlr
+    $rpc vhost_delete_controller $target_vm_ctrlr
+
+    unset -v incoming_vm target_vm incoming_vm_ctrlr target_vm_ctrlr rpc
+}
+
+function migration_tc1_configure_vhost() {
+    # Those are global intentionally - they will be unset in cleanup handler
+    incoming_vm=0
+    target_vm=1
+    incoming_vm_ctrlr=naa.Malloc0.$incoming_vm
+    target_vm_ctrlr=naa.Malloc0.$target_vm
+    rpc="$rootdir/scripts/rpc.py -s $(get_vhost_dir 0)/rpc.sock"
+
+    trap 'migration_tc1_error_handler; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR EXIT
+
+    # Construct shared Malloc Bdev
+    $rpc bdev_malloc_create -b Malloc0 128 4096
+
+    # And two controllers - one for each VM. Both are using the same Malloc Bdev as LUN 0
+    $rpc vhost_create_scsi_controller $incoming_vm_ctrlr
+    $rpc vhost_scsi_controller_add_target $incoming_vm_ctrlr 0 Malloc0
+
+    $rpc vhost_create_scsi_controller $target_vm_ctrlr
+    $rpc vhost_scsi_controller_add_target $target_vm_ctrlr 0 Malloc0
+}
+
+function migration_tc1_error_handler() {
+    trap - SIGINT ERR EXIT
+    warning "Migration TC1 ERROR HANDLER"
+    print_backtrace
+    set -x
+
+    vm_kill_all
+    migration_tc1_clean_vhost_config
+
+    warning "Migration TC1 FAILED"
+}
+
+function migration_tc1() {
+    # Use 2 VMs:
+    # incoming VM - the one we want to migrate
+    # target VM - the one which will accept migration
+    local job_file="$testdir/migration-tc1.job"
+    local log_file
+    log_file="/root/$(basename ${job_file%%.*}).log"
+
+    # Run vhost
+    vhost_run 0
+    migration_tc1_configure_vhost
+
+    notice "Setting up VMs"
+    vm_setup --os="$os_image" --force=$incoming_vm --disk-type=spdk_vhost_scsi --disks=Malloc0 --migrate-to=$target_vm
+    vm_setup --force=$target_vm --disk-type=spdk_vhost_scsi --disks=Malloc0 --incoming=$incoming_vm
+
+    # Run everything
+    vm_run $incoming_vm $target_vm
+
+    # Wait only for incoming VM, as target is waiting for migration
+    vm_wait_for_boot 300 $incoming_vm
+
+    # Run fio before migration
+    notice "Starting FIO"
+
+    vm_check_scsi_location $incoming_vm
+    run_fio $fio_bin --job-file="$job_file" --no-wait-for-fio --local --vm="${incoming_vm}$(printf ':/dev/%s' $SCSI_DISK)"
+
+    # Wait a while to give FIO time to issue some IO
+    sleep 5
+
+    # Check if fio is still running before migration
+    if ! is_fio_running $incoming_vm; then
+        vm_exec $incoming_vm "cat $log_file"
+        error "FIO is not running before migration: process crashed or finished too early"
+    fi
+
+    vm_migrate $incoming_vm
+    sleep 3
+
+    # Check if fio is still running after migration
+    if ! is_fio_running $target_vm; then
+        vm_exec $target_vm "cat $log_file"
+        error "FIO is not running after migration: process crashed or finished too early"
+    fi
+
+    notice "Waiting for fio to finish"
+    local timeout=40
+    while is_fio_running $target_vm; do
+        sleep 1
+        echo -n "."
+        if ((timeout-- == 0)); then
+            error "timeout while waiting for FIO!"
+        fi
+    done
+
+    notice "Fio result is:"
+    vm_exec $target_vm "cat $log_file"
+
+    notice "Migration DONE"
+
+    notice "Shutting down all VMs"
+    vm_shutdown_all
+
+    migration_tc1_clean_vhost_config
+
+    notice "killing vhost app"
+    vhost_kill 0
+
+    notice "Migration TC1 SUCCESS"
+}
diff --git a/src/spdk/test/vhost/migration/migration-tc2.job b/src/spdk/test/vhost/migration/migration-tc2.job
new file mode 100644
index 000000000..df78a3cd6
--- /dev/null
+++ b/src/spdk/test/vhost/migration/migration-tc2.job
@@ -0,0 +1,20 @@
+[global]
+blocksize_range=4k-512k
+iodepth=128
+ioengine=libaio
+filename=
+group_reporting
+thread
+numjobs=1
+direct=1
+do_verify=1
+verify=md5
+verify_fatal=1
+verify_dump=1
+verify_backlog=8
+
+[randwrite]
+rw=randwrite
+runtime=15
+time_based
+stonewall
diff --git a/src/spdk/test/vhost/migration/migration-tc2.sh b/src/spdk/test/vhost/migration/migration-tc2.sh
new file mode 100644
index 000000000..aa234d842
--- /dev/null
+++ b/src/spdk/test/vhost/migration/migration-tc2.sh
@@ -0,0 +1,203 @@
+source $rootdir/test/nvmf/common.sh
+
+function migration_tc2_cleanup_nvmf_tgt() {
+    local i
+
+    if [[ ! -r "$nvmf_dir/nvmf_tgt.pid" ]]; then
+        warning "Pid file '$nvmf_dir/nvmf_tgt.pid' does not exist."
+        return
+    fi
+
+    if [[ -n "$1" ]]; then
+        trap 'error_exit "${FUNCNAME}" "${LINENO}"' INT ERR EXIT
+        pkill --signal $1 -F $nvmf_dir/nvmf_tgt.pid || true
+        sleep 5
+        if ! pkill -F $nvmf_dir/nvmf_tgt.pid; then
+            fail "failed to kill nvmf_tgt app"
+        fi
+    else
+        pkill --signal SIGTERM -F $nvmf_dir/nvmf_tgt.pid || true
+        for ((i = 0; i < 20; i++)); do
+            if ! pkill --signal 0 -F $nvmf_dir/nvmf_tgt.pid; then
+                break
+            fi
+            sleep 0.5
+        done
+
+        if pkill --signal 0 -F $nvmf_dir/nvmf_tgt.pid; then
+            error "nvmf_tgt failed to shut down"
+        fi
+    fi
+
+    rm $nvmf_dir/nvmf_tgt.pid
+    unset -v nvmf_dir rpc_nvmf
+}
+
+function migration_tc2_cleanup_vhost_config() {
+    timing_enter migration_tc2_cleanup_vhost_config
+
+    trap 'migration_tc2_cleanup_nvmf_tgt SIGKILL; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR EXIT
+
+    notice "Shutting down all VMs"
+    vm_shutdown_all
+
+    notice "Removing vhost devices & controllers via RPC ..."
+    # Delete bdev first to remove all LUNs and SCSI targets
+    $rpc_0 bdev_nvme_detach_controller Nvme0
+    $rpc_0 vhost_delete_controller $incoming_vm_ctrlr
+
+    $rpc_1 delete_nvme_controller Nvme0
+    $rpc_1 vhost_delete_controller $target_vm_ctrlr
+
+    notice "killing vhost app"
+    vhost_kill 0
+    vhost_kill 1
+
+    unset -v incoming_vm target_vm incoming_vm_ctrlr target_vm_ctrlr
+    unset -v rpc_0 rpc_1
+
+    trap 'error_exit "${FUNCNAME}" "${LINENO}"' INT ERR EXIT
+    migration_tc2_cleanup_nvmf_tgt
+
+    timing_exit migration_tc2_cleanup_vhost_config
+}
+
+function migration_tc2_configure_vhost() {
+    timing_enter migration_tc2_configure_vhost
+
+    # Those are global intentionally - they will be unset in cleanup handler
+    nvmf_dir="$VHOST_DIR/nvmf_tgt"
+
+    incoming_vm=1
+    target_vm=2
+    incoming_vm_ctrlr=naa.VhostScsi0.$incoming_vm
+    target_vm_ctrlr=naa.VhostScsi0.$target_vm
+
+    rpc_nvmf="$rootdir/scripts/rpc.py -s $nvmf_dir/rpc.sock"
+    rpc_0="$rootdir/scripts/rpc.py -s $(get_vhost_dir 0)/rpc.sock"
+    rpc_1="$rootdir/scripts/rpc.py -s $(get_vhost_dir 1)/rpc.sock"
+
+    # Default cleanup/error handlers will not shut down the nvmf_tgt app, so set it up
+    # here and tear it down in the cleanup function
+    trap 'migration_tc2_error_cleanup; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR EXIT
+
+    # Run nvmf_tgt and two vhost instances:
+    # nvmf_tgt uses core id 2 (-m 0x4)
+    # First uses core id 0
+    # Second uses core id 1
+    # This forces usage of VMs 1 and 2.
+    timing_enter start_nvmf_tgt
+    notice "Running nvmf_tgt..."
+    mkdir -p $nvmf_dir
+    rm -f $nvmf_dir/*
+    $SPDK_BIN_DIR/nvmf_tgt -s 512 -m 0x4 -r $nvmf_dir/rpc.sock --wait-for-rpc &
+    local nvmf_tgt_pid=$!
+    echo $nvmf_tgt_pid > $nvmf_dir/nvmf_tgt.pid
+    waitforlisten "$nvmf_tgt_pid" "$nvmf_dir/rpc.sock"
+    $rpc_nvmf framework_start_init
+    $rpc_nvmf nvmf_create_transport -t RDMA -u 8192
+    $rootdir/scripts/gen_nvme.sh --json | $rpc_nvmf load_subsystem_config
+    timing_exit start_nvmf_tgt
+
+    vhost_run 0 "-m 0x1 -s 512 -u"
+    vhost_run 1 "-m 0x2 -s 512 -u"
+
+    local rdma_ip_list
+    local nvmf_target_ip
+    rdma_ip_list=$(get_available_rdma_ips)
+    nvmf_target_ip=$(echo "$rdma_ip_list" | head -n 1)
+
+    if [[ -z "$nvmf_target_ip" ]]; then
+        fail "no NIC for nvmf target"
+    fi
+
+    notice "Configuring nvmf_tgt, vhost devices & controllers via RPC ..."
+
+    # Construct shared bdevs and controllers
+    $rpc_nvmf nvmf_create_subsystem nqn.2016-06.io.spdk:cnode1 -a -s SPDK00000000000001
+    $rpc_nvmf nvmf_subsystem_add_ns nqn.2016-06.io.spdk:cnode1 Nvme0n1
+    $rpc_nvmf nvmf_subsystem_add_listener nqn.2016-06.io.spdk:cnode1 -t rdma -a $nvmf_target_ip -s 4420
+
+    $rpc_0 bdev_nvme_attach_controller -b Nvme0 -t rdma -f ipv4 -a $nvmf_target_ip -s 4420 -n "nqn.2016-06.io.spdk:cnode1"
+    $rpc_0 vhost_create_scsi_controller $incoming_vm_ctrlr
+    $rpc_0 vhost_scsi_controller_add_target $incoming_vm_ctrlr 0 Nvme0n1
+
+    $rpc_1 bdev_nvme_attach_controller -b Nvme0 -t rdma -f ipv4 -a $nvmf_target_ip -s 4420 -n "nqn.2016-06.io.spdk:cnode1"
+    $rpc_1 vhost_create_scsi_controller $target_vm_ctrlr
+    $rpc_1 vhost_scsi_controller_add_target $target_vm_ctrlr 0 Nvme0n1
+
+    notice "Setting up VMs"
+    vm_setup --os="$os_image" --force=$incoming_vm --disk-type=spdk_vhost_scsi --disks=VhostScsi0 \
+        --migrate-to=$target_vm --memory=1024 --vhost-name=0
+    vm_setup --force=$target_vm --disk-type=spdk_vhost_scsi --disks=VhostScsi0 --incoming=$incoming_vm --memory=1024 \
+        --vhost-name=1
+
+    # Run everything
+    vm_run $incoming_vm $target_vm
+
+    # Wait only for incoming VM, as target is waiting for migration
+    vm_wait_for_boot 300 $incoming_vm
+
+    notice "Configuration done"
+
+    timing_exit migration_tc2_configure_vhost
+}
+
+function migration_tc2_error_cleanup() {
+    trap - SIGINT ERR EXIT
+    set -x
+
+    vm_kill_all
+    migration_tc2_cleanup_vhost_config
+    notice "Migration TC2 FAILED"
+}
+
+function migration_tc2() {
+    # Use 2 VMs:
+    # incoming VM - the one we want to migrate
+    # target VM - the one which will accept migration
+    local job_file="$testdir/migration-tc2.job"
+    local log_file
+    log_file="/root/$(basename ${job_file%%.*}).log"
+
+    migration_tc2_configure_vhost
+
+    # Run fio before migration
+    notice "Starting FIO"
+    vm_check_scsi_location $incoming_vm
+    run_fio $fio_bin --job-file="$job_file" --no-wait-for-fio --local --vm="${incoming_vm}$(printf ':/dev/%s' $SCSI_DISK)"
+
+    # Wait a while to give FIO time to issue some IO
+    sleep 5
+
+    # Check if fio is still running before migration
+    if ! is_fio_running $incoming_vm; then
+        vm_exec $incoming_vm "cat $log_file"
+        error "FIO is not running before migration: process crashed or finished too early"
+    fi
+
+    vm_migrate $incoming_vm
+    sleep 3
+
+    # Check if fio is still running after migration
+    if ! is_fio_running $target_vm; then
+        vm_exec $target_vm "cat $log_file"
+        error "FIO is not running after migration: process crashed or finished too early"
+    fi
+
+    notice "Waiting for fio to finish"
+    local timeout=40
+    while is_fio_running $target_vm; do
+        sleep 1
+        echo -n "."
+        if ((timeout-- == 0)); then
+            error "timeout while waiting for FIO!"
+        fi
+    done
+
+    notice "Fio result is:"
+    vm_exec $target_vm "cat $log_file"
+
+    migration_tc2_cleanup_vhost_config
+    notice "Migration TC2 SUCCESS"
+}
diff --git a/src/spdk/test/vhost/migration/migration-tc3.job b/src/spdk/test/vhost/migration/migration-tc3.job
new file mode 100644
index 000000000..fe1929662
--- /dev/null
+++ b/src/spdk/test/vhost/migration/migration-tc3.job
@@ -0,0 +1,20 @@
+[global]
+blocksize=4k-512k
+iodepth=128
+ioengine=libaio
+filename=
+group_reporting
+thread
+numjobs=1
+direct=1
+do_verify=1
+verify=md5
+verify_fatal=1
+verify_dump=1
+verify_backlog=8
+
+[randwrite]
+rw=randwrite
+runtime=15
+time_based
+stonewall
diff --git a/src/spdk/test/vhost/migration/migration-tc3a.sh b/src/spdk/test/vhost/migration/migration-tc3a.sh
new file mode 100644
index 000000000..b8f06a8d0
--- /dev/null
+++ b/src/spdk/test/vhost/migration/migration-tc3a.sh
@@ -0,0 +1,218 @@
+source $rootdir/test/nvmf/common.sh
+source $testdir/autotest.config
+
+incoming_vm=1
+target_vm=2
+incoming_vm_ctrlr=naa.VhostScsi0.$incoming_vm
+target_vm_ctrlr=naa.VhostScsi0.$target_vm
+share_dir=$TEST_DIR/share
+spdk_repo_share_dir=$TEST_DIR/share_spdk
+job_file=$testdir/migration-tc3.job
+log_file="/root/$(basename ${job_file%%.*}).log"
+
+if [ -z "$MGMT_TARGET_IP" ]; then
+    error "No IP address of target is given"
+fi
+
+if [ -z "$MGMT_INITIATOR_IP" ]; then
+    error "No IP address of initiator is given"
+fi
+
+if [ -z "$RDMA_TARGET_IP" ]; then
+    error "No IP address of the target's RDMA-capable NIC is given"
+fi
+
+if [ -z "$RDMA_INITIATOR_IP" ]; then
+    error "No IP address of the initiator's RDMA-capable NIC is given"
+fi
+
+function ssh_remote() {
+    local ssh_cmd="sshpass -p root ssh \
+        -o UserKnownHostsFile=/dev/null \
+        -o StrictHostKeyChecking=no \
+        -o ControlMaster=auto \
+        -o User=root \
+        $1"
+
+    shift
+    $ssh_cmd "$@"
+}
+
+function wait_for_remote() {
+    local timeout=40
+    set +x
+    while [[ ! -f $share_dir/DONE ]]; do
+        echo -n "."
+        if ((timeout-- == 0)); then
+            error "timeout while waiting for FIO!"
+        fi
+        sleep 1
+    done
+    set -x
+    rm -f $share_dir/DONE
+}
+
+function check_rdma_connection() {
+    local nic_name
+    nic_name=$(ip -4 -o addr show to $RDMA_TARGET_IP up | cut -d' ' -f2)
+    if [[ -z $nic_name ]]; then
+        error "There is no NIC with IP address $RDMA_TARGET_IP configured"
+    fi
+
+    if ! ls /sys/class/infiniband/*/device/net/$nic_name &> /dev/null; then
+        error "$nic_name with IP $RDMA_TARGET_IP is not an RDMA-capable NIC"
+    fi
+
+}
+
+function host1_cleanup_nvmf() {
+    notice "Shutting down nvmf_tgt on local server"
+    if [[ -n "$1" ]]; then
+        pkill --signal $1 -F $nvmf_dir/nvmf_tgt.pid
+    else
+        pkill -F $nvmf_dir/nvmf_tgt.pid
+    fi
+    rm -f $nvmf_dir/nvmf_tgt.pid
+}
+
+function host1_cleanup_vhost() {
+    trap 'host1_cleanup_nvmf SIGKILL; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR EXIT
+    notice "Shutting down VM $incoming_vm"
+    vm_kill $incoming_vm
+
+    notice "Removing bdev & controller from vhost on local server"
+    $rpc_0 bdev_nvme_detach_controller Nvme0
+    $rpc_0 vhost_delete_controller $incoming_vm_ctrlr
+
+    notice "Shutting down vhost app"
+    vhost_kill 0
+
+    host1_cleanup_nvmf
+}
+
+function host1_start_nvmf() {
+    nvmf_dir="$TEST_DIR/nvmf_tgt"
+    rpc_nvmf="$rootdir/scripts/rpc.py -s $nvmf_dir/nvmf_rpc.sock"
+
+    notice "Starting nvmf_tgt instance on local server"
+    mkdir -p $nvmf_dir
+    rm -rf "${nvmf_dir:?}/"*
+
+    trap 'host1_cleanup_nvmf SIGKILL; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR EXIT
+    $SPDK_BIN_DIR/nvmf_tgt -s 512 -m 0xF -r $nvmf_dir/nvmf_rpc.sock --wait-for-rpc &
+    nvmf_tgt_pid=$!
+    echo $nvmf_tgt_pid > $nvmf_dir/nvmf_tgt.pid
+    waitforlisten "$nvmf_tgt_pid" "$nvmf_dir/nvmf_rpc.sock"
+    $rpc_nvmf framework_start_init
+    $rpc_nvmf nvmf_create_transport -t RDMA -u 8192
+    $rootdir/scripts/gen_nvme.sh --json | $rpc_nvmf load_subsystem_config
+
+    $rpc_nvmf nvmf_create_subsystem nqn.2018-02.io.spdk:cnode1 -a -s SPDK01
+    $rpc_nvmf nvmf_subsystem_add_ns nqn.2018-02.io.spdk:cnode1 Nvme0n1
+    $rpc_nvmf nvmf_subsystem_add_listener nqn.2018-02.io.spdk:cnode1 -t rdma -a $RDMA_TARGET_IP -s 4420
+}
+
+function host1_start_vhost() {
+    rpc_0="$rootdir/scripts/rpc.py -s $(get_vhost_dir 0)/rpc.sock"
+
+    notice "Starting vhost0 instance on local server"
+    trap 'host1_cleanup_vhost; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR EXIT
+    vhost_run 0 "-u"
+    $rpc_0 bdev_nvme_attach_controller -b Nvme0 -t rdma -f ipv4 -a $RDMA_TARGET_IP -s 4420 -n "nqn.2018-02.io.spdk:cnode1"
+    $rpc_0 vhost_create_scsi_controller $incoming_vm_ctrlr
+    $rpc_0 vhost_scsi_controller_add_target $incoming_vm_ctrlr 0 Nvme0n1
+
+    vm_setup --os="$share_dir/migration.qcow2" --force=$incoming_vm --disk-type=spdk_vhost_scsi --disks=VhostScsi0 \
+        --migrate-to=$target_vm --memory=512 --queue_num=1
+
+    # TODO: Fix loop calculating cpu_num in common.sh
+    # We need -smp 1 and -queue_num 1 for this test to work, and this loop
+    # in some cases calculates the wrong cpu_num.
+    sed -i "s#smp 2#smp 1#g" $VM_BASE_DIR/$incoming_vm/run.sh
+    vm_run $incoming_vm
+    vm_wait_for_boot 300 $incoming_vm
+}
+
+function cleanup_share() {
+    set +e
+    notice "Cleaning up share directory on remote and local server"
+    ssh_remote $MGMT_INITIATOR_IP "umount $VM_BASE_DIR"
+    ssh_remote $MGMT_INITIATOR_IP "umount $share_dir; rm -f $share_dir/* rm -rf $spdk_repo_share_dir"
+    rm -f $share_dir/migration.qcow2
+    rm -f $share_dir/spdk.tar.gz
+    set -e
+}
+
+function host_1_create_share() {
+    notice "Creating share directory on local server to re-use on remote"
+    mkdir -p $share_dir
+    mkdir -p $VM_BASE_DIR # This dir would've been created later but we need it now
+    rm -rf $share_dir/spdk.tar.gz $share_dir/spdk || true
+    cp $os_image $share_dir/migration.qcow2
+    tar --exclude="*.o" --exclude="*.d" --exclude="*.git" -C $rootdir -zcf $share_dir/spdk.tar.gz .
+}
+
+function host_2_create_share() {
+    # Copy & compile the sources for later use on remote server.
+    ssh_remote $MGMT_INITIATOR_IP "uname -a"
+    ssh_remote $MGMT_INITIATOR_IP "mkdir -p $share_dir"
+    ssh_remote $MGMT_INITIATOR_IP "mkdir -p $spdk_repo_share_dir"
+    ssh_remote $MGMT_INITIATOR_IP "mkdir -p $VM_BASE_DIR"
+    ssh_remote $MGMT_INITIATOR_IP "sshfs -o\
+        ssh_command=\"ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o ControlMaster=auto\
+        -i $SPDK_VHOST_SSH_KEY_FILE\" root@$MGMT_TARGET_IP:$VM_BASE_DIR $VM_BASE_DIR"
+    ssh_remote $MGMT_INITIATOR_IP "sshfs -o\
+        ssh_command=\"ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o ControlMaster=auto\
+        -i $SPDK_VHOST_SSH_KEY_FILE\" root@$MGMT_TARGET_IP:$share_dir $share_dir"
+    ssh_remote $MGMT_INITIATOR_IP "mkdir -p $spdk_repo_share_dir/spdk"
+    ssh_remote $MGMT_INITIATOR_IP "tar -zxf $share_dir/spdk.tar.gz -C $spdk_repo_share_dir/spdk --strip-components=1"
+    ssh_remote $MGMT_INITIATOR_IP "cd $spdk_repo_share_dir/spdk; make clean; ./configure --with-rdma --enable-debug; make -j40"
+}
+
+function host_2_start_vhost() {
+    ssh_remote $MGMT_INITIATOR_IP "nohup $spdk_repo_share_dir/spdk/test/vhost/migration/migration.sh\
+        --test-cases=3b --os=$share_dir/migration.qcow2\
+        --rdma-tgt-ip=$RDMA_TARGET_IP &>$share_dir/output.log &"
+    notice "Waiting for remote to be done with vhost & VM setup..."
+    wait_for_remote
+}
+
+function setup_share() {
+    trap 'cleanup_share; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR EXIT
+    host_1_create_share
+    host_2_create_share
+}
+
+function migration_tc3() {
+    check_rdma_connection
+    setup_share
+    host1_start_nvmf
+    host1_start_vhost
+    host_2_start_vhost
+
+    # Do migration
+    notice "Starting fio on local VM"
+    vm_check_scsi_location $incoming_vm
+
+    run_fio $fio_bin --job-file="$job_file" --no-wait-for-fio --local --vm="${incoming_vm}$(printf ':/dev/%s' $SCSI_DISK)"
+    sleep 5
+
+    if ! is_fio_running $incoming_vm; then
+        vm_exec $incoming_vm "cat $log_file"
+        error "Fio not running on local VM before starting migration!"
+    fi
+
+    vm_migrate $incoming_vm $RDMA_INITIATOR_IP
+    sleep 1
+
+    # Verify migration on remote host and clean up vhost
+    ssh_remote $MGMT_INITIATOR_IP "pkill -CONT -F $TEST_DIR/tc3b.pid"
+    notice "Waiting for remote to finish FIO on VM and clean up..."
+    wait_for_remote
+
+    # Clean up local stuff
+    host1_cleanup_vhost
+    cleanup_share
+}
+
+migration_tc3
diff --git a/src/spdk/test/vhost/migration/migration-tc3b.sh b/src/spdk/test/vhost/migration/migration-tc3b.sh
new file mode 100644
index 000000000..22d54df73
--- /dev/null
+++ b/src/spdk/test/vhost/migration/migration-tc3b.sh
@@ -0,0 +1,77 @@
+# The -m option is needed to be able to use the "suspend" command,
+# as we are using a non-interactive session to connect to the remote host.
+# Without -m it would not be possible to suspend the process.
+set -m
+source $testdir/autotest.config
+
+incoming_vm=1
+target_vm=2
+target_vm_ctrl=naa.VhostScsi0.$target_vm
+rpc="$rootdir/scripts/rpc.py -s $(get_vhost_dir 1)/rpc.sock"
+share_dir=$VHOST_DIR/share
+
+function host_2_cleanup_vhost() {
+    notice "Shutting down VM $target_vm"
+    vm_kill $target_vm
+
+    notice "Removing bdev & controller from vhost 1 on remote server"
+    $rpc bdev_nvme_detach_controller Nvme0
+    $rpc vhost_delete_controller $target_vm_ctrl
+
+    notice "Shutting down vhost app"
+    vhost_kill 1
+    sleep 1
+}
+
+function host_2_start_vhost() {
+    echo "BASE DIR $VHOST_DIR"
+    vhost_work_dir=$VHOST_DIR/vhost1
+    mkdir -p $vhost_work_dir
+    rm -f $vhost_work_dir/*
+
+    notice "Starting vhost 1 instance on remote server"
+    trap 'host_2_cleanup_vhost; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR EXIT
+    vhost_run 1 "-u"
+
+    $rpc bdev_nvme_attach_controller -b Nvme0 -t rdma -f ipv4 -a $RDMA_TARGET_IP -s 4420 -n "nqn.2018-02.io.spdk:cnode1"
+    $rpc vhost_create_scsi_controller $target_vm_ctrl
+    $rpc vhost_scsi_controller_add_target $target_vm_ctrl 0 Nvme0n1
+
+    vm_setup --os="$os_image" --force=$target_vm --disk-type=spdk_vhost_scsi --disks=VhostScsi0 \
+        --memory=512 --vhost-name=1 --incoming=$incoming_vm
+    vm_run $target_vm
+    sleep 1
+
+    # Use this file as a flag to notify main script
+    # that setup on remote server is done
+    echo "DONE" > $share_dir/DONE
+}
+
+echo $$ > $VHOST_DIR/tc3b.pid
+host_2_start_vhost
+suspend -f
+
+if ! vm_os_booted $target_vm; then
+    fail "VM$target_vm is not running!"
+fi
+
+if ! is_fio_running $target_vm; then
+    vm_exec $target_vm "cat /root/migration-tc3.log"
+    error "FIO is not running on remote server after migration!"
+fi
+
+notice "Waiting for FIO to finish on remote server VM"
+timeout=40
+while is_fio_running $target_vm; do
+    sleep 1
+    echo -n "."
+    if ((timeout-- == 0)); then
+        error "timeout while waiting for FIO!"
+    fi
+done
+
+notice "FIO result after migration:"
+vm_exec $target_vm "cat /root/migration-tc3.log"
+
+host_2_cleanup_vhost
+echo "DONE" > $share_dir/DONE
diff --git a/src/spdk/test/vhost/migration/migration.sh b/src/spdk/test/vhost/migration/migration.sh
new file mode 100755
index 000000000..8f461e6ca
--- /dev/null
+++ b/src/spdk/test/vhost/migration/migration.sh
@@ -0,0 +1,143 @@
+#!/usr/bin/env bash
+
+testdir=$(readlink -f $(dirname $0))
+rootdir=$(readlink -f $testdir/../../..)
+source $rootdir/test/common/autotest_common.sh
+source $rootdir/test/vhost/common.sh
+source $testdir/migration-tc1.sh
+source $testdir/migration-tc2.sh
+
+vms=()
+declare -A vms_os
+declare -A vms_raw_disks
+declare -A vms_ctrlrs
+declare -A vms_ctrlrs_disks
+
+# By default use Guest fio
+fio_bin=""
+MGMT_TARGET_IP=""
+MGMT_INITIATOR_IP=""
+RDMA_TARGET_IP=""
+RDMA_INITIATOR_IP=""
+function usage() {
+    [[ -n $2 ]] && (
+        echo "$2"
+        echo ""
+    )
+    echo "Shortcut script for doing automated tests of live migration."
+    echo "Usage: $(basename $1) [OPTIONS]"
+    echo
+    echo "    --os=PATH              OS image used by the test VMs."
+    echo "    --fio-bin=FIO          Use specific fio binary (will be uploaded to VM)"
+    echo "    --mgmt-tgt-ip=IP       IP address of target."
+    echo "    --mgmt-init-ip=IP      IP address of initiator."
+    echo "    --rdma-tgt-ip=IP       IP address of the target's RDMA-capable NIC."
+    echo "    --rdma-init-ip=IP      IP address of the initiator's RDMA-capable NIC."
+ echo "-x set -x for script debug" +} + +for param in "$@"; do + case "$param" in + --help | -h) + usage $0 + exit 0 + ;; + --os=*) os_image="${param#*=}" ;; + --fio-bin=*) fio_bin="${param}" ;; + --mgmt-tgt-ip=*) MGMT_TARGET_IP="${param#*=}" ;; + --mgmt-init-ip=*) MGMT_INITIATOR_IP="${param#*=}" ;; + --rdma-tgt-ip=*) RDMA_TARGET_IP="${param#*=}" ;; + --rdma-init-ip=*) RDMA_INITIATOR_IP="${param#*=}" ;; + -x) set -x ;; + -v) SPDK_VHOST_VERBOSE=true ;; + *) + usage $0 "Invalid argument '$param'" + exit 1 + ;; + esac +done + +vhosttestinit + +trap 'error_exit "${FUNCNAME}" "${LINENO}"' INT ERR EXIT + +function vm_monitor_send() { + local vm_num=$1 + local cmd_result_file="$2" + local vm_dir="$VM_DIR/$1" + local vm_monitor_port + vm_monitor_port=$(cat $vm_dir/monitor_port) + + [[ -n "$vm_monitor_port" ]] || fail "No monitor port!" + + shift 2 + nc 127.0.0.1 $vm_monitor_port "$@" > $cmd_result_file +} + +# Migrate VM $1 +function vm_migrate() { + local from_vm_dir="$VM_DIR/$1" + local target_vm_dir + local target_vm + local target_vm_migration_port + target_vm_dir="$(readlink -e $from_vm_dir/vm_migrate_to)" + target_vm="$(basename $target_vm_dir)" + target_vm_migration_port="$(cat $target_vm_dir/migration_port)" + if [[ -n "$2" ]]; then + local target_ip=$2 + else + local target_ip="127.0.0.1" + fi + + # Sanity check if target VM (QEMU) is configured to accept source VM (QEMU) migration + if [[ "$(readlink -e ${target_vm_dir}/vm_incoming)" != "$(readlink -e ${from_vm_dir})" ]]; then + fail "source VM $1 or destination VM is not properly configured for live migration" + fi + + timing_enter vm_migrate + notice "Migrating VM $1 to VM "$(basename $target_vm_dir) + echo -e \ + "migrate_set_speed 1g\n" \ + "migrate tcp:$target_ip:$target_vm_migration_port\n" \ + "info migrate\n" \ + "quit" | vm_monitor_send $1 "$from_vm_dir/migration_result" + + # Post migration checks: + if ! grep "Migration status: completed" $from_vm_dir/migration_result -q; then + cat $from_vm_dir/migration_result + fail "Migration failed:\n" + fi + + # Don't perform the following check if target VM is on remote server + # as we won't have access to it. + # If you need this check then perform it on your own. + if [[ "$target_ip" == "127.0.0.1" ]]; then + if ! vm_os_booted $target_vm; then + fail "VM$target_vm is not running" + cat $target_vm $target_vm_dir/cont_result + fi + fi + + notice "Migration complete" + timing_exit vm_migrate +} + +function is_fio_running() { + xtrace_disable + + if vm_exec $1 'kill -0 $(cat /root/fio.pid)'; then + local ret=0 + else + local ret=1 + fi + + xtrace_restore + return $ret +} + +run_test "vhost_migration_tc1" migration_tc1 +run_test "vhost_migration_tc2" migration_tc2 + +trap - SIGINT ERR EXIT + +vhosttestfini -- cgit v1.2.3