From dc50eab76b709d68175a358d6e23a5a3890764d3 Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Sat, 18 May 2024 19:39:57 +0200
Subject: Merging upstream version 6.7.7.

Signed-off-by: Daniel Baumann
---
 drivers/vfio/pci/mlx5/Kconfig |   1 +
 drivers/vfio/pci/mlx5/cmd.c   | 103 +++++++++++----
 drivers/vfio/pci/mlx5/cmd.h   |  28 ++++-
 drivers/vfio/pci/mlx5/main.c  | 284 +++++++++++++++++++++++++++++++-----------
 4 files changed, 313 insertions(+), 103 deletions(-)

(limited to 'drivers/vfio/pci/mlx5')

diff --git a/drivers/vfio/pci/mlx5/Kconfig b/drivers/vfio/pci/mlx5/Kconfig
index 7088edc4fb..c3ced56b77 100644
--- a/drivers/vfio/pci/mlx5/Kconfig
+++ b/drivers/vfio/pci/mlx5/Kconfig
@@ -3,6 +3,7 @@ config MLX5_VFIO_PCI
 	tristate "VFIO support for MLX5 PCI devices"
 	depends on MLX5_CORE
 	select VFIO_PCI_CORE
+	select IOMMUFD_DRIVER
 	help
 	  This provides migration support for MLX5 devices using the VFIO
 	  framework.
diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c
index 33574b0447..efd1d252cd 100644
--- a/drivers/vfio/pci/mlx5/cmd.c
+++ b/drivers/vfio/pci/mlx5/cmd.c
@@ -86,7 +86,8 @@ int mlx5vf_cmd_resume_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod)
 }
 
 int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
-					   size_t *state_size, u8 query_flags)
+					   size_t *state_size, u64 *total_size,
+					   u8 query_flags)
 {
 	u32 out[MLX5_ST_SZ_DW(query_vhca_migration_state_out)] = {};
 	u32 in[MLX5_ST_SZ_DW(query_vhca_migration_state_in)] = {};
@@ -128,6 +129,7 @@ int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
 	MLX5_SET(query_vhca_migration_state_in, in, op_mod, 0);
 	MLX5_SET(query_vhca_migration_state_in, in, incremental,
 		 query_flags & MLX5VF_QUERY_INC);
+	MLX5_SET(query_vhca_migration_state_in, in, chunk, mvdev->chunk_mode);
 
 	ret = mlx5_cmd_exec_inout(mvdev->mdev, query_vhca_migration_state, in,
 				  out);
@@ -139,6 +141,11 @@ int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
 
 	*state_size = MLX5_GET(query_vhca_migration_state_out, out,
 			       required_umem_size);
+	if (total_size)
+		*total_size = mvdev->chunk_mode ?
+			MLX5_GET64(query_vhca_migration_state_out, out,
+				   remaining_total_size) : *state_size;
+
 	return 0;
 }
 
@@ -254,6 +261,9 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
 		mvdev->core_device.vdev.migration_flags |=
 			VFIO_MIGRATION_PRE_COPY;
 
+	if (MLX5_CAP_GEN_2(mvdev->mdev, migration_in_chunks))
+		mvdev->chunk_mode = 1;
+
 end:
 	mlx5_vf_put_core_dev(mvdev->mdev);
 }
@@ -428,6 +438,7 @@ end:
 void mlx5vf_put_data_buffer(struct mlx5_vhca_data_buffer *buf)
 {
 	spin_lock_irq(&buf->migf->list_lock);
+	buf->stop_copy_chunk_num = 0;
 	list_add_tail(&buf->buf_elm, &buf->migf->avail_list);
 	spin_unlock_irq(&buf->migf->list_lock);
 }
@@ -475,6 +486,15 @@ found:
 	return buf;
 }
 
+static void
+mlx5vf_save_callback_complete(struct mlx5_vf_migration_file *migf,
+			      struct mlx5vf_async_data *async_data)
+{
+	kvfree(async_data->out);
+	complete(&migf->save_comp);
+	fput(migf->filp);
+}
+
 void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work)
 {
 	struct mlx5vf_async_data *async_data = container_of(_work,
@@ -487,16 +507,15 @@ void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work)
 		mlx5vf_put_data_buffer(async_data->buf);
 		if (async_data->header_buf)
 			mlx5vf_put_data_buffer(async_data->header_buf);
-		if (async_data->status == MLX5_CMD_STAT_BAD_RES_STATE_ERR)
+		if (!async_data->stop_copy_chunk &&
+		    async_data->status == MLX5_CMD_STAT_BAD_RES_STATE_ERR)
 			migf->state = MLX5_MIGF_STATE_PRE_COPY_ERROR;
 		else
 			migf->state = MLX5_MIGF_STATE_ERROR;
 		wake_up_interruptible(&migf->poll_wait);
 	}
 	mutex_unlock(&migf->lock);
-	kvfree(async_data->out);
-	complete(&migf->save_comp);
-	fput(migf->filp);
+	mlx5vf_save_callback_complete(migf, async_data);
 }
 
 static int add_buf_header(struct mlx5_vhca_data_buffer *header_buf,
@@ -536,13 +555,20 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context)
 			struct mlx5_vf_migration_file, async_data);
 
 	if (!status) {
+		size_t next_required_umem_size = 0;
+		bool stop_copy_last_chunk;
 		size_t image_size;
 		unsigned long flags;
 		bool initial_pre_copy = migf->state != MLX5_MIGF_STATE_PRE_COPY &&
-					!async_data->last_chunk;
+					!async_data->stop_copy_chunk;
 
 		image_size = MLX5_GET(save_vhca_state_out, async_data->out,
 				      actual_image_size);
+		if (async_data->buf->stop_copy_chunk_num)
+			next_required_umem_size = MLX5_GET(save_vhca_state_out,
+					async_data->out, next_required_umem_size);
+		stop_copy_last_chunk = async_data->stop_copy_chunk &&
+				!next_required_umem_size;
 		if (async_data->header_buf) {
 			status = add_buf_header(async_data->header_buf, image_size,
 						initial_pre_copy);
@@ -554,19 +580,34 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context)
 		migf->max_pos += async_data->buf->length;
 		spin_lock_irqsave(&migf->list_lock, flags);
 		list_add_tail(&async_data->buf->buf_elm, &migf->buf_list);
+		if (async_data->buf->stop_copy_chunk_num) {
+			migf->num_ready_chunks++;
+			if (next_required_umem_size &&
+			    migf->num_ready_chunks >= MAX_NUM_CHUNKS) {
+				/* Delay the next SAVE till one chunk be consumed */
+				migf->next_required_umem_size = next_required_umem_size;
+				next_required_umem_size = 0;
+			}
+		}
 		spin_unlock_irqrestore(&migf->list_lock, flags);
-		if (initial_pre_copy)
+		if (initial_pre_copy) {
 			migf->pre_copy_initial_bytes += image_size;
-		migf->state = async_data->last_chunk ?
-			MLX5_MIGF_STATE_COMPLETE : MLX5_MIGF_STATE_PRE_COPY;
+			migf->state = MLX5_MIGF_STATE_PRE_COPY;
+		}
+		if (stop_copy_last_chunk)
+			migf->state = MLX5_MIGF_STATE_COMPLETE;
 		wake_up_interruptible(&migf->poll_wait);
+		if (next_required_umem_size)
+			mlx5vf_mig_file_set_save_work(migf,
+				/* Picking up the next chunk num */
+				(async_data->buf->stop_copy_chunk_num % MAX_NUM_CHUNKS) + 1,
+				next_required_umem_size);
+		mlx5vf_save_callback_complete(migf, async_data);
+		return;
 	}
 
 err:
-	/*
-	 * The error and the cleanup flows can't run from an
-	 * interrupt context
-	 */
+	/* The error flow can't run from an interrupt context */
 	if (status == -EREMOTEIO)
 		status = MLX5_GET(save_vhca_state_out, async_data->out, status);
 	async_data->status = status;
@@ -610,7 +651,7 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
 
 	async_data = &migf->async_data;
 	async_data->buf = buf;
-	async_data->last_chunk = !track;
+	async_data->stop_copy_chunk = !track;
 	async_data->out = kvzalloc(out_size, GFP_KERNEL);
 	if (!async_data->out) {
 		err = -ENOMEM;
@@ -618,10 +659,15 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
 	}
 
 	if (MLX5VF_PRE_COPY_SUPP(mvdev)) {
-		if (async_data->last_chunk && migf->buf_header) {
-			header_buf = migf->buf_header;
-			migf->buf_header = NULL;
-		} else {
+		if (async_data->stop_copy_chunk) {
+			u8 header_idx = buf->stop_copy_chunk_num ?
+				buf->stop_copy_chunk_num - 1 : 0;
+
+			header_buf = migf->buf_header[header_idx];
+			migf->buf_header[header_idx] = NULL;
+		}
+
+		if (!header_buf) {
 			header_buf = mlx5vf_get_data_buffer(migf,
 				sizeof(struct mlx5_vf_migration_header), DMA_NONE);
 			if (IS_ERR(header_buf)) {
@@ -631,8 +677,8 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
 		}
 	}
 
-	if (async_data->last_chunk)
-		migf->state = MLX5_MIGF_STATE_SAVE_LAST;
+	if (async_data->stop_copy_chunk)
+		migf->state = MLX5_MIGF_STATE_SAVE_STOP_COPY_CHUNK;
 
 	async_data->header_buf = header_buf;
 	get_file(migf->filp);
@@ -707,18 +753,21 @@ void mlx5vf_cmd_dealloc_pd(struct mlx5_vf_migration_file *migf)
 void mlx5fv_cmd_clean_migf_resources(struct mlx5_vf_migration_file *migf)
 {
 	struct mlx5_vhca_data_buffer *entry;
+	int i;
 
 	lockdep_assert_held(&migf->mvdev->state_mutex);
 	WARN_ON(migf->mvdev->mdev_detach);
 
-	if (migf->buf) {
-		mlx5vf_free_data_buffer(migf->buf);
-		migf->buf = NULL;
-	}
+	for (i = 0; i < MAX_NUM_CHUNKS; i++) {
+		if (migf->buf[i]) {
+			mlx5vf_free_data_buffer(migf->buf[i]);
+			migf->buf[i] = NULL;
+		}
 
-	if (migf->buf_header) {
-		mlx5vf_free_data_buffer(migf->buf_header);
-		migf->buf_header = NULL;
+		if (migf->buf_header[i]) {
+			mlx5vf_free_data_buffer(migf->buf_header[i]);
+			migf->buf_header[i] = NULL;
+		}
 	}
 
 	list_splice(&migf->avail_list, &migf->buf_list);
diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h
index aec4c69dd6..f2c7227fa6 100644
--- a/drivers/vfio/pci/mlx5/cmd.h
+++ b/drivers/vfio/pci/mlx5/cmd.h
@@ -20,7 +20,7 @@ enum mlx5_vf_migf_state {
 	MLX5_MIGF_STATE_ERROR = 1,
 	MLX5_MIGF_STATE_PRE_COPY_ERROR,
 	MLX5_MIGF_STATE_PRE_COPY,
-	MLX5_MIGF_STATE_SAVE_LAST,
+	MLX5_MIGF_STATE_SAVE_STOP_COPY_CHUNK,
 	MLX5_MIGF_STATE_COMPLETE,
 };
 
@@ -64,6 +64,7 @@ struct mlx5_vhca_data_buffer {
 	u32 mkey;
 	enum dma_data_direction dma_dir;
 	u8 dmaed:1;
+	u8 stop_copy_chunk_num;
 	struct list_head buf_elm;
 	struct mlx5_vf_migration_file *migf;
 	/* Optimize mlx5vf_get_migration_page() for sequential access */
@@ -78,10 +79,19 @@ struct mlx5vf_async_data {
 	struct mlx5_vhca_data_buffer *buf;
 	struct mlx5_vhca_data_buffer *header_buf;
 	int status;
-	u8 last_chunk:1;
+	u8 stop_copy_chunk:1;
 	void *out;
 };
 
+struct mlx5vf_save_work_data {
+	struct mlx5_vf_migration_file *migf;
+	size_t next_required_umem_size;
+	struct work_struct work;
+	u8 chunk_num;
+};
+
+#define MAX_NUM_CHUNKS 2
+
 struct mlx5_vf_migration_file {
 	struct file *filp;
 	struct mutex lock;
@@ -94,8 +104,12 @@ struct mlx5_vf_migration_file {
 	u32 record_tag;
 	u64 stop_copy_prep_size;
 	u64 pre_copy_initial_bytes;
-	struct mlx5_vhca_data_buffer *buf;
-	struct mlx5_vhca_data_buffer *buf_header;
+	size_t next_required_umem_size;
+	u8 num_ready_chunks;
+	/* Upon chunk mode preserve another set of buffers for stop_copy phase */
+	struct mlx5_vhca_data_buffer *buf[MAX_NUM_CHUNKS];
+	struct mlx5_vhca_data_buffer *buf_header[MAX_NUM_CHUNKS];
+	struct mlx5vf_save_work_data save_data[MAX_NUM_CHUNKS];
 	spinlock_t list_lock;
 	struct list_head buf_list;
 	struct list_head avail_list;
@@ -164,6 +178,7 @@ struct mlx5vf_pci_core_device {
 	u8 deferred_reset:1;
 	u8 mdev_detach:1;
 	u8 log_active:1;
+	u8 chunk_mode:1;
 	struct completion tracker_comp;
 	/* protect migration state */
 	struct mutex state_mutex;
@@ -186,7 +201,8 @@ enum {
 int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod);
 int mlx5vf_cmd_resume_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod);
 int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
-					  size_t *state_size, u8 query_flags);
+					  size_t *state_size, u64 *total_size,
+					  u8 query_flags);
 void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
 			       const struct vfio_migration_ops *mig_ops,
 			       const struct vfio_log_ops *log_ops);
@@ -217,6 +233,8 @@ struct page *mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf,
 void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev);
 void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev);
 void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work);
+void mlx5vf_mig_file_set_save_work(struct mlx5_vf_migration_file *migf,
+				   u8 chunk_num, size_t next_required_umem_size);
 int mlx5vf_start_page_tracker(struct vfio_device *vdev,
 		struct rb_root_cached *ranges, u32 nnodes, u64 *page_size);
 int mlx5vf_stop_page_tracker(struct vfio_device *vdev);
diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
index 42ec574a86..fe09a8c8af 100644
--- a/drivers/vfio/pci/mlx5/main.c
+++ b/drivers/vfio/pci/mlx5/main.c
@@ -24,6 +24,8 @@
 /* Device specification max LOAD size */
 #define MAX_LOAD_SIZE (BIT_ULL(__mlx5_bit_sz(load_vhca_state_in, size)) - 1)
 
+#define MAX_CHUNK_SIZE SZ_8M
+
 static struct mlx5vf_pci_core_device *mlx5vf_drvdata(struct pci_dev *pdev)
 {
 	struct vfio_pci_core_device *core_device = dev_get_drvdata(&pdev->dev);
@@ -158,6 +160,41 @@ end:
 	return found ? buf : NULL;
 }
 
+static void mlx5vf_buf_read_done(struct mlx5_vhca_data_buffer *vhca_buf)
+{
+	struct mlx5_vf_migration_file *migf = vhca_buf->migf;
+
+	if (vhca_buf->stop_copy_chunk_num) {
+		bool is_header = vhca_buf->dma_dir == DMA_NONE;
+		u8 chunk_num = vhca_buf->stop_copy_chunk_num;
+		size_t next_required_umem_size = 0;
+
+		if (is_header)
+			migf->buf_header[chunk_num - 1] = vhca_buf;
+		else
+			migf->buf[chunk_num - 1] = vhca_buf;
+
+		spin_lock_irq(&migf->list_lock);
+		list_del_init(&vhca_buf->buf_elm);
+		if (!is_header) {
+			next_required_umem_size =
+				migf->next_required_umem_size;
+			migf->next_required_umem_size = 0;
+			migf->num_ready_chunks--;
+		}
+		spin_unlock_irq(&migf->list_lock);
+		if (next_required_umem_size)
+			mlx5vf_mig_file_set_save_work(migf, chunk_num,
+						      next_required_umem_size);
+		return;
+	}
+
+	spin_lock_irq(&migf->list_lock);
+	list_del_init(&vhca_buf->buf_elm);
+	list_add_tail(&vhca_buf->buf_elm, &vhca_buf->migf->avail_list);
+	spin_unlock_irq(&migf->list_lock);
+}
+
 static ssize_t mlx5vf_buf_read(struct mlx5_vhca_data_buffer *vhca_buf,
 			       char __user **buf, size_t *len, loff_t *pos)
 {
@@ -193,12 +230,8 @@ static ssize_t mlx5vf_buf_read(struct mlx5_vhca_data_buffer *vhca_buf,
 		copy_len -= page_len;
 	}
 
-	if (*pos >= vhca_buf->start_pos + vhca_buf->length) {
-		spin_lock_irq(&vhca_buf->migf->list_lock);
-		list_del_init(&vhca_buf->buf_elm);
-		list_add_tail(&vhca_buf->buf_elm, &vhca_buf->migf->avail_list);
-		spin_unlock_irq(&vhca_buf->migf->list_lock);
-	}
+	if (*pos >= vhca_buf->start_pos + vhca_buf->length)
+		mlx5vf_buf_read_done(vhca_buf);
 
 	return done;
 }
@@ -304,7 +337,75 @@ static void mlx5vf_mark_err(struct mlx5_vf_migration_file *migf)
 	wake_up_interruptible(&migf->poll_wait);
 }
 
-static int mlx5vf_add_stop_copy_header(struct mlx5_vf_migration_file *migf)
+void mlx5vf_mig_file_set_save_work(struct mlx5_vf_migration_file *migf,
+				   u8 chunk_num, size_t next_required_umem_size)
+{
+	migf->save_data[chunk_num - 1].next_required_umem_size =
+		next_required_umem_size;
+	migf->save_data[chunk_num - 1].migf = migf;
+	get_file(migf->filp);
+	queue_work(migf->mvdev->cb_wq,
+		   &migf->save_data[chunk_num - 1].work);
+}
+
+static struct mlx5_vhca_data_buffer *
+mlx5vf_mig_file_get_stop_copy_buf(struct mlx5_vf_migration_file *migf,
+				  u8 index, size_t required_length)
+{
+	struct mlx5_vhca_data_buffer *buf = migf->buf[index];
+	u8 chunk_num;
+
+	WARN_ON(!buf);
+	chunk_num = buf->stop_copy_chunk_num;
+	buf->migf->buf[index] = NULL;
+	/* Checking whether the pre-allocated buffer can fit */
+	if (buf->allocated_length >= required_length)
+		return buf;
+
+	mlx5vf_put_data_buffer(buf);
+	buf = mlx5vf_get_data_buffer(buf->migf, required_length,
+				     DMA_FROM_DEVICE);
+	if (IS_ERR(buf))
+		return buf;
+
+	buf->stop_copy_chunk_num = chunk_num;
+	return buf;
+}
+
+static void mlx5vf_mig_file_save_work(struct work_struct *_work)
+{
+	struct mlx5vf_save_work_data *save_data = container_of(_work,
+		struct mlx5vf_save_work_data, work);
+	struct mlx5_vf_migration_file *migf = save_data->migf;
+	struct mlx5vf_pci_core_device *mvdev = migf->mvdev;
+	struct mlx5_vhca_data_buffer *buf;
+
+	mutex_lock(&mvdev->state_mutex);
+	if (migf->state == MLX5_MIGF_STATE_ERROR)
+		goto end;
+
+	buf = mlx5vf_mig_file_get_stop_copy_buf(migf,
+						save_data->chunk_num - 1,
+						save_data->next_required_umem_size);
+	if (IS_ERR(buf))
+		goto err;
+
+	if (mlx5vf_cmd_save_vhca_state(mvdev, migf, buf, true, false))
+		goto err_save;
+
+	goto end;
+
+err_save:
+	mlx5vf_put_data_buffer(buf);
+err:
+	mlx5vf_mark_err(migf);
+end:
+	mlx5vf_state_mutex_unlock(mvdev);
+	fput(migf->filp);
+}
+
+static int mlx5vf_add_stop_copy_header(struct mlx5_vf_migration_file *migf,
+				       bool track)
 {
 	size_t size = sizeof(struct mlx5_vf_migration_header) +
 		sizeof(struct mlx5_vf_migration_tag_stop_copy_data);
@@ -331,7 +432,7 @@ static int mlx5vf_add_stop_copy_header(struct mlx5_vf_migration_file *migf)
 	to_buff = kmap_local_page(page);
 	memcpy(to_buff, &header, sizeof(header));
 	header_buf->length = sizeof(header);
-	data.stop_copy_size = cpu_to_le64(migf->buf->allocated_length);
+	data.stop_copy_size = cpu_to_le64(migf->buf[0]->allocated_length);
 	memcpy(to_buff + sizeof(header), &data, sizeof(data));
 	header_buf->length += sizeof(data);
 	kunmap_local(to_buff);
@@ -340,48 +441,86 @@ static int mlx5vf_add_stop_copy_header(struct mlx5_vf_migration_file *migf)
 	spin_lock_irqsave(&migf->list_lock, flags);
 	list_add_tail(&header_buf->buf_elm, &migf->buf_list);
 	spin_unlock_irqrestore(&migf->list_lock, flags);
-	migf->pre_copy_initial_bytes = size;
+	if (track)
+		migf->pre_copy_initial_bytes = size;
 	return 0;
 err:
 	mlx5vf_put_data_buffer(header_buf);
 	return ret;
 }
 
-static int mlx5vf_prep_stop_copy(struct mlx5_vf_migration_file *migf,
-				 size_t state_size)
+static int mlx5vf_prep_stop_copy(struct mlx5vf_pci_core_device *mvdev,
+				 struct mlx5_vf_migration_file *migf,
+				 size_t state_size, u64 full_size,
+				 bool track)
 {
 	struct mlx5_vhca_data_buffer *buf;
 	size_t inc_state_size;
+	int num_chunks;
 	int ret;
+	int i;
 
-	/* let's be ready for stop_copy size that might grow by 10 percents */
-	if (check_add_overflow(state_size, state_size / 10, &inc_state_size))
-		inc_state_size = state_size;
+	if (mvdev->chunk_mode) {
+		size_t chunk_size = min_t(size_t, MAX_CHUNK_SIZE, full_size);
 
-	buf = mlx5vf_get_data_buffer(migf, inc_state_size, DMA_FROM_DEVICE);
-	if (IS_ERR(buf))
-		return PTR_ERR(buf);
+		/* from firmware perspective at least 'state_size' buffer should be set */
+		inc_state_size = max(state_size, chunk_size);
+	} else {
+		if (track) {
+			/* let's be ready for stop_copy size that might grow by 10 percents */
+			if (check_add_overflow(state_size, state_size / 10, &inc_state_size))
+				inc_state_size = state_size;
+		} else {
+			inc_state_size = state_size;
+		}
+	}
 
-	migf->buf = buf;
-	buf = mlx5vf_get_data_buffer(migf,
-		sizeof(struct mlx5_vf_migration_header), DMA_NONE);
-	if (IS_ERR(buf)) {
-		ret = PTR_ERR(buf);
-		goto err;
+	/* let's not overflow the device specification max SAVE size */
+	inc_state_size = min_t(size_t, inc_state_size,
+		(BIT_ULL(__mlx5_bit_sz(save_vhca_state_in, size)) - PAGE_SIZE));
+
+	num_chunks = mvdev->chunk_mode ? MAX_NUM_CHUNKS : 1;
+	for (i = 0; i < num_chunks; i++) {
+		buf = mlx5vf_get_data_buffer(migf, inc_state_size, DMA_FROM_DEVICE);
+		if (IS_ERR(buf)) {
+			ret = PTR_ERR(buf);
+			goto err;
+		}
+
+		migf->buf[i] = buf;
+		buf = mlx5vf_get_data_buffer(migf,
+			sizeof(struct mlx5_vf_migration_header), DMA_NONE);
+		if (IS_ERR(buf)) {
+			ret = PTR_ERR(buf);
+			goto err;
+		}
+		migf->buf_header[i] = buf;
+		if (mvdev->chunk_mode) {
+			migf->buf[i]->stop_copy_chunk_num = i + 1;
+			migf->buf_header[i]->stop_copy_chunk_num = i + 1;
+			INIT_WORK(&migf->save_data[i].work,
+				  mlx5vf_mig_file_save_work);
+			migf->save_data[i].chunk_num = i + 1;
+		}
 	}
 
-	migf->buf_header = buf;
-	ret = mlx5vf_add_stop_copy_header(migf);
+	ret = mlx5vf_add_stop_copy_header(migf, track);
 	if (ret)
-		goto err_header;
+		goto err;
 
 	return 0;
 
-err_header:
-	mlx5vf_put_data_buffer(migf->buf_header);
-	migf->buf_header = NULL;
 err:
-	mlx5vf_put_data_buffer(migf->buf);
-	migf->buf = NULL;
+	for (i = 0; i < num_chunks; i++) {
+		if (migf->buf[i]) {
+			mlx5vf_put_data_buffer(migf->buf[i]);
+			migf->buf[i] = NULL;
+		}
+		if (migf->buf_header[i]) {
+			mlx5vf_put_data_buffer(migf->buf_header[i]);
+			migf->buf_header[i] = NULL;
+		}
+	}
+
 	return ret;
 }
 
@@ -428,7 +567,7 @@ static long mlx5vf_precopy_ioctl(struct file *filp, unsigned int cmd,
 		 * As so, the other code below is safe with the proper locks.
		 */
 		ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &inc_length,
-							    MLX5VF_QUERY_INC);
+							    NULL, MLX5VF_QUERY_INC);
 		if (ret)
 			goto err_state_unlock;
 	}
@@ -505,21 +644,15 @@ static int mlx5vf_pci_save_device_inc_data(struct mlx5vf_pci_core_device *mvdev)
 	if (migf->state == MLX5_MIGF_STATE_ERROR)
 		return -ENODEV;
 
-	ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &length,
+	ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &length, NULL,
 				MLX5VF_QUERY_INC | MLX5VF_QUERY_FINAL);
 	if (ret)
 		goto err;
 
-	/* Checking whether we have a matching pre-allocated buffer that can fit */
-	if (migf->buf && migf->buf->allocated_length >= length) {
-		buf = migf->buf;
-		migf->buf = NULL;
-	} else {
-		buf = mlx5vf_get_data_buffer(migf, length, DMA_FROM_DEVICE);
-		if (IS_ERR(buf)) {
-			ret = PTR_ERR(buf);
-			goto err;
-		}
+	buf = mlx5vf_mig_file_get_stop_copy_buf(migf, 0, length);
+	if (IS_ERR(buf)) {
+		ret = PTR_ERR(buf);
+		goto err;
 	}
 
 	ret = mlx5vf_cmd_save_vhca_state(mvdev, migf, buf, true, false);
@@ -541,6 +674,7 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev, bool track)
 	struct mlx5_vf_migration_file *migf;
 	struct mlx5_vhca_data_buffer *buf;
 	size_t length;
+	u64 full_size;
 	int ret;
 
 	migf = kzalloc(sizeof(*migf), GFP_KERNEL_ACCOUNT);
@@ -574,20 +708,25 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev, bool track)
 	INIT_LIST_HEAD(&migf->buf_list);
 	INIT_LIST_HEAD(&migf->avail_list);
 	spin_lock_init(&migf->list_lock);
-	ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &length, 0);
+	ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &length, &full_size, 0);
+	if (ret)
+		goto out_pd;
+
+	ret = mlx5vf_prep_stop_copy(mvdev, migf, length, full_size, track);
 	if (ret)
 		goto out_pd;
 
 	if (track) {
-		ret = mlx5vf_prep_stop_copy(migf, length);
-		if (ret)
+		/* leave the allocated buffer ready for the stop-copy phase */
+		buf = mlx5vf_alloc_data_buffer(migf,
+			migf->buf[0]->allocated_length, DMA_FROM_DEVICE);
+		if (IS_ERR(buf)) {
+			ret = PTR_ERR(buf);
 			goto out_pd;
-	}
-
-	buf = mlx5vf_alloc_data_buffer(migf, length, DMA_FROM_DEVICE);
-	if (IS_ERR(buf)) {
-		ret = PTR_ERR(buf);
-		goto out_pd;
+		}
+	} else {
+		buf = migf->buf[0];
+		migf->buf[0] = NULL;
 	}
 
 	ret = mlx5vf_cmd_save_vhca_state(mvdev, migf, buf, false, track);
@@ -820,8 +959,8 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
 				   size_t len, loff_t *pos)
 {
 	struct mlx5_vf_migration_file *migf = filp->private_data;
-	struct mlx5_vhca_data_buffer *vhca_buf = migf->buf;
-	struct mlx5_vhca_data_buffer *vhca_buf_header = migf->buf_header;
+	struct mlx5_vhca_data_buffer *vhca_buf = migf->buf[0];
+	struct mlx5_vhca_data_buffer *vhca_buf_header = migf->buf_header[0];
 	loff_t requested_length;
 	bool has_work = false;
 	ssize_t done = 0;
@@ -856,15 +995,15 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
 		if (vhca_buf_header->allocated_length < migf->record_size) {
 			mlx5vf_free_data_buffer(vhca_buf_header);
 
-			migf->buf_header = mlx5vf_alloc_data_buffer(migf,
+			migf->buf_header[0] = mlx5vf_alloc_data_buffer(migf,
 					migf->record_size, DMA_NONE);
-			if (IS_ERR(migf->buf_header)) {
-				ret = PTR_ERR(migf->buf_header);
-				migf->buf_header = NULL;
+			if (IS_ERR(migf->buf_header[0])) {
+				ret = PTR_ERR(migf->buf_header[0]);
+				migf->buf_header[0] = NULL;
 				goto out_unlock;
 			}
 
-			vhca_buf_header = migf->buf_header;
+			vhca_buf_header = migf->buf_header[0];
 		}
 
 		vhca_buf_header->start_pos = migf->max_pos;
@@ -884,15 +1023,15 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
 		if (vhca_buf->allocated_length < size) {
 			mlx5vf_free_data_buffer(vhca_buf);
 
-			migf->buf = mlx5vf_alloc_data_buffer(migf,
+			migf->buf[0] = mlx5vf_alloc_data_buffer(migf,
 					size, DMA_TO_DEVICE);
-			if (IS_ERR(migf->buf)) {
-				ret = PTR_ERR(migf->buf);
-				migf->buf = NULL;
+			if (IS_ERR(migf->buf[0])) {
+				ret = PTR_ERR(migf->buf[0]);
+				migf->buf[0] = NULL;
 				goto out_unlock;
 			}
 
-			vhca_buf = migf->buf;
+			vhca_buf = migf->buf[0];
 		}
 
 		vhca_buf->start_pos = migf->max_pos;
@@ -974,7 +1113,7 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev)
 		goto out_pd;
 	}
 
-	migf->buf = buf;
+	migf->buf[0] = buf;
 	if (MLX5VF_PRE_COPY_SUPP(mvdev)) {
 		buf = mlx5vf_alloc_data_buffer(migf,
 			sizeof(struct mlx5_vf_migration_header), DMA_NONE);
 		if (IS_ERR(buf)) {
@@ -983,7 +1122,7 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev)
 			goto out_buf;
 		}
 
-		migf->buf_header = buf;
+		migf->buf_header[0] = buf;
 		migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER;
 	} else {
 		/* Initial state will be to read the image */
@@ -997,7 +1136,7 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev)
 	spin_lock_init(&migf->list_lock);
 	return migf;
 out_buf:
-	mlx5vf_free_data_buffer(migf->buf);
+	mlx5vf_free_data_buffer(migf->buf[0]);
 out_pd:
 	mlx5vf_cmd_dealloc_pd(migf);
 out_free:
@@ -1019,6 +1158,7 @@ void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev)
 		mlx5_cmd_cleanup_async_ctx(&mvdev->saving_migf->async_ctx);
 		cancel_work_sync(&mvdev->saving_migf->async_data.work);
 		mlx5vf_disable_fd(mvdev->saving_migf);
+		wake_up_interruptible(&mvdev->saving_migf->poll_wait);
 		mlx5fv_cmd_clean_migf_resources(mvdev->saving_migf);
 		fput(mvdev->saving_migf->filp);
 		mvdev->saving_migf = NULL;
@@ -1100,7 +1240,7 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
 		if (!MLX5VF_PRE_COPY_SUPP(mvdev)) {
 			ret = mlx5vf_cmd_load_vhca_state(mvdev,
 							 mvdev->resuming_migf,
-							 mvdev->resuming_migf->buf);
+							 mvdev->resuming_migf->buf[0]);
 			if (ret)
 				return ERR_PTR(ret);
 		}
@@ -1194,13 +1334,14 @@ static int mlx5vf_pci_get_data_size(struct vfio_device *vdev,
 	struct mlx5vf_pci_core_device *mvdev = container_of(
 		vdev, struct mlx5vf_pci_core_device, core_device.vdev);
 	size_t state_size;
+	u64 total_size;
 	int ret;
 
 	mutex_lock(&mvdev->state_mutex);
-	ret = mlx5vf_cmd_query_vhca_migration_state(mvdev,
-						    &state_size, 0);
+	ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &state_size,
+						    &total_size, 0);
 	if (!ret)
-		*stop_copy_length = state_size;
+		*stop_copy_length = total_size;
 	mlx5vf_state_mutex_unlock(mvdev);
 	return ret;
 }
@@ -1376,6 +1517,7 @@ static struct pci_driver mlx5vf_pci_driver = {
 
 module_pci_driver(mlx5vf_pci_driver);
 
+MODULE_IMPORT_NS(IOMMUFD);
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Max Gurtovoy ");
 MODULE_AUTHOR("Yishai Hadas ");
--
cgit v1.2.3