Mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git (synced 2025-08-03 15:55:38 +00:00)
vfio/mlx5: Improve the source side flow upon pre_copy
Improve the source side flow upon pre_copy as follows:
- Prepare the stop_copy buffers as part of moving to pre_copy.
- Send the target a record that includes the expected stop_copy size, so the target can optimize its stop_copy flow as well.

To carry this new record type (i.e. MLX5_MIGF_HEADER_TAG_STOP_COPY_SIZE), split the current 64 header flag bits into 32 flag bits and 32 tag bits. Each record now carries a tag and a flag marking it as optional or mandatory; optional records are ignored by the target.

The above reduces the downtime upon stop_copy, as the relevant data is prepared ahead of time as part of pre_copy.

Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Link: https://lore.kernel.org/r/20230124144955.139901-3-yishaih@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
This commit is contained in:
commit b04e2e86e9
parent caf094b5a1
3 changed files with 151 additions and 34 deletions
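
A note on the wire format this commit introduces, before the diff itself: the migration header stays 16 bytes (the old __le64 flags field becomes __le32 flags plus __le32 tag), and each record is that header followed by record_size payload bytes. Below is a minimal userspace C sketch, not kernel code, of how a target could walk such a record stream while honoring the mandatory/optional split described above. The struct and constants mirror the cmd.h hunk further down; the FILE-based stream and the function name are hypothetical.

	#include <stdint.h>
	#include <stdio.h>

	/*
	 * Mirrors struct mlx5_vf_migration_header from cmd.h: the old 64
	 * flag bits are split into 32 flag bits and 32 tag bits, keeping
	 * the header at 16 bytes. Fields are little-endian on the wire; a
	 * little-endian host is assumed to keep this sketch short.
	 */
	struct migration_header {
		uint64_t record_size;	/* payload bytes following the header */
		uint32_t flags;		/* mandatory (0) or optional (1 << 0) */
		uint32_t tag;		/* identifies the payload */
	};

	#define HDR_FLAGS_TAG_OPTIONAL	(1u << 0)
	#define HDR_TAG_FW_DATA		0u
	#define HDR_TAG_STOP_COPY_SIZE	(1u << 0)

	/* Hypothetical target-side step: handle known tags, skip unknown
	 * optional records, and fail only on an unknown mandatory record. */
	static int consume_record(FILE *stream)
	{
		struct migration_header hdr;

		if (fread(&hdr, sizeof(hdr), 1, stream) != 1)
			return -1;
		switch (hdr.tag) {
		case HDR_TAG_FW_DATA:
			/* a real target would load the payload into the
			 * device; this sketch only validates and skips it */
			break;
		case HDR_TAG_STOP_COPY_SIZE:
			/* payload carries a __le64 with the expected
			 * stop_copy size, letting the target pre-allocate
			 * its buffers */
			break;
		default:
			if (!(hdr.flags & HDR_FLAGS_TAG_OPTIONAL))
				return -1;	/* unknown mandatory record */
			break;			/* optional: ignore payload */
		}
		/* advance to the next record header */
		return fseek(stream, (long)hdr.record_size, SEEK_CUR) ? -1 : 0;
	}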
diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c
--- a/drivers/vfio/pci/mlx5/cmd.c
+++ b/drivers/vfio/pci/mlx5/cmd.c
@@ -500,7 +500,7 @@ void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work)
 }
 
 static int add_buf_header(struct mlx5_vhca_data_buffer *header_buf,
-			  size_t image_size)
+			  size_t image_size, bool initial_pre_copy)
 {
 	struct mlx5_vf_migration_file *migf = header_buf->migf;
 	struct mlx5_vf_migration_header header = {};
@@ -508,7 +508,9 @@ static int add_buf_header(struct mlx5_vhca_data_buffer *header_buf,
 	struct page *page;
 	u8 *to_buff;
 
-	header.image_size = cpu_to_le64(image_size);
+	header.record_size = cpu_to_le64(image_size);
+	header.flags = cpu_to_le32(MLX5_MIGF_HEADER_FLAGS_TAG_MANDATORY);
+	header.tag = cpu_to_le32(MLX5_MIGF_HEADER_TAG_FW_DATA);
 	page = mlx5vf_get_migration_page(header_buf, 0);
 	if (!page)
 		return -EINVAL;
@@ -516,12 +518,13 @@ static int add_buf_header(struct mlx5_vhca_data_buffer *header_buf,
 	memcpy(to_buff, &header, sizeof(header));
 	kunmap_local(to_buff);
 	header_buf->length = sizeof(header);
-	header_buf->header_image_size = image_size;
 	header_buf->start_pos = header_buf->migf->max_pos;
 	migf->max_pos += header_buf->length;
 	spin_lock_irqsave(&migf->list_lock, flags);
 	list_add_tail(&header_buf->buf_elm, &migf->buf_list);
 	spin_unlock_irqrestore(&migf->list_lock, flags);
+	if (initial_pre_copy)
+		migf->pre_copy_initial_bytes += sizeof(header);
 	return 0;
 }
 
@@ -535,11 +538,14 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context)
 	if (!status) {
 		size_t image_size;
 		unsigned long flags;
+		bool initial_pre_copy = migf->state != MLX5_MIGF_STATE_PRE_COPY &&
+				!async_data->last_chunk;
 
 		image_size = MLX5_GET(save_vhca_state_out, async_data->out,
 				      actual_image_size);
 		if (async_data->header_buf) {
-			status = add_buf_header(async_data->header_buf, image_size);
+			status = add_buf_header(async_data->header_buf, image_size,
+						initial_pre_copy);
 			if (status)
 				goto err;
 		}
@@ -549,6 +555,8 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context)
 		spin_lock_irqsave(&migf->list_lock, flags);
 		list_add_tail(&async_data->buf->buf_elm, &migf->buf_list);
 		spin_unlock_irqrestore(&migf->list_lock, flags);
+		if (initial_pre_copy)
+			migf->pre_copy_initial_bytes += image_size;
 		migf->state = async_data->last_chunk ?
 			MLX5_MIGF_STATE_COMPLETE : MLX5_MIGF_STATE_PRE_COPY;
 		wake_up_interruptible(&migf->poll_wait);
@@ -610,11 +618,16 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
 	}
 
 	if (MLX5VF_PRE_COPY_SUPP(mvdev)) {
-		header_buf = mlx5vf_get_data_buffer(migf,
-			sizeof(struct mlx5_vf_migration_header), DMA_NONE);
-		if (IS_ERR(header_buf)) {
-			err = PTR_ERR(header_buf);
-			goto err_free;
+		if (async_data->last_chunk && migf->buf_header) {
+			header_buf = migf->buf_header;
+			migf->buf_header = NULL;
+		} else {
+			header_buf = mlx5vf_get_data_buffer(migf,
+				sizeof(struct mlx5_vf_migration_header), DMA_NONE);
+			if (IS_ERR(header_buf)) {
+				err = PTR_ERR(header_buf);
+				goto err_free;
+			}
 		}
 	}
 
diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h
--- a/drivers/vfio/pci/mlx5/cmd.h
+++ b/drivers/vfio/pci/mlx5/cmd.h
@@ -32,10 +32,26 @@ enum mlx5_vf_load_state {
 	MLX5_VF_LOAD_STATE_LOAD_IMAGE,
 };
 
+struct mlx5_vf_migration_tag_stop_copy_data {
+	__le64 stop_copy_size;
+};
+
+enum mlx5_vf_migf_header_flags {
+	MLX5_MIGF_HEADER_FLAGS_TAG_MANDATORY = 0,
+	MLX5_MIGF_HEADER_FLAGS_TAG_OPTIONAL = 1 << 0,
+};
+
+enum mlx5_vf_migf_header_tag {
+	MLX5_MIGF_HEADER_TAG_FW_DATA = 0,
+	MLX5_MIGF_HEADER_TAG_STOP_COPY_SIZE = 1 << 0,
+};
+
 struct mlx5_vf_migration_header {
-	__le64 image_size;
+	__le64 record_size;
 	/* For future use in case we may need to change the kernel protocol */
-	__le64 flags;
+	__le32 flags; /* Use mlx5_vf_migf_header_flags */
+	__le32 tag; /* Use mlx5_vf_migf_header_tag */
+	__u8 data[]; /* Its size is given in the record_size */
 };
 
 struct mlx5_vhca_data_buffer {
@@ -73,6 +89,7 @@ struct mlx5_vf_migration_file {
 	enum mlx5_vf_load_state load_state;
 	u32 pdn;
 	loff_t max_pos;
+	u64 pre_copy_initial_bytes;
 	struct mlx5_vhca_data_buffer *buf;
 	struct mlx5_vhca_data_buffer *buf_header;
 	spinlock_t list_lock;
diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
--- a/drivers/vfio/pci/mlx5/main.c
+++ b/drivers/vfio/pci/mlx5/main.c
@@ -304,6 +304,87 @@ static void mlx5vf_mark_err(struct mlx5_vf_migration_file *migf)
 	wake_up_interruptible(&migf->poll_wait);
 }
 
+static int mlx5vf_add_stop_copy_header(struct mlx5_vf_migration_file *migf)
+{
+	size_t size = sizeof(struct mlx5_vf_migration_header) +
+		sizeof(struct mlx5_vf_migration_tag_stop_copy_data);
+	struct mlx5_vf_migration_tag_stop_copy_data data = {};
+	struct mlx5_vhca_data_buffer *header_buf = NULL;
+	struct mlx5_vf_migration_header header = {};
+	unsigned long flags;
+	struct page *page;
+	u8 *to_buff;
+	int ret;
+
+	header_buf = mlx5vf_get_data_buffer(migf, size, DMA_NONE);
+	if (IS_ERR(header_buf))
+		return PTR_ERR(header_buf);
+
+	header.record_size = cpu_to_le64(sizeof(data));
+	header.flags = cpu_to_le32(MLX5_MIGF_HEADER_FLAGS_TAG_OPTIONAL);
+	header.tag = cpu_to_le32(MLX5_MIGF_HEADER_TAG_STOP_COPY_SIZE);
+	page = mlx5vf_get_migration_page(header_buf, 0);
+	if (!page) {
+		ret = -EINVAL;
+		goto err;
+	}
+	to_buff = kmap_local_page(page);
+	memcpy(to_buff, &header, sizeof(header));
+	header_buf->length = sizeof(header);
+	data.stop_copy_size = cpu_to_le64(migf->buf->allocated_length);
+	memcpy(to_buff + sizeof(header), &data, sizeof(data));
+	header_buf->length += sizeof(data);
+	kunmap_local(to_buff);
+	header_buf->start_pos = header_buf->migf->max_pos;
+	migf->max_pos += header_buf->length;
+	spin_lock_irqsave(&migf->list_lock, flags);
+	list_add_tail(&header_buf->buf_elm, &migf->buf_list);
+	spin_unlock_irqrestore(&migf->list_lock, flags);
+	migf->pre_copy_initial_bytes = size;
+	return 0;
+err:
+	mlx5vf_put_data_buffer(header_buf);
+	return ret;
+}
+
+static int mlx5vf_prep_stop_copy(struct mlx5_vf_migration_file *migf,
+				 size_t state_size)
+{
+	struct mlx5_vhca_data_buffer *buf;
+	size_t inc_state_size;
+	int ret;
+
+	/* let's be ready for stop_copy size that might grow by 10 percents */
+	if (check_add_overflow(state_size, state_size / 10, &inc_state_size))
+		inc_state_size = state_size;
+
+	buf = mlx5vf_get_data_buffer(migf, inc_state_size, DMA_FROM_DEVICE);
+	if (IS_ERR(buf))
+		return PTR_ERR(buf);
+
+	migf->buf = buf;
+	buf = mlx5vf_get_data_buffer(migf,
+		sizeof(struct mlx5_vf_migration_header), DMA_NONE);
+	if (IS_ERR(buf)) {
+		ret = PTR_ERR(buf);
+		goto err;
+	}
+
+	migf->buf_header = buf;
+	ret = mlx5vf_add_stop_copy_header(migf);
+	if (ret)
+		goto err_header;
+	return 0;
+
+err_header:
+	mlx5vf_put_data_buffer(migf->buf_header);
+	migf->buf_header = NULL;
+err:
+	mlx5vf_put_data_buffer(migf->buf);
+	migf->buf = NULL;
+	return ret;
+}
+
 static long mlx5vf_precopy_ioctl(struct file *filp, unsigned int cmd,
 				 unsigned long arg)
 {
@@ -314,7 +395,7 @@ static long mlx5vf_precopy_ioctl(struct file *filp, unsigned int cmd,
 	loff_t *pos = &filp->f_pos;
 	unsigned long minsz;
 	size_t inc_length = 0;
-	bool end_of_data;
+	bool end_of_data = false;
 	int ret;
 
 	if (cmd != VFIO_MIG_GET_PRECOPY_INFO)
@@ -358,25 +439,19 @@ static long mlx5vf_precopy_ioctl(struct file *filp, unsigned int cmd,
 		goto err_migf_unlock;
 	}
 
-	buf = mlx5vf_get_data_buff_from_pos(migf, *pos, &end_of_data);
-	if (buf) {
-		if (buf->start_pos == 0) {
-			info.initial_bytes = buf->header_image_size - *pos;
-		} else if (buf->start_pos ==
-				sizeof(struct mlx5_vf_migration_header)) {
-			/* First data buffer following the header */
-			info.initial_bytes = buf->start_pos +
-					buf->length - *pos;
-		} else {
-			info.dirty_bytes = buf->start_pos + buf->length - *pos;
-		}
+	if (migf->pre_copy_initial_bytes > *pos) {
+		info.initial_bytes = migf->pre_copy_initial_bytes - *pos;
 	} else {
-		if (!end_of_data) {
-			ret = -EINVAL;
-			goto err_migf_unlock;
+		buf = mlx5vf_get_data_buff_from_pos(migf, *pos, &end_of_data);
+		if (buf) {
+			info.dirty_bytes = buf->start_pos + buf->length - *pos;
+		} else {
+			if (!end_of_data) {
+				ret = -EINVAL;
+				goto err_migf_unlock;
+			}
+			info.dirty_bytes = inc_length;
 		}
-
-		info.dirty_bytes = inc_length;
 	}
 
 	if (!end_of_data || !inc_length) {
@@ -441,10 +516,16 @@ static int mlx5vf_pci_save_device_inc_data(struct mlx5vf_pci_core_device *mvdev)
 	if (ret)
 		goto err;
 
-	buf = mlx5vf_get_data_buffer(migf, length, DMA_FROM_DEVICE);
-	if (IS_ERR(buf)) {
-		ret = PTR_ERR(buf);
-		goto err;
+	/* Checking whether we have a matching pre-allocated buffer that can fit */
+	if (migf->buf && migf->buf->allocated_length >= length) {
+		buf = migf->buf;
+		migf->buf = NULL;
+	} else {
+		buf = mlx5vf_get_data_buffer(migf, length, DMA_FROM_DEVICE);
+		if (IS_ERR(buf)) {
+			ret = PTR_ERR(buf);
+			goto err;
+		}
 	}
 
 	ret = mlx5vf_cmd_save_vhca_state(mvdev, migf, buf, true, false);
@@ -503,6 +584,12 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev, bool track)
 	if (ret)
 		goto out_pd;
 
+	if (track) {
+		ret = mlx5vf_prep_stop_copy(migf, length);
+		if (ret)
+			goto out_pd;
+	}
+
 	buf = mlx5vf_alloc_data_buffer(migf, length, DMA_FROM_DEVICE);
 	if (IS_ERR(buf)) {
 		ret = PTR_ERR(buf);
@@ -516,7 +603,7 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev, bool track)
 out_save:
 	mlx5vf_free_data_buffer(buf);
 out_pd:
-	mlx5vf_cmd_dealloc_pd(migf);
+	mlx5fv_cmd_clean_migf_resources(migf);
 out_free:
 	fput(migf->filp);
 end:
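
A closing note on the sizing logic in mlx5vf_prep_stop_copy() above: the stop_copy buffer is pre-allocated from the state size the device reports while the VM is still running, padded by 10% because the final image may grow, with a fallback to the unpadded size if the padded addition overflows. A standalone rendering of that guard, assuming a GCC/Clang toolchain (the kernel's check_add_overflow() wraps the same builtin):

	#include <stddef.h>

	/*
	 * Reserve room for a stop_copy image that may grow past the
	 * pre_copy estimate; on overflow of the padded size fall back to
	 * the raw estimate, mirroring the check_add_overflow() use above.
	 */
	static size_t stop_copy_reserve(size_t state_size)
	{
		size_t inc_state_size;

		if (__builtin_add_overflow(state_size, state_size / 10,
					   &inc_state_size))
			inc_state_size = state_size;
		return inc_state_size;
	}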