linux/drivers/vfio/pci/mlx5/cmd.h

241 lines
6.9 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*/
#ifndef MLX5_VFIO_CMD_H
#define MLX5_VFIO_CMD_H
#include <linux/kernel.h>
#include <linux/vfio_pci_core.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/cq.h>
#include <linux/mlx5/qp.h>
/*
 * State of a migration file, kept in mlx5_vf_migration_file.state.
 * Numbering starts at 1; no enumerator has the value 0.
 */
enum mlx5_vf_migf_state {
	MLX5_MIGF_STATE_ERROR                = 1,
	MLX5_MIGF_STATE_PRE_COPY_ERROR       = 2,
	MLX5_MIGF_STATE_PRE_COPY             = 3,
	MLX5_MIGF_STATE_SAVE_STOP_COPY_CHUNK = 4,
	MLX5_MIGF_STATE_COMPLETE             = 5,
};
/*
 * Load-side state of a migration file, kept in
 * mlx5_vf_migration_file.load_state.
 * NOTE(review): the enumerators look like the successive steps of parsing
 * an incoming stream (header, header data, image) - confirm with the users.
 */
enum mlx5_vf_load_state {
	MLX5_VF_LOAD_STATE_READ_HEADER      = 0,
	MLX5_VF_LOAD_STATE_PREP_HEADER_DATA = 1,
	MLX5_VF_LOAD_STATE_READ_HEADER_DATA = 2,
	MLX5_VF_LOAD_STATE_PREP_IMAGE       = 3,
	MLX5_VF_LOAD_STATE_READ_IMAGE       = 4,
	MLX5_VF_LOAD_STATE_LOAD_IMAGE       = 5,
};
/*
 * Payload made of a single little-endian 64-bit stop-copy size.
 * NOTE(review): presumably the data of a record tagged
 * MLX5_MIGF_HEADER_TAG_STOP_COPY_SIZE - confirm against the users.
 */
struct mlx5_vf_migration_tag_stop_copy_data {
__le64 stop_copy_size;
};
/* Values for the 'flags' field of struct mlx5_vf_migration_header. */
enum mlx5_vf_migf_header_flags {
	MLX5_MIGF_HEADER_FLAGS_TAG_MANDATORY = 0x0,
	MLX5_MIGF_HEADER_FLAGS_TAG_OPTIONAL  = 0x1,	/* bit 0 */
};
/* Values for the 'tag' field of struct mlx5_vf_migration_header. */
enum mlx5_vf_migf_header_tag {
	MLX5_MIGF_HEADER_TAG_FW_DATA        = 0x0,
	MLX5_MIGF_HEADER_TAG_STOP_COPY_SIZE = 0x1,	/* bit 0 */
};
/*
 * Record header: three fixed little-endian fields (record_size, flags,
 * tag) followed by a flexible data[] array whose size is given by
 * record_size.
 */
struct mlx5_vf_migration_header {
__le64 record_size;
/* For future use in case we may need to change the kernel protocol */
__le32 flags; /* Use mlx5_vf_migf_header_flags */
__le32 tag; /* Use mlx5_vf_migf_header_tag */
__u8 data[]; /* Its size is given in the record_size */
};
struct mlx5_vhca_data_buffer {
struct sg_append_table table;
loff_t start_pos;
u64 length;
u64 allocated_length;
u32 mkey;
enum dma_data_direction dma_dir;
u8 dmaed:1;
vfio/mlx5: Pre-allocate chunks for the STOP_COPY phase This patch is another preparation step towards working in chunk mode. It pre-allocates chunks for the STOP_COPY phase to let the driver use them immediately and prevent an extra allocation upon that phase. Before that patch we had a single large buffer that was dedicated for the STOP_COPY phase as there was a single SAVE in the source for the last image. Once we'll move to chunk mode the idea is to have some small buffers that will be used upon the STOP_COPY phase. The driver will read-ahead from the firmware the full state in small/optimized chunks while letting QEMU/user space read in parallel the available data. Each buffer holds its chunk number to let it be recognized down the road in the coming patches. The chunk buffer size is picked-up based on the minimum size that firmware requires, the total full size and some max value in the driver code which was set to 8MB to achieve some optimized downtime in the general case. As the chunk mode is applicable even if we move directly to STOP_COPY the buffers preparation and some other related stuff is done unconditionally with regards to STOP/PRE-COPY. Note: In that phase in the series we still didn't activate the chunk mode and the first buffer will be used in all the places. Signed-off-by: Yishai Hadas <yishaih@nvidia.com> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com> Link: https://lore.kernel.org/r/20230911093856.81910-7-yishaih@nvidia.com Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2023-09-11 12:38:53 +03:00
u8 stop_copy_chunk_num;
struct list_head buf_elm;
struct mlx5_vf_migration_file *migf;
/* Optimize mlx5vf_get_migration_page() for sequential access */
struct scatterlist *last_offset_sg;
unsigned int sg_last_entry;
unsigned long last_offset;
};
/*
 * Context of an asynchronous operation; one instance is embedded in
 * struct mlx5_vf_migration_file as 'async_data'.
 * It groups the mlx5 async work item, a work_struct, the data and header
 * buffers involved, a status and an output pointer.
 * NOTE(review): presumably used by the asynchronous save command and its
 * completion callback - confirm in the .c file.
 */
struct mlx5vf_async_data {
struct mlx5_async_work cb_work;
struct work_struct work;
struct mlx5_vhca_data_buffer *buf;
struct mlx5_vhca_data_buffer *header_buf;
int status;
u8 stop_copy_chunk:1;
void *out;
};
/*
 * Per-chunk work context; struct mlx5_vf_migration_file holds an array of
 * these in 'save_data'.
 * NOTE(review): the fields mirror the arguments of
 * mlx5vf_mig_file_set_save_work(), which presumably fills them - confirm.
 */
struct mlx5vf_save_work_data {
struct mlx5_vf_migration_file *migf;
size_t next_required_umem_size;
struct work_struct work;
u8 chunk_num;
};
/*
 * Number of chunk slots kept per migration file: it sizes the buf[],
 * buf_header[] and save_data[] arrays of struct mlx5_vf_migration_file,
 * which hold the buffers set aside for the STOP_COPY phase.
 */
#define MAX_NUM_CHUNKS 2
struct mlx5_vf_migration_file {
struct file *filp;
struct mutex lock;
enum mlx5_vf_migf_state state;
enum mlx5_vf_load_state load_state;
u32 pdn;
loff_t max_pos;
u64 record_size;
u32 record_tag;
u64 stop_copy_prep_size;
u64 pre_copy_initial_bytes;
size_t next_required_umem_size;
u8 num_ready_chunks;
vfio/mlx5: Pre-allocate chunks for the STOP_COPY phase This patch is another preparation step towards working in chunk mode. It pre-allocates chunks for the STOP_COPY phase to let the driver use them immediately and prevent an extra allocation upon that phase. Before that patch we had a single large buffer that was dedicated for the STOP_COPY phase as there was a single SAVE in the source for the last image. Once we'll move to chunk mode the idea is to have some small buffers that will be used upon the STOP_COPY phase. The driver will read-ahead from the firmware the full state in small/optimized chunks while letting QEMU/user space read in parallel the available data. Each buffer holds its chunk number to let it be recognized down the road in the coming patches. The chunk buffer size is picked-up based on the minimum size that firmware requires, the total full size and some max value in the driver code which was set to 8MB to achieve some optimized downtime in the general case. As the chunk mode is applicable even if we move directly to STOP_COPY the buffers preparation and some other related stuff is done unconditionally with regards to STOP/PRE-COPY. Note: In that phase in the series we still didn't activate the chunk mode and the first buffer will be used in all the places. Signed-off-by: Yishai Hadas <yishaih@nvidia.com> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com> Link: https://lore.kernel.org/r/20230911093856.81910-7-yishaih@nvidia.com Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2023-09-11 12:38:53 +03:00
/* Upon chunk mode preserve another set of buffers for stop_copy phase */
struct mlx5_vhca_data_buffer *buf[MAX_NUM_CHUNKS];
struct mlx5_vhca_data_buffer *buf_header[MAX_NUM_CHUNKS];
struct mlx5vf_save_work_data save_data[MAX_NUM_CHUNKS];
spinlock_t list_lock;
struct list_head buf_list;
struct list_head avail_list;
struct mlx5vf_pci_core_device *mvdev;
wait_queue_head_t poll_wait;
struct completion save_comp;
struct mlx5_async_ctx async_ctx;
struct mlx5vf_async_data async_data;
};
/*
 * Buffer part of a struct mlx5_vhca_cq: a fragmented buffer together with
 * its control structure and two integer attributes.
 * NOTE(review): cqe_size/nent are presumably the size of one CQE and the
 * number of entries - confirm.
 */
struct mlx5_vhca_cq_buf {
struct mlx5_frag_buf_ctrl fbc;
struct mlx5_frag_buf frag_buf;
int cqe_size;
int nent;
};
/*
 * Completion queue wrapper: its buffer, a doorbell record, the mlx5 core
 * CQ object and a CQE count. Embedded in struct mlx5_vhca_page_tracker.
 */
struct mlx5_vhca_cq {
struct mlx5_vhca_cq_buf buf;
struct mlx5_db db;
struct mlx5_core_cq mcq;
size_t ncqe;
};
/*
 * Receive buffer of a struct mlx5_vhca_qp: a page array with a parallel
 * array of DMA addresses, plus an offset and an mkey.
 * NOTE(review): npages is presumably the length of both arrays - confirm.
 */
struct mlx5_vhca_recv_buf {
u32 npages;
struct page **page_list;
dma_addr_t *dma_addrs;
u32 next_rq_offset;
u32 mkey;
};
/*
 * Queue pair wrapper: fragmented buffer, doorbell record, receive buffer,
 * size attributes, the QP number and the receive-queue bookkeeping.
 * The page tracker references two of these (host_qp and fw_qp).
 */
struct mlx5_vhca_qp {
struct mlx5_frag_buf buf;
struct mlx5_db db;
struct mlx5_vhca_recv_buf recv_buf;
u32 tracked_page_size;
u32 max_msg_size;
u32 qpn;
/* Receive queue state */
struct {
/* NOTE(review): presumably producer/consumer counters - confirm */
unsigned int pc;
unsigned int cc;
unsigned int wqe_cnt;
__be32 *db;
struct mlx5_frag_buf_ctrl fbc;
} rq;
};
/*
 * Page tracker context, embedded in struct mlx5vf_pci_core_device as
 * 'tracker'. Holds an id, a pdn, two one-bit flags, a UAR page, a
 * completion queue, two queue pairs (host and firmware side), an mlx5
 * notifier block and a status.
 */
struct mlx5_vhca_page_tracker {
u32 id;
u32 pdn;
u8 is_err:1;
u8 object_changed:1;
struct mlx5_uars_page *uar;
struct mlx5_vhca_cq cq;
struct mlx5_vhca_qp *host_qp;
struct mlx5_vhca_qp *fw_qp;
struct mlx5_nb nb;
int status;
};
/*
 * mlx5 VF device context. It embeds the generic vfio_pci_core_device and
 * adds the VF identifiers, one-bit flags, the migration state with its
 * mutex, the resuming/saving migration files, the page tracker, a
 * workqueue, a notifier block and the mlx5 core device pointer.
 */
struct mlx5vf_pci_core_device {
struct vfio_pci_core_device core_device;
int vf_id;
u16 vhca_id;
/* One-bit flags */
u8 migrate_cap:1;
u8 deferred_reset:1;
u8 mdev_detach:1;
u8 log_active:1;
u8 chunk_mode:1;
struct completion tracker_comp;
/* protect migration state */
struct mutex state_mutex;
enum vfio_device_mig_state mig_state;
/* protect the reset_done flow */
spinlock_t reset_lock;
struct mlx5_vf_migration_file *resuming_migf;
struct mlx5_vf_migration_file *saving_migf;
struct mlx5_vhca_page_tracker tracker;
struct workqueue_struct *cb_wq;
struct notifier_block nb;
struct mlx5_core_dev *mdev;
};
/*
 * Single-bit query flags (bits 0..2).
 * NOTE(review): presumably OR-ed into the query_flags argument of
 * mlx5vf_cmd_query_vhca_migration_state() - confirm.
 */
enum {
	MLX5VF_QUERY_INC     = 0x1UL,
	MLX5VF_QUERY_FINAL   = 0x2UL,
	MLX5VF_QUERY_CLEANUP = 0x4UL,
};
/*
 * Suspend / resume commands for the VHCA of @mvdev; @op_mod is the
 * operation modifier passed with the command.
 * NOTE(review): the int result is presumably 0 or a negative errno -
 * confirm in the implementation.
 */
int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod);
int mlx5vf_cmd_resume_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod);
/*
 * Query the migration state of @mvdev. @state_size and @total_size are
 * output pointers.
 * NOTE(review): @query_flags is presumably a mask of MLX5VF_QUERY_* and
 * the result 0 or a negative errno - confirm in the implementation.
 */
int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
size_t *state_size, u64 *total_size,
u8 query_flags);
/*
 * Set up and tear down migration support on @mvdev. The set call takes
 * the vfio migration and logging ops tables to use.
 * NOTE(review): the exact split between "remove" and "close" is not
 * visible from the prototypes - see the implementation.
 */
void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
const struct vfio_migration_ops *mig_ops,
const struct vfio_log_ops *log_ops);
void mlx5vf_cmd_remove_migratable(struct mlx5vf_pci_core_device *mvdev);
void mlx5vf_cmd_close_migratable(struct mlx5vf_pci_core_device *mvdev);
/*
 * Save / load the VHCA state of @mvdev using data buffer @buf of
 * migration file @migf.
 * NOTE(review): @inc presumably selects an incremental save and @track
 * whether tracking continues afterwards; the result is presumably 0 or a
 * negative errno - confirm in the implementation.
 */
int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
struct mlx5_vf_migration_file *migf,
struct mlx5_vhca_data_buffer *buf, bool inc,
bool track);
int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
struct mlx5_vf_migration_file *migf,
struct mlx5_vhca_data_buffer *buf);
/*
 * Protection-domain allocation/release and resource cleanup for @migf.
 * NOTE(review): the allocated PD number is presumably stored in
 * migf->pdn - confirm.
 * Note the "mlx5fv" spelling of the cleanup function: it is the actual
 * symbol name and differs from the "mlx5vf" prefix used elsewhere.
 */
int mlx5vf_cmd_alloc_pd(struct mlx5_vf_migration_file *migf);
void mlx5vf_cmd_dealloc_pd(struct mlx5_vf_migration_file *migf);
void mlx5fv_cmd_clean_migf_resources(struct mlx5_vf_migration_file *migf);
/*
 * Data buffer API.
 *
 * alloc/free and get/put come in pairs; both producers take the owning
 * migration file, a length and a DMA direction and return a buffer
 * pointer.
 * NOTE(review): get/put presumably reuse buffers through the migf lists
 * (buf_list/avail_list) instead of always allocating, and failures are
 * presumably reported as ERR_PTR values - confirm in the implementation.
 */
struct mlx5_vhca_data_buffer *
mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf,
size_t length, enum dma_data_direction dma_dir);
void mlx5vf_free_data_buffer(struct mlx5_vhca_data_buffer *buf);
struct mlx5_vhca_data_buffer *
mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf,
size_t length, enum dma_data_direction dma_dir);
void mlx5vf_put_data_buffer(struct mlx5_vhca_data_buffer *buf);
/* Return the page of @buf that corresponds to @offset */
struct page *mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf,
unsigned long offset);
/*
 * Device state and migration file helpers.
 * NOTE(review): mlx5vf_state_mutex_unlock() presumably releases
 * mvdev->state_mutex - confirm.
 */
void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev);
/* @last_save_state is a pointer argument; presumably an output - confirm */
void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev,
enum mlx5_vf_migf_state *last_save_state);
/* Work handler: takes the work_struct pointer, as work callbacks do */
void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work);
/* Arguments match the fields of struct mlx5vf_save_work_data */
void mlx5vf_mig_file_set_save_work(struct mlx5_vf_migration_file *migf,
u8 chunk_num, size_t next_required_umem_size);
/*
 * Page tracker entry points, all taking the generic vfio_device.
 * start: takes an interval tree of @nnodes ranges and a @page_size pointer.
 * read_and_clear: takes an IOVA range and an iova_bitmap for the result.
 * NOTE(review): these signatures look like the vfio_log_ops callbacks and
 * presumably return 0 or a negative errno - confirm.
 */
int mlx5vf_start_page_tracker(struct vfio_device *vdev,
struct rb_root_cached *ranges, u32 nnodes, u64 *page_size);
int mlx5vf_stop_page_tracker(struct vfio_device *vdev);
int mlx5vf_tracker_read_and_clear(struct vfio_device *vdev, unsigned long iova,
unsigned long length, struct iova_bitmap *dirty);
#endif /* MLX5_VFIO_CMD_H */