mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-09-18 22:14:16 +00:00
virtio, vhost: features, fixes
vhost can now support legacy threading if enabled in Kconfig vsock memory allocation strategies for large buffers have been improved, reducing pressure on kmalloc vhost now supports the in-order feature guest bits missed the merge window fixes, cleanups all over the place Signed-off-by: Michael S. Tsirkin <mst@redhat.com> -----BEGIN PGP SIGNATURE----- iQFDBAABCgAtFiEEXQn9CHHI+FuUyooNKB8NuNKNVGkFAmiMvQEPHG1zdEByZWRo YXQuY29tAAoJECgfDbjSjVRpgr8IAKUrIjqqTYXLkbCWn6tK8T+LxZ6LkMkyHA1v AJ+y5fKDeLsT5QpusD1XRjXJVqXBwQEsTN0pNVuhWHlcCpUeOFEHuJaf/QMncbc3 deFlUfMa3ihniUxBuyhojlWURsf94uTC906lCFXlIsfSKH2CW6/SjKvqR0SH5PhN 5WaqRYiSFFwDlyG2Ul4e5temP/er2KuZfYyvcYCU8VdSEp6bjvqCHd9ztFIVuByp fFWsrHce6IqR8ixOOzavEjzfd8WAN3LGzXntj5KEaX3fZ6HxCZCMv+rNVqvJmLps cSrTgIUo60nCiZb8klUCS1YTEEvmdmJg3UmmddIpIhcsCYJSbOU= =2dxm -----END PGP SIGNATURE----- Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost Pull virtio updates from Michael Tsirkin: - vhost can now support legacy threading if enabled in Kconfig - vsock memory allocation strategies for large buffers have been improved, reducing pressure on kmalloc - vhost now supports the in-order feature. guest bits missed the merge window. - fixes, cleanups all over the place * tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (30 commits) vsock/virtio: Allocate nonlinear SKBs for handling large transmit buffers vsock/virtio: Rename virtio_vsock_skb_rx_put() vhost/vsock: Allocate nonlinear SKBs for handling large receive buffers vsock/virtio: Move SKB allocation lower-bound check to callers vsock/virtio: Rename virtio_vsock_alloc_skb() vsock/virtio: Resize receive buffers so that each SKB fits in a 4K page vsock/virtio: Move length check to callers of virtio_vsock_skb_rx_put() vsock/virtio: Validate length in packet header before skb_put() vhost/vsock: Avoid allocating arbitrarily-sized SKBs vhost_net: basic in_order support vhost: basic in order support vhost: fail early when __vhost_add_used() fails vhost: Reintroduce kthread API and add mode selection vdpa: Fix IDR memory leak in VDUSE module exit vdpa/mlx5: Fix release of uninitialized resources on error path vhost-scsi: Fix check for inline_sg_cnt exceeding preallocated limit virtio: virtio_dma_buf: fix missing parameter documentation vhost: Fix typos vhost: vringh: Remove unused functions vhost: vringh: Remove unused iotlb functions ...
This commit is contained in:
commit
821c9e515d
23 changed files with 574 additions and 342 deletions
|
@ -130,10 +130,10 @@ static void virtio_gpu_remove(struct virtio_device *vdev)
|
|||
|
||||
static void virtio_gpu_shutdown(struct virtio_device *vdev)
|
||||
{
|
||||
/*
|
||||
* drm does its own synchronization on shutdown.
|
||||
* Do nothing here, opt out of device reset.
|
||||
*/
|
||||
struct drm_device *dev = vdev->priv;
|
||||
|
||||
/* stop talking to the device */
|
||||
drm_dev_unplug(dev);
|
||||
}
|
||||
|
||||
static void virtio_gpu_config_changed(struct virtio_device *vdev)
|
||||
|
|
|
@ -908,6 +908,9 @@ void mlx5_vdpa_destroy_mr_resources(struct mlx5_vdpa_dev *mvdev)
|
|||
{
|
||||
struct mlx5_vdpa_mr_resources *mres = &mvdev->mres;
|
||||
|
||||
if (!mres->wq_gc)
|
||||
return;
|
||||
|
||||
atomic_set(&mres->shutdown, 1);
|
||||
|
||||
flush_delayed_work(&mres->gc_dwork_ent);
|
||||
|
|
|
@ -2491,7 +2491,7 @@ static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
|
|||
}
|
||||
|
||||
mvq = &ndev->vqs[idx];
|
||||
ndev->needs_teardown = num != mvq->num_ent;
|
||||
ndev->needs_teardown |= num != mvq->num_ent;
|
||||
mvq->num_ent = num;
|
||||
}
|
||||
|
||||
|
@ -3432,15 +3432,17 @@ static void mlx5_vdpa_free(struct vdpa_device *vdev)
|
|||
|
||||
ndev = to_mlx5_vdpa_ndev(mvdev);
|
||||
|
||||
/* Functions called here should be able to work with
|
||||
* uninitialized resources.
|
||||
*/
|
||||
free_fixed_resources(ndev);
|
||||
mlx5_vdpa_clean_mrs(mvdev);
|
||||
mlx5_vdpa_destroy_mr_resources(&ndev->mvdev);
|
||||
mlx5_cmd_cleanup_async_ctx(&mvdev->async_ctx);
|
||||
|
||||
if (!is_zero_ether_addr(ndev->config.mac)) {
|
||||
pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
|
||||
mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
|
||||
}
|
||||
mlx5_cmd_cleanup_async_ctx(&mvdev->async_ctx);
|
||||
mlx5_vdpa_free_resources(&ndev->mvdev);
|
||||
free_irqs(ndev);
|
||||
kfree(ndev->event_cbs);
|
||||
|
@ -3888,6 +3890,8 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
|
|||
mvdev->actual_features =
|
||||
(device_features & BIT_ULL(VIRTIO_F_VERSION_1));
|
||||
|
||||
mlx5_cmd_init_async_ctx(mdev, &mvdev->async_ctx);
|
||||
|
||||
ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
|
||||
ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL);
|
||||
if (!ndev->vqs || !ndev->event_cbs) {
|
||||
|
@ -3960,8 +3964,6 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
|
|||
ndev->rqt_size = 1;
|
||||
}
|
||||
|
||||
mlx5_cmd_init_async_ctx(mdev, &mvdev->async_ctx);
|
||||
|
||||
ndev->mvdev.mlx_features = device_features;
|
||||
mvdev->vdev.dma_dev = &mdev->pdev->dev;
|
||||
err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
|
||||
|
|
|
@ -2216,6 +2216,7 @@ static void vduse_exit(void)
|
|||
cdev_del(&vduse_ctrl_cdev);
|
||||
unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
|
||||
class_unregister(&vduse_class);
|
||||
idr_destroy(&vduse_idr);
|
||||
}
|
||||
module_exit(vduse_exit);
|
||||
|
||||
|
|
|
@ -95,4 +95,22 @@ config VHOST_CROSS_ENDIAN_LEGACY
|
|||
|
||||
If unsure, say "N".
|
||||
|
||||
config VHOST_ENABLE_FORK_OWNER_CONTROL
|
||||
bool "Enable VHOST_ENABLE_FORK_OWNER_CONTROL"
|
||||
default y
|
||||
help
|
||||
This option enables two IOCTLs: VHOST_SET_FORK_FROM_OWNER and
|
||||
VHOST_GET_FORK_FROM_OWNER. These allow userspace applications
|
||||
to modify the vhost worker mode for vhost devices.
|
||||
|
||||
Also expose module parameter 'fork_from_owner_default' to allow users
|
||||
to configure the default mode for vhost workers.
|
||||
|
||||
By default, `VHOST_ENABLE_FORK_OWNER_CONTROL` is set to `y`,
|
||||
users can change the worker thread mode as needed.
|
||||
If this config is disabled (n),the related IOCTLs and parameters will
|
||||
be unavailable.
|
||||
|
||||
If unsure, say "Y".
|
||||
|
||||
endif
|
||||
|
|
|
@ -74,7 +74,8 @@ static const u64 vhost_net_features[VIRTIO_FEATURES_DWORDS] = {
|
|||
(1ULL << VHOST_NET_F_VIRTIO_NET_HDR) |
|
||||
(1ULL << VIRTIO_NET_F_MRG_RXBUF) |
|
||||
(1ULL << VIRTIO_F_ACCESS_PLATFORM) |
|
||||
(1ULL << VIRTIO_F_RING_RESET),
|
||||
(1ULL << VIRTIO_F_RING_RESET) |
|
||||
(1ULL << VIRTIO_F_IN_ORDER),
|
||||
VIRTIO_BIT(VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO) |
|
||||
VIRTIO_BIT(VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO),
|
||||
};
|
||||
|
@ -376,7 +377,8 @@ static void vhost_zerocopy_signal_used(struct vhost_net *net,
|
|||
while (j) {
|
||||
add = min(UIO_MAXIOV - nvq->done_idx, j);
|
||||
vhost_add_used_and_signal_n(vq->dev, vq,
|
||||
&vq->heads[nvq->done_idx], add);
|
||||
&vq->heads[nvq->done_idx],
|
||||
NULL, add);
|
||||
nvq->done_idx = (nvq->done_idx + add) % UIO_MAXIOV;
|
||||
j -= add;
|
||||
}
|
||||
|
@ -451,7 +453,8 @@ static int vhost_net_enable_vq(struct vhost_net *n,
|
|||
return vhost_poll_start(poll, sock->file);
|
||||
}
|
||||
|
||||
static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq)
|
||||
static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq,
|
||||
unsigned int count)
|
||||
{
|
||||
struct vhost_virtqueue *vq = &nvq->vq;
|
||||
struct vhost_dev *dev = vq->dev;
|
||||
|
@ -459,7 +462,8 @@ static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq)
|
|||
if (!nvq->done_idx)
|
||||
return;
|
||||
|
||||
vhost_add_used_and_signal_n(dev, vq, vq->heads, nvq->done_idx);
|
||||
vhost_add_used_and_signal_n(dev, vq, vq->heads,
|
||||
vq->nheads, count);
|
||||
nvq->done_idx = 0;
|
||||
}
|
||||
|
||||
|
@ -468,6 +472,8 @@ static void vhost_tx_batch(struct vhost_net *net,
|
|||
struct socket *sock,
|
||||
struct msghdr *msghdr)
|
||||
{
|
||||
struct vhost_virtqueue *vq = &nvq->vq;
|
||||
bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
|
||||
struct tun_msg_ctl ctl = {
|
||||
.type = TUN_MSG_PTR,
|
||||
.num = nvq->batched_xdp,
|
||||
|
@ -475,6 +481,11 @@ static void vhost_tx_batch(struct vhost_net *net,
|
|||
};
|
||||
int i, err;
|
||||
|
||||
if (in_order) {
|
||||
vq->heads[0].len = 0;
|
||||
vq->nheads[0] = nvq->done_idx;
|
||||
}
|
||||
|
||||
if (nvq->batched_xdp == 0)
|
||||
goto signal_used;
|
||||
|
||||
|
@ -496,7 +507,7 @@ static void vhost_tx_batch(struct vhost_net *net,
|
|||
}
|
||||
|
||||
signal_used:
|
||||
vhost_net_signal_used(nvq);
|
||||
vhost_net_signal_used(nvq, in_order ? 1 : nvq->done_idx);
|
||||
nvq->batched_xdp = 0;
|
||||
}
|
||||
|
||||
|
@ -750,6 +761,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
|
|||
int sent_pkts = 0;
|
||||
bool sock_can_batch = (sock->sk->sk_sndbuf == INT_MAX);
|
||||
bool busyloop_intr;
|
||||
bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
|
||||
|
||||
do {
|
||||
busyloop_intr = false;
|
||||
|
@ -786,11 +798,13 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
|
|||
break;
|
||||
}
|
||||
|
||||
/* We can't build XDP buff, go for single
|
||||
* packet path but let's flush batched
|
||||
* packets.
|
||||
*/
|
||||
vhost_tx_batch(net, nvq, sock, &msg);
|
||||
if (nvq->batched_xdp) {
|
||||
/* We can't build XDP buff, go for single
|
||||
* packet path but let's flush batched
|
||||
* packets.
|
||||
*/
|
||||
vhost_tx_batch(net, nvq, sock, &msg);
|
||||
}
|
||||
msg.msg_control = NULL;
|
||||
} else {
|
||||
if (tx_can_batch(vq, total_len))
|
||||
|
@ -811,8 +825,12 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
|
|||
pr_debug("Truncated TX packet: len %d != %zd\n",
|
||||
err, len);
|
||||
done:
|
||||
vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
|
||||
vq->heads[nvq->done_idx].len = 0;
|
||||
if (in_order) {
|
||||
vq->heads[0].id = cpu_to_vhost32(vq, head);
|
||||
} else {
|
||||
vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
|
||||
vq->heads[nvq->done_idx].len = 0;
|
||||
}
|
||||
++nvq->done_idx;
|
||||
} while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len)));
|
||||
|
||||
|
@ -991,7 +1009,7 @@ static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk)
|
|||
}
|
||||
|
||||
static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
|
||||
bool *busyloop_intr)
|
||||
bool *busyloop_intr, unsigned int count)
|
||||
{
|
||||
struct vhost_net_virtqueue *rnvq = &net->vqs[VHOST_NET_VQ_RX];
|
||||
struct vhost_net_virtqueue *tnvq = &net->vqs[VHOST_NET_VQ_TX];
|
||||
|
@ -1001,7 +1019,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
|
|||
|
||||
if (!len && rvq->busyloop_timeout) {
|
||||
/* Flush batched heads first */
|
||||
vhost_net_signal_used(rnvq);
|
||||
vhost_net_signal_used(rnvq, count);
|
||||
/* Both tx vq and rx socket were polled here */
|
||||
vhost_net_busy_poll(net, rvq, tvq, busyloop_intr, true);
|
||||
|
||||
|
@ -1013,7 +1031,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
|
|||
|
||||
/* This is a multi-buffer version of vhost_get_desc, that works if
|
||||
* vq has read descriptors only.
|
||||
* @vq - the relevant virtqueue
|
||||
* @nvq - the relevant vhost_net virtqueue
|
||||
* @datalen - data length we'll be reading
|
||||
* @iovcount - returned count of io vectors we fill
|
||||
* @log - vhost log
|
||||
|
@ -1021,14 +1039,17 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
|
|||
* @quota - headcount quota, 1 for big buffer
|
||||
* returns number of buffer heads allocated, negative on error
|
||||
*/
|
||||
static int get_rx_bufs(struct vhost_virtqueue *vq,
|
||||
static int get_rx_bufs(struct vhost_net_virtqueue *nvq,
|
||||
struct vring_used_elem *heads,
|
||||
u16 *nheads,
|
||||
int datalen,
|
||||
unsigned *iovcount,
|
||||
struct vhost_log *log,
|
||||
unsigned *log_num,
|
||||
unsigned int quota)
|
||||
{
|
||||
struct vhost_virtqueue *vq = &nvq->vq;
|
||||
bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
|
||||
unsigned int out, in;
|
||||
int seg = 0;
|
||||
int headcount = 0;
|
||||
|
@ -1065,14 +1086,16 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
|
|||
nlogs += *log_num;
|
||||
log += *log_num;
|
||||
}
|
||||
heads[headcount].id = cpu_to_vhost32(vq, d);
|
||||
len = iov_length(vq->iov + seg, in);
|
||||
heads[headcount].len = cpu_to_vhost32(vq, len);
|
||||
datalen -= len;
|
||||
if (!in_order) {
|
||||
heads[headcount].id = cpu_to_vhost32(vq, d);
|
||||
heads[headcount].len = cpu_to_vhost32(vq, len);
|
||||
}
|
||||
++headcount;
|
||||
datalen -= len;
|
||||
seg += in;
|
||||
}
|
||||
heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen);
|
||||
|
||||
*iovcount = seg;
|
||||
if (unlikely(log))
|
||||
*log_num = nlogs;
|
||||
|
@ -1082,6 +1105,15 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
|
|||
r = UIO_MAXIOV + 1;
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (!in_order)
|
||||
heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen);
|
||||
else {
|
||||
heads[0].len = cpu_to_vhost32(vq, len + datalen);
|
||||
heads[0].id = cpu_to_vhost32(vq, d);
|
||||
nheads[0] = headcount;
|
||||
}
|
||||
|
||||
return headcount;
|
||||
err:
|
||||
vhost_discard_vq_desc(vq, headcount);
|
||||
|
@ -1094,6 +1126,8 @@ static void handle_rx(struct vhost_net *net)
|
|||
{
|
||||
struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_RX];
|
||||
struct vhost_virtqueue *vq = &nvq->vq;
|
||||
bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
|
||||
unsigned int count = 0;
|
||||
unsigned in, log;
|
||||
struct vhost_log *vq_log;
|
||||
struct msghdr msg = {
|
||||
|
@ -1141,12 +1175,13 @@ static void handle_rx(struct vhost_net *net)
|
|||
|
||||
do {
|
||||
sock_len = vhost_net_rx_peek_head_len(net, sock->sk,
|
||||
&busyloop_intr);
|
||||
&busyloop_intr, count);
|
||||
if (!sock_len)
|
||||
break;
|
||||
sock_len += sock_hlen;
|
||||
vhost_len = sock_len + vhost_hlen;
|
||||
headcount = get_rx_bufs(vq, vq->heads + nvq->done_idx,
|
||||
headcount = get_rx_bufs(nvq, vq->heads + count,
|
||||
vq->nheads + count,
|
||||
vhost_len, &in, vq_log, &log,
|
||||
likely(mergeable) ? UIO_MAXIOV : 1);
|
||||
/* On error, stop handling until the next kick. */
|
||||
|
@ -1222,8 +1257,11 @@ static void handle_rx(struct vhost_net *net)
|
|||
goto out;
|
||||
}
|
||||
nvq->done_idx += headcount;
|
||||
if (nvq->done_idx > VHOST_NET_BATCH)
|
||||
vhost_net_signal_used(nvq);
|
||||
count += in_order ? 1 : headcount;
|
||||
if (nvq->done_idx > VHOST_NET_BATCH) {
|
||||
vhost_net_signal_used(nvq, count);
|
||||
count = 0;
|
||||
}
|
||||
if (unlikely(vq_log))
|
||||
vhost_log_write(vq, vq_log, log, vhost_len,
|
||||
vq->iov, in);
|
||||
|
@ -1235,7 +1273,7 @@ static void handle_rx(struct vhost_net *net)
|
|||
else if (!sock_len)
|
||||
vhost_net_enable_vq(net, vq);
|
||||
out:
|
||||
vhost_net_signal_used(nvq);
|
||||
vhost_net_signal_used(nvq, count);
|
||||
mutex_unlock(&vq->mutex);
|
||||
}
|
||||
|
||||
|
|
|
@ -71,7 +71,7 @@ static int vhost_scsi_set_inline_sg_cnt(const char *buf,
|
|||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (ret > VHOST_SCSI_PREALLOC_SGLS) {
|
||||
if (cnt > VHOST_SCSI_PREALLOC_SGLS) {
|
||||
pr_err("Max inline_sg_cnt is %u\n", VHOST_SCSI_PREALLOC_SGLS);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
@ -152,7 +152,7 @@ struct vhost_scsi_nexus {
|
|||
struct vhost_scsi_tpg {
|
||||
/* Vhost port target portal group tag for TCM */
|
||||
u16 tport_tpgt;
|
||||
/* Used to track number of TPG Port/Lun Links wrt to explict I_T Nexus shutdown */
|
||||
/* Used to track number of TPG Port/Lun Links wrt to explicit I_T Nexus shutdown */
|
||||
int tv_tpg_port_count;
|
||||
/* Used for vhost_scsi device reference to tpg_nexus, protected by tv_tpg_mutex */
|
||||
int tv_tpg_vhost_count;
|
||||
|
@ -311,12 +311,12 @@ static void vhost_scsi_init_inflight(struct vhost_scsi *vs,
|
|||
|
||||
mutex_lock(&vq->mutex);
|
||||
|
||||
/* store old infight */
|
||||
/* store old inflight */
|
||||
idx = vs->vqs[i].inflight_idx;
|
||||
if (old_inflight)
|
||||
old_inflight[i] = &vs->vqs[i].inflights[idx];
|
||||
|
||||
/* setup new infight */
|
||||
/* setup new inflight */
|
||||
vs->vqs[i].inflight_idx = idx ^ 1;
|
||||
new_inflight = &vs->vqs[i].inflights[idx ^ 1];
|
||||
kref_init(&new_inflight->kref);
|
||||
|
@ -1226,10 +1226,8 @@ vhost_scsi_get_req(struct vhost_virtqueue *vq, struct vhost_scsi_ctx *vc,
|
|||
/* validated at handler entry */
|
||||
vs_tpg = vhost_vq_get_backend(vq);
|
||||
tpg = READ_ONCE(vs_tpg[*vc->target]);
|
||||
if (unlikely(!tpg)) {
|
||||
vq_err(vq, "Target 0x%x does not exist\n", *vc->target);
|
||||
if (unlikely(!tpg))
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
if (tpgp)
|
||||
|
@ -1249,7 +1247,7 @@ vhost_scsi_setup_resp_iovs(struct vhost_scsi_cmd *cmd, struct iovec *in_iovs,
|
|||
if (!in_iovs_cnt)
|
||||
return 0;
|
||||
/*
|
||||
* Initiator's normally just put the virtio_scsi_cmd_resp in the first
|
||||
* Initiators normally just put the virtio_scsi_cmd_resp in the first
|
||||
* iov, but just in case they wedged in some data with it we check for
|
||||
* greater than or equal to the response struct.
|
||||
*/
|
||||
|
@ -1457,7 +1455,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
|
|||
cmd = vhost_scsi_get_cmd(vq, tag);
|
||||
if (IS_ERR(cmd)) {
|
||||
ret = PTR_ERR(cmd);
|
||||
vq_err(vq, "vhost_scsi_get_tag failed %dd\n", ret);
|
||||
vq_err(vq, "vhost_scsi_get_tag failed %d\n", ret);
|
||||
goto err;
|
||||
}
|
||||
cmd->tvc_vq = vq;
|
||||
|
@ -2609,7 +2607,7 @@ static int vhost_scsi_make_nexus(struct vhost_scsi_tpg *tpg,
|
|||
return -ENOMEM;
|
||||
}
|
||||
/*
|
||||
* Since we are running in 'demo mode' this call with generate a
|
||||
* Since we are running in 'demo mode' this call will generate a
|
||||
* struct se_node_acl for the vhost_scsi struct se_portal_group with
|
||||
* the SCSI Initiator port name of the passed configfs group 'name'.
|
||||
*/
|
||||
|
@ -2915,7 +2913,7 @@ static ssize_t
|
|||
vhost_scsi_wwn_version_show(struct config_item *item, char *page)
|
||||
{
|
||||
return sysfs_emit(page, "TCM_VHOST fabric module %s on %s/%s"
|
||||
"on "UTS_RELEASE"\n", VHOST_SCSI_VERSION, utsname()->sysname,
|
||||
" on "UTS_RELEASE"\n", VHOST_SCSI_VERSION, utsname()->sysname,
|
||||
utsname()->machine);
|
||||
}
|
||||
|
||||
|
@ -2983,13 +2981,13 @@ out_vhost_scsi_deregister:
|
|||
vhost_scsi_deregister();
|
||||
out:
|
||||
return ret;
|
||||
};
|
||||
}
|
||||
|
||||
static void vhost_scsi_exit(void)
|
||||
{
|
||||
target_unregister_template(&vhost_scsi_ops);
|
||||
vhost_scsi_deregister();
|
||||
};
|
||||
}
|
||||
|
||||
MODULE_DESCRIPTION("VHOST_SCSI series fabric driver");
|
||||
MODULE_ALIAS("tcm_vhost");
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#include <linux/slab.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/cgroup.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/sort.h>
|
||||
#include <linux/sched/mm.h>
|
||||
|
@ -41,6 +42,13 @@ static int max_iotlb_entries = 2048;
|
|||
module_param(max_iotlb_entries, int, 0444);
|
||||
MODULE_PARM_DESC(max_iotlb_entries,
|
||||
"Maximum number of iotlb entries. (default: 2048)");
|
||||
static bool fork_from_owner_default = VHOST_FORK_OWNER_TASK;
|
||||
|
||||
#ifdef CONFIG_VHOST_ENABLE_FORK_OWNER_CONTROL
|
||||
module_param(fork_from_owner_default, bool, 0444);
|
||||
MODULE_PARM_DESC(fork_from_owner_default,
|
||||
"Set task mode as the default(default: Y)");
|
||||
#endif
|
||||
|
||||
enum {
|
||||
VHOST_MEMORY_F_LOG = 0x1,
|
||||
|
@ -242,7 +250,7 @@ static void vhost_worker_queue(struct vhost_worker *worker,
|
|||
* test_and_set_bit() implies a memory barrier.
|
||||
*/
|
||||
llist_add(&work->node, &worker->work_list);
|
||||
vhost_task_wake(worker->vtsk);
|
||||
worker->ops->wakeup(worker);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -364,6 +372,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
|
|||
vq->avail = NULL;
|
||||
vq->used = NULL;
|
||||
vq->last_avail_idx = 0;
|
||||
vq->next_avail_head = 0;
|
||||
vq->avail_idx = 0;
|
||||
vq->last_used_idx = 0;
|
||||
vq->signalled_used = 0;
|
||||
|
@ -388,6 +397,44 @@ static void vhost_vq_reset(struct vhost_dev *dev,
|
|||
__vhost_vq_meta_reset(vq);
|
||||
}
|
||||
|
||||
static int vhost_run_work_kthread_list(void *data)
|
||||
{
|
||||
struct vhost_worker *worker = data;
|
||||
struct vhost_work *work, *work_next;
|
||||
struct vhost_dev *dev = worker->dev;
|
||||
struct llist_node *node;
|
||||
|
||||
kthread_use_mm(dev->mm);
|
||||
|
||||
for (;;) {
|
||||
/* mb paired w/ kthread_stop */
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
|
||||
if (kthread_should_stop()) {
|
||||
__set_current_state(TASK_RUNNING);
|
||||
break;
|
||||
}
|
||||
node = llist_del_all(&worker->work_list);
|
||||
if (!node)
|
||||
schedule();
|
||||
|
||||
node = llist_reverse_order(node);
|
||||
/* make sure flag is seen after deletion */
|
||||
smp_wmb();
|
||||
llist_for_each_entry_safe(work, work_next, node, node) {
|
||||
clear_bit(VHOST_WORK_QUEUED, &work->flags);
|
||||
__set_current_state(TASK_RUNNING);
|
||||
kcov_remote_start_common(worker->kcov_handle);
|
||||
work->fn(work);
|
||||
kcov_remote_stop();
|
||||
cond_resched();
|
||||
}
|
||||
}
|
||||
kthread_unuse_mm(dev->mm);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool vhost_run_work_list(void *data)
|
||||
{
|
||||
struct vhost_worker *worker = data;
|
||||
|
@ -455,6 +502,8 @@ static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
|
|||
vq->log = NULL;
|
||||
kfree(vq->heads);
|
||||
vq->heads = NULL;
|
||||
kfree(vq->nheads);
|
||||
vq->nheads = NULL;
|
||||
}
|
||||
|
||||
/* Helper to allocate iovec buffers for all vqs. */
|
||||
|
@ -472,7 +521,9 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
|
|||
GFP_KERNEL);
|
||||
vq->heads = kmalloc_array(dev->iov_limit, sizeof(*vq->heads),
|
||||
GFP_KERNEL);
|
||||
if (!vq->indirect || !vq->log || !vq->heads)
|
||||
vq->nheads = kmalloc_array(dev->iov_limit, sizeof(*vq->nheads),
|
||||
GFP_KERNEL);
|
||||
if (!vq->indirect || !vq->log || !vq->heads || !vq->nheads)
|
||||
goto err_nomem;
|
||||
}
|
||||
return 0;
|
||||
|
@ -552,6 +603,7 @@ void vhost_dev_init(struct vhost_dev *dev,
|
|||
dev->byte_weight = byte_weight;
|
||||
dev->use_worker = use_worker;
|
||||
dev->msg_handler = msg_handler;
|
||||
dev->fork_owner = fork_from_owner_default;
|
||||
init_waitqueue_head(&dev->wait);
|
||||
INIT_LIST_HEAD(&dev->read_list);
|
||||
INIT_LIST_HEAD(&dev->pending_list);
|
||||
|
@ -581,6 +633,46 @@ long vhost_dev_check_owner(struct vhost_dev *dev)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(vhost_dev_check_owner);
|
||||
|
||||
struct vhost_attach_cgroups_struct {
|
||||
struct vhost_work work;
|
||||
struct task_struct *owner;
|
||||
int ret;
|
||||
};
|
||||
|
||||
static void vhost_attach_cgroups_work(struct vhost_work *work)
|
||||
{
|
||||
struct vhost_attach_cgroups_struct *s;
|
||||
|
||||
s = container_of(work, struct vhost_attach_cgroups_struct, work);
|
||||
s->ret = cgroup_attach_task_all(s->owner, current);
|
||||
}
|
||||
|
||||
static int vhost_attach_task_to_cgroups(struct vhost_worker *worker)
|
||||
{
|
||||
struct vhost_attach_cgroups_struct attach;
|
||||
int saved_cnt;
|
||||
|
||||
attach.owner = current;
|
||||
|
||||
vhost_work_init(&attach.work, vhost_attach_cgroups_work);
|
||||
vhost_worker_queue(worker, &attach.work);
|
||||
|
||||
mutex_lock(&worker->mutex);
|
||||
|
||||
/*
|
||||
* Bypass attachment_cnt check in __vhost_worker_flush:
|
||||
* Temporarily change it to INT_MAX to bypass the check
|
||||
*/
|
||||
saved_cnt = worker->attachment_cnt;
|
||||
worker->attachment_cnt = INT_MAX;
|
||||
__vhost_worker_flush(worker);
|
||||
worker->attachment_cnt = saved_cnt;
|
||||
|
||||
mutex_unlock(&worker->mutex);
|
||||
|
||||
return attach.ret;
|
||||
}
|
||||
|
||||
/* Caller should have device mutex */
|
||||
bool vhost_dev_has_owner(struct vhost_dev *dev)
|
||||
{
|
||||
|
@ -594,10 +686,10 @@ static void vhost_attach_mm(struct vhost_dev *dev)
|
|||
if (dev->use_worker) {
|
||||
dev->mm = get_task_mm(current);
|
||||
} else {
|
||||
/* vDPA device does not use worker thead, so there's
|
||||
* no need to hold the address space for mm. This help
|
||||
/* vDPA device does not use worker thread, so there's
|
||||
* no need to hold the address space for mm. This helps
|
||||
* to avoid deadlock in the case of mmap() which may
|
||||
* held the refcnt of the file and depends on release
|
||||
* hold the refcnt of the file and depends on release
|
||||
* method to remove vma.
|
||||
*/
|
||||
dev->mm = current->mm;
|
||||
|
@ -626,7 +718,7 @@ static void vhost_worker_destroy(struct vhost_dev *dev,
|
|||
|
||||
WARN_ON(!llist_empty(&worker->work_list));
|
||||
xa_erase(&dev->worker_xa, worker->id);
|
||||
vhost_task_stop(worker->vtsk);
|
||||
worker->ops->stop(worker);
|
||||
kfree(worker);
|
||||
}
|
||||
|
||||
|
@ -649,42 +741,115 @@ static void vhost_workers_free(struct vhost_dev *dev)
|
|||
xa_destroy(&dev->worker_xa);
|
||||
}
|
||||
|
||||
static void vhost_task_wakeup(struct vhost_worker *worker)
|
||||
{
|
||||
return vhost_task_wake(worker->vtsk);
|
||||
}
|
||||
|
||||
static void vhost_kthread_wakeup(struct vhost_worker *worker)
|
||||
{
|
||||
wake_up_process(worker->kthread_task);
|
||||
}
|
||||
|
||||
static void vhost_task_do_stop(struct vhost_worker *worker)
|
||||
{
|
||||
return vhost_task_stop(worker->vtsk);
|
||||
}
|
||||
|
||||
static void vhost_kthread_do_stop(struct vhost_worker *worker)
|
||||
{
|
||||
kthread_stop(worker->kthread_task);
|
||||
}
|
||||
|
||||
static int vhost_task_worker_create(struct vhost_worker *worker,
|
||||
struct vhost_dev *dev, const char *name)
|
||||
{
|
||||
struct vhost_task *vtsk;
|
||||
u32 id;
|
||||
int ret;
|
||||
|
||||
vtsk = vhost_task_create(vhost_run_work_list, vhost_worker_killed,
|
||||
worker, name);
|
||||
if (IS_ERR(vtsk))
|
||||
return PTR_ERR(vtsk);
|
||||
|
||||
worker->vtsk = vtsk;
|
||||
vhost_task_start(vtsk);
|
||||
ret = xa_alloc(&dev->worker_xa, &id, worker, xa_limit_32b, GFP_KERNEL);
|
||||
if (ret < 0) {
|
||||
vhost_task_do_stop(worker);
|
||||
return ret;
|
||||
}
|
||||
worker->id = id;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int vhost_kthread_worker_create(struct vhost_worker *worker,
|
||||
struct vhost_dev *dev, const char *name)
|
||||
{
|
||||
struct task_struct *task;
|
||||
u32 id;
|
||||
int ret;
|
||||
|
||||
task = kthread_create(vhost_run_work_kthread_list, worker, "%s", name);
|
||||
if (IS_ERR(task))
|
||||
return PTR_ERR(task);
|
||||
|
||||
worker->kthread_task = task;
|
||||
wake_up_process(task);
|
||||
ret = xa_alloc(&dev->worker_xa, &id, worker, xa_limit_32b, GFP_KERNEL);
|
||||
if (ret < 0)
|
||||
goto stop_worker;
|
||||
|
||||
ret = vhost_attach_task_to_cgroups(worker);
|
||||
if (ret)
|
||||
goto stop_worker;
|
||||
|
||||
worker->id = id;
|
||||
return 0;
|
||||
|
||||
stop_worker:
|
||||
vhost_kthread_do_stop(worker);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const struct vhost_worker_ops kthread_ops = {
|
||||
.create = vhost_kthread_worker_create,
|
||||
.stop = vhost_kthread_do_stop,
|
||||
.wakeup = vhost_kthread_wakeup,
|
||||
};
|
||||
|
||||
static const struct vhost_worker_ops vhost_task_ops = {
|
||||
.create = vhost_task_worker_create,
|
||||
.stop = vhost_task_do_stop,
|
||||
.wakeup = vhost_task_wakeup,
|
||||
};
|
||||
|
||||
static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev)
|
||||
{
|
||||
struct vhost_worker *worker;
|
||||
struct vhost_task *vtsk;
|
||||
char name[TASK_COMM_LEN];
|
||||
int ret;
|
||||
u32 id;
|
||||
const struct vhost_worker_ops *ops = dev->fork_owner ? &vhost_task_ops :
|
||||
&kthread_ops;
|
||||
|
||||
worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT);
|
||||
if (!worker)
|
||||
return NULL;
|
||||
|
||||
worker->dev = dev;
|
||||
worker->ops = ops;
|
||||
snprintf(name, sizeof(name), "vhost-%d", current->pid);
|
||||
|
||||
vtsk = vhost_task_create(vhost_run_work_list, vhost_worker_killed,
|
||||
worker, name);
|
||||
if (IS_ERR(vtsk))
|
||||
goto free_worker;
|
||||
|
||||
mutex_init(&worker->mutex);
|
||||
init_llist_head(&worker->work_list);
|
||||
worker->kcov_handle = kcov_common_handle();
|
||||
worker->vtsk = vtsk;
|
||||
|
||||
vhost_task_start(vtsk);
|
||||
|
||||
ret = xa_alloc(&dev->worker_xa, &id, worker, xa_limit_32b, GFP_KERNEL);
|
||||
ret = ops->create(worker, dev, name);
|
||||
if (ret < 0)
|
||||
goto stop_worker;
|
||||
worker->id = id;
|
||||
goto free_worker;
|
||||
|
||||
return worker;
|
||||
|
||||
stop_worker:
|
||||
vhost_task_stop(vtsk);
|
||||
free_worker:
|
||||
kfree(worker);
|
||||
return NULL;
|
||||
|
@ -731,7 +896,7 @@ static void __vhost_vq_attach_worker(struct vhost_virtqueue *vq,
|
|||
* We don't want to call synchronize_rcu for every vq during setup
|
||||
* because it will slow down VM startup. If we haven't done
|
||||
* VHOST_SET_VRING_KICK and not done the driver specific
|
||||
* SET_ENDPOINT/RUNNUNG then we can skip the sync since there will
|
||||
* SET_ENDPOINT/RUNNING then we can skip the sync since there will
|
||||
* not be any works queued for scsi and net.
|
||||
*/
|
||||
mutex_lock(&vq->mutex);
|
||||
|
@ -865,6 +1030,14 @@ long vhost_worker_ioctl(struct vhost_dev *dev, unsigned int ioctl,
|
|||
switch (ioctl) {
|
||||
/* dev worker ioctls */
|
||||
case VHOST_NEW_WORKER:
|
||||
/*
|
||||
* vhost_tasks will account for worker threads under the parent's
|
||||
* NPROC value but kthreads do not. To avoid userspace overflowing
|
||||
* the system with worker threads fork_owner must be true.
|
||||
*/
|
||||
if (!dev->fork_owner)
|
||||
return -EFAULT;
|
||||
|
||||
ret = vhost_new_worker(dev, &state);
|
||||
if (!ret && copy_to_user(argp, &state, sizeof(state)))
|
||||
ret = -EFAULT;
|
||||
|
@ -982,6 +1155,7 @@ void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_iotlb *umem)
|
|||
|
||||
vhost_dev_cleanup(dev);
|
||||
|
||||
dev->fork_owner = fork_from_owner_default;
|
||||
dev->umem = umem;
|
||||
/* We don't need VQ locks below since vhost_dev_cleanup makes sure
|
||||
* VQs aren't running.
|
||||
|
@ -1990,14 +2164,15 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
|
|||
break;
|
||||
}
|
||||
if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
|
||||
vq->last_avail_idx = s.num & 0xffff;
|
||||
vq->next_avail_head = vq->last_avail_idx =
|
||||
s.num & 0xffff;
|
||||
vq->last_used_idx = (s.num >> 16) & 0xffff;
|
||||
} else {
|
||||
if (s.num > 0xffff) {
|
||||
r = -EINVAL;
|
||||
break;
|
||||
}
|
||||
vq->last_avail_idx = s.num;
|
||||
vq->next_avail_head = vq->last_avail_idx = s.num;
|
||||
}
|
||||
/* Forget the cached index value. */
|
||||
vq->avail_idx = vq->last_avail_idx;
|
||||
|
@ -2135,6 +2310,45 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp)
|
|||
goto done;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_VHOST_ENABLE_FORK_OWNER_CONTROL
|
||||
if (ioctl == VHOST_SET_FORK_FROM_OWNER) {
|
||||
/* Only allow modification before owner is set */
|
||||
if (vhost_dev_has_owner(d)) {
|
||||
r = -EBUSY;
|
||||
goto done;
|
||||
}
|
||||
u8 fork_owner_val;
|
||||
|
||||
if (get_user(fork_owner_val, (u8 __user *)argp)) {
|
||||
r = -EFAULT;
|
||||
goto done;
|
||||
}
|
||||
if (fork_owner_val != VHOST_FORK_OWNER_TASK &&
|
||||
fork_owner_val != VHOST_FORK_OWNER_KTHREAD) {
|
||||
r = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
d->fork_owner = !!fork_owner_val;
|
||||
r = 0;
|
||||
goto done;
|
||||
}
|
||||
if (ioctl == VHOST_GET_FORK_FROM_OWNER) {
|
||||
u8 fork_owner_val = d->fork_owner;
|
||||
|
||||
if (fork_owner_val != VHOST_FORK_OWNER_TASK &&
|
||||
fork_owner_val != VHOST_FORK_OWNER_KTHREAD) {
|
||||
r = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
if (put_user(fork_owner_val, (u8 __user *)argp)) {
|
||||
r = -EFAULT;
|
||||
goto done;
|
||||
}
|
||||
r = 0;
|
||||
goto done;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* You must be the owner to do anything else */
|
||||
r = vhost_dev_check_owner(d);
|
||||
if (r)
|
||||
|
@ -2590,11 +2804,12 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
|
|||
unsigned int *out_num, unsigned int *in_num,
|
||||
struct vhost_log *log, unsigned int *log_num)
|
||||
{
|
||||
bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
|
||||
struct vring_desc desc;
|
||||
unsigned int i, head, found = 0;
|
||||
u16 last_avail_idx = vq->last_avail_idx;
|
||||
__virtio16 ring_head;
|
||||
int ret, access;
|
||||
int ret, access, c = 0;
|
||||
|
||||
if (vq->avail_idx == vq->last_avail_idx) {
|
||||
ret = vhost_get_avail_idx(vq);
|
||||
|
@ -2605,17 +2820,21 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
|
|||
return vq->num;
|
||||
}
|
||||
|
||||
/* Grab the next descriptor number they're advertising, and increment
|
||||
* the index we've seen. */
|
||||
if (unlikely(vhost_get_avail_head(vq, &ring_head, last_avail_idx))) {
|
||||
vq_err(vq, "Failed to read head: idx %d address %p\n",
|
||||
last_avail_idx,
|
||||
&vq->avail->ring[last_avail_idx % vq->num]);
|
||||
return -EFAULT;
|
||||
if (in_order)
|
||||
head = vq->next_avail_head & (vq->num - 1);
|
||||
else {
|
||||
/* Grab the next descriptor number they're
|
||||
* advertising, and increment the index we've seen. */
|
||||
if (unlikely(vhost_get_avail_head(vq, &ring_head,
|
||||
last_avail_idx))) {
|
||||
vq_err(vq, "Failed to read head: idx %d address %p\n",
|
||||
last_avail_idx,
|
||||
&vq->avail->ring[last_avail_idx % vq->num]);
|
||||
return -EFAULT;
|
||||
}
|
||||
head = vhost16_to_cpu(vq, ring_head);
|
||||
}
|
||||
|
||||
head = vhost16_to_cpu(vq, ring_head);
|
||||
|
||||
/* If their number is silly, that's an error. */
|
||||
if (unlikely(head >= vq->num)) {
|
||||
vq_err(vq, "Guest says index %u > %u is available",
|
||||
|
@ -2658,6 +2877,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
|
|||
"in indirect descriptor at idx %d\n", i);
|
||||
return ret;
|
||||
}
|
||||
++c;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -2693,10 +2913,12 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
|
|||
}
|
||||
*out_num += ret;
|
||||
}
|
||||
++c;
|
||||
} while ((i = next_desc(vq, &desc)) != -1);
|
||||
|
||||
/* On success, increment avail index. */
|
||||
vq->last_avail_idx++;
|
||||
vq->next_avail_head += c;
|
||||
|
||||
/* Assume notifications from guest are disabled at this point,
|
||||
* if they aren't we would need to update avail_event index. */
|
||||
|
@ -2720,8 +2942,9 @@ int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
|
|||
cpu_to_vhost32(vq, head),
|
||||
cpu_to_vhost32(vq, len)
|
||||
};
|
||||
u16 nheads = 1;
|
||||
|
||||
return vhost_add_used_n(vq, &heads, 1);
|
||||
return vhost_add_used_n(vq, &heads, &nheads, 1);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(vhost_add_used);
|
||||
|
||||
|
@ -2757,10 +2980,9 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* After we've used one of their buffers, we tell them about it. We'll then
|
||||
* want to notify the guest, using eventfd. */
|
||||
int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
|
||||
unsigned count)
|
||||
static int vhost_add_used_n_ooo(struct vhost_virtqueue *vq,
|
||||
struct vring_used_elem *heads,
|
||||
unsigned count)
|
||||
{
|
||||
int start, n, r;
|
||||
|
||||
|
@ -2773,7 +2995,72 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
|
|||
heads += n;
|
||||
count -= n;
|
||||
}
|
||||
r = __vhost_add_used_n(vq, heads, count);
|
||||
return __vhost_add_used_n(vq, heads, count);
|
||||
}
|
||||
|
||||
static int vhost_add_used_n_in_order(struct vhost_virtqueue *vq,
|
||||
struct vring_used_elem *heads,
|
||||
const u16 *nheads,
|
||||
unsigned count)
|
||||
{
|
||||
vring_used_elem_t __user *used;
|
||||
u16 old, new = vq->last_used_idx;
|
||||
int start, i;
|
||||
|
||||
if (!nheads)
|
||||
return -EINVAL;
|
||||
|
||||
start = vq->last_used_idx & (vq->num - 1);
|
||||
used = vq->used->ring + start;
|
||||
|
||||
for (i = 0; i < count; i++) {
|
||||
if (vhost_put_used(vq, &heads[i], start, 1)) {
|
||||
vq_err(vq, "Failed to write used");
|
||||
return -EFAULT;
|
||||
}
|
||||
start += nheads[i];
|
||||
new += nheads[i];
|
||||
if (start >= vq->num)
|
||||
start -= vq->num;
|
||||
}
|
||||
|
||||
if (unlikely(vq->log_used)) {
|
||||
/* Make sure data is seen before log. */
|
||||
smp_wmb();
|
||||
/* Log used ring entry write. */
|
||||
log_used(vq, ((void __user *)used - (void __user *)vq->used),
|
||||
(vq->num - start) * sizeof *used);
|
||||
if (start + count > vq->num)
|
||||
log_used(vq, 0,
|
||||
(start + count - vq->num) * sizeof *used);
|
||||
}
|
||||
|
||||
old = vq->last_used_idx;
|
||||
vq->last_used_idx = new;
|
||||
/* If the driver never bothers to signal in a very long while,
|
||||
* used index might wrap around. If that happens, invalidate
|
||||
* signalled_used index we stored. TODO: make sure driver
|
||||
* signals at least once in 2^16 and remove this. */
|
||||
if (unlikely((u16)(new - vq->signalled_used) < (u16)(new - old)))
|
||||
vq->signalled_used_valid = false;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* After we've used one of their buffers, we tell them about it. We'll then
|
||||
* want to notify the guest, using eventfd. */
|
||||
int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
|
||||
u16 *nheads, unsigned count)
|
||||
{
|
||||
bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
|
||||
int r;
|
||||
|
||||
if (!in_order || !nheads)
|
||||
r = vhost_add_used_n_ooo(vq, heads, count);
|
||||
else
|
||||
r = vhost_add_used_n_in_order(vq, heads, nheads, count);
|
||||
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
/* Make sure buffer is written before we update index. */
|
||||
smp_wmb();
|
||||
|
@ -2853,14 +3140,16 @@ EXPORT_SYMBOL_GPL(vhost_add_used_and_signal);
|
|||
/* multi-buffer version of vhost_add_used_and_signal */
|
||||
void vhost_add_used_and_signal_n(struct vhost_dev *dev,
|
||||
struct vhost_virtqueue *vq,
|
||||
struct vring_used_elem *heads, unsigned count)
|
||||
struct vring_used_elem *heads,
|
||||
u16 *nheads,
|
||||
unsigned count)
|
||||
{
|
||||
vhost_add_used_n(vq, heads, count);
|
||||
vhost_add_used_n(vq, heads, nheads, count);
|
||||
vhost_signal(dev, vq);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(vhost_add_used_and_signal_n);
|
||||
|
||||
/* return true if we're sure that avaiable ring is empty */
|
||||
/* return true if we're sure that available ring is empty */
|
||||
bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq)
|
||||
{
|
||||
int r;
|
||||
|
|
|
@ -26,7 +26,18 @@ struct vhost_work {
|
|||
unsigned long flags;
|
||||
};
|
||||
|
||||
struct vhost_worker;
|
||||
struct vhost_dev;
|
||||
|
||||
struct vhost_worker_ops {
|
||||
int (*create)(struct vhost_worker *worker, struct vhost_dev *dev,
|
||||
const char *name);
|
||||
void (*stop)(struct vhost_worker *worker);
|
||||
void (*wakeup)(struct vhost_worker *worker);
|
||||
};
|
||||
|
||||
struct vhost_worker {
|
||||
struct task_struct *kthread_task;
|
||||
struct vhost_task *vtsk;
|
||||
struct vhost_dev *dev;
|
||||
/* Used to serialize device wide flushing with worker swapping. */
|
||||
|
@ -36,6 +47,7 @@ struct vhost_worker {
|
|||
u32 id;
|
||||
int attachment_cnt;
|
||||
bool killed;
|
||||
const struct vhost_worker_ops *ops;
|
||||
};
|
||||
|
||||
/* Poll a file (eventfd or socket) */
|
||||
|
@ -103,6 +115,8 @@ struct vhost_virtqueue {
|
|||
* Values are limited to 0x7fff, and the high bit is used as
|
||||
* a wrap counter when using VIRTIO_F_RING_PACKED. */
|
||||
u16 last_avail_idx;
|
||||
/* Next avail ring head when VIRTIO_F_IN_ORDER is negoitated */
|
||||
u16 next_avail_head;
|
||||
|
||||
/* Caches available index value from user. */
|
||||
u16 avail_idx;
|
||||
|
@ -129,6 +143,7 @@ struct vhost_virtqueue {
|
|||
struct iovec iotlb_iov[64];
|
||||
struct iovec *indirect;
|
||||
struct vring_used_elem *heads;
|
||||
u16 *nheads;
|
||||
/* Protected by virtqueue mutex. */
|
||||
struct vhost_iotlb *umem;
|
||||
struct vhost_iotlb *iotlb;
|
||||
|
@ -176,6 +191,16 @@ struct vhost_dev {
|
|||
int byte_weight;
|
||||
struct xarray worker_xa;
|
||||
bool use_worker;
|
||||
/*
|
||||
* If fork_owner is true we use vhost_tasks to create
|
||||
* the worker so all settings/limits like cgroups, NPROC,
|
||||
* scheduler, etc are inherited from the owner. If false,
|
||||
* we use kthreads and only attach to the same cgroups
|
||||
* as the owner for compat with older kernels.
|
||||
* here we use true as default value.
|
||||
* The default value is set by fork_from_owner_default
|
||||
*/
|
||||
bool fork_owner;
|
||||
int (*msg_handler)(struct vhost_dev *dev, u32 asid,
|
||||
struct vhost_iotlb_msg *msg);
|
||||
};
|
||||
|
@ -213,11 +238,12 @@ bool vhost_vq_is_setup(struct vhost_virtqueue *vq);
|
|||
int vhost_vq_init_access(struct vhost_virtqueue *);
|
||||
int vhost_add_used(struct vhost_virtqueue *, unsigned int head, int len);
|
||||
int vhost_add_used_n(struct vhost_virtqueue *, struct vring_used_elem *heads,
|
||||
unsigned count);
|
||||
u16 *nheads, unsigned count);
|
||||
void vhost_add_used_and_signal(struct vhost_dev *, struct vhost_virtqueue *,
|
||||
unsigned int id, int len);
|
||||
void vhost_add_used_and_signal_n(struct vhost_dev *, struct vhost_virtqueue *,
|
||||
struct vring_used_elem *heads, unsigned count);
|
||||
struct vring_used_elem *heads, u16 *nheads,
|
||||
unsigned count);
|
||||
void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *);
|
||||
void vhost_disable_notify(struct vhost_dev *, struct vhost_virtqueue *);
|
||||
bool vhost_vq_avail_empty(struct vhost_dev *, struct vhost_virtqueue *);
|
||||
|
|
|
@ -779,22 +779,6 @@ ssize_t vringh_iov_push_user(struct vringh_iov *wiov,
|
|||
}
|
||||
EXPORT_SYMBOL(vringh_iov_push_user);
|
||||
|
||||
/**
|
||||
* vringh_abandon_user - we've decided not to handle the descriptor(s).
|
||||
* @vrh: the vring.
|
||||
* @num: the number of descriptors to put back (ie. num
|
||||
* vringh_get_user() to undo).
|
||||
*
|
||||
* The next vringh_get_user() will return the old descriptor(s) again.
|
||||
*/
|
||||
void vringh_abandon_user(struct vringh *vrh, unsigned int num)
|
||||
{
|
||||
/* We only update vring_avail_event(vr) when we want to be notified,
|
||||
* so we haven't changed that yet. */
|
||||
vrh->last_avail_idx -= num;
|
||||
}
|
||||
EXPORT_SYMBOL(vringh_abandon_user);
|
||||
|
||||
/**
|
||||
* vringh_complete_user - we've finished with descriptor, publish it.
|
||||
* @vrh: the vring.
|
||||
|
@ -900,20 +884,6 @@ static inline int putused_kern(const struct vringh *vrh,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static inline int xfer_kern(const struct vringh *vrh, void *src,
|
||||
void *dst, size_t len)
|
||||
{
|
||||
memcpy(dst, src, len);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int kern_xfer(const struct vringh *vrh, void *dst,
|
||||
void *src, size_t len)
|
||||
{
|
||||
memcpy(dst, src, len);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* vringh_init_kern - initialize a vringh for a kernelspace vring.
|
||||
* @vrh: the vringh to initialize.
|
||||
|
@ -998,51 +968,6 @@ int vringh_getdesc_kern(struct vringh *vrh,
|
|||
}
|
||||
EXPORT_SYMBOL(vringh_getdesc_kern);
|
||||
|
||||
/**
|
||||
* vringh_iov_pull_kern - copy bytes from vring_iov.
|
||||
* @riov: the riov as passed to vringh_getdesc_kern() (updated as we consume)
|
||||
* @dst: the place to copy.
|
||||
* @len: the maximum length to copy.
|
||||
*
|
||||
* Returns the bytes copied <= len or a negative errno.
|
||||
*/
|
||||
ssize_t vringh_iov_pull_kern(struct vringh_kiov *riov, void *dst, size_t len)
|
||||
{
|
||||
return vringh_iov_xfer(NULL, riov, dst, len, xfer_kern);
|
||||
}
|
||||
EXPORT_SYMBOL(vringh_iov_pull_kern);
|
||||
|
||||
/**
|
||||
* vringh_iov_push_kern - copy bytes into vring_iov.
|
||||
* @wiov: the wiov as passed to vringh_getdesc_kern() (updated as we consume)
|
||||
* @src: the place to copy from.
|
||||
* @len: the maximum length to copy.
|
||||
*
|
||||
* Returns the bytes copied <= len or a negative errno.
|
||||
*/
|
||||
ssize_t vringh_iov_push_kern(struct vringh_kiov *wiov,
|
||||
const void *src, size_t len)
|
||||
{
|
||||
return vringh_iov_xfer(NULL, wiov, (void *)src, len, kern_xfer);
|
||||
}
|
||||
EXPORT_SYMBOL(vringh_iov_push_kern);
|
||||
|
||||
/**
|
||||
* vringh_abandon_kern - we've decided not to handle the descriptor(s).
|
||||
* @vrh: the vring.
|
||||
* @num: the number of descriptors to put back (ie. num
|
||||
* vringh_get_kern() to undo).
|
||||
*
|
||||
* The next vringh_get_kern() will return the old descriptor(s) again.
|
||||
*/
|
||||
void vringh_abandon_kern(struct vringh *vrh, unsigned int num)
|
||||
{
|
||||
/* We only update vring_avail_event(vr) when we want to be notified,
|
||||
* so we haven't changed that yet. */
|
||||
vrh->last_avail_idx -= num;
|
||||
}
|
||||
EXPORT_SYMBOL(vringh_abandon_kern);
|
||||
|
||||
/**
|
||||
* vringh_complete_kern - we've finished with descriptor, publish it.
|
||||
* @vrh: the vring.
|
||||
|
@ -1534,23 +1459,6 @@ ssize_t vringh_iov_push_iotlb(struct vringh *vrh,
|
|||
}
|
||||
EXPORT_SYMBOL(vringh_iov_push_iotlb);
|
||||
|
||||
/**
|
||||
* vringh_abandon_iotlb - we've decided not to handle the descriptor(s).
|
||||
* @vrh: the vring.
|
||||
* @num: the number of descriptors to put back (ie. num
|
||||
* vringh_get_iotlb() to undo).
|
||||
*
|
||||
* The next vringh_get_iotlb() will return the old descriptor(s) again.
|
||||
*/
|
||||
void vringh_abandon_iotlb(struct vringh *vrh, unsigned int num)
|
||||
{
|
||||
/* We only update vring_avail_event(vr) when we want to be notified,
|
||||
* so we haven't changed that yet.
|
||||
*/
|
||||
vrh->last_avail_idx -= num;
|
||||
}
|
||||
EXPORT_SYMBOL(vringh_abandon_iotlb);
|
||||
|
||||
/**
|
||||
* vringh_complete_iotlb - we've finished with descriptor, publish it.
|
||||
* @vrh: the vring.
|
||||
|
@ -1571,32 +1479,6 @@ int vringh_complete_iotlb(struct vringh *vrh, u16 head, u32 len)
|
|||
}
|
||||
EXPORT_SYMBOL(vringh_complete_iotlb);
|
||||
|
||||
/**
|
||||
* vringh_notify_enable_iotlb - we want to know if something changes.
|
||||
* @vrh: the vring.
|
||||
*
|
||||
* This always enables notifications, but returns false if there are
|
||||
* now more buffers available in the vring.
|
||||
*/
|
||||
bool vringh_notify_enable_iotlb(struct vringh *vrh)
|
||||
{
|
||||
return __vringh_notify_enable(vrh, getu16_iotlb, putu16_iotlb);
|
||||
}
|
||||
EXPORT_SYMBOL(vringh_notify_enable_iotlb);
|
||||
|
||||
/**
|
||||
* vringh_notify_disable_iotlb - don't tell us if something changes.
|
||||
* @vrh: the vring.
|
||||
*
|
||||
* This is our normal running state: we disable and then only enable when
|
||||
* we're going to sleep.
|
||||
*/
|
||||
void vringh_notify_disable_iotlb(struct vringh *vrh)
|
||||
{
|
||||
__vringh_notify_disable(vrh, putu16_iotlb);
|
||||
}
|
||||
EXPORT_SYMBOL(vringh_notify_disable_iotlb);
|
||||
|
||||
/**
|
||||
* vringh_need_notify_iotlb - must we tell the other side about used buffers?
|
||||
* @vrh: the vring we've called vringh_complete_iotlb() on.
|
||||
|
|
|
@ -344,6 +344,10 @@ vhost_vsock_alloc_skb(struct vhost_virtqueue *vq,
|
|||
|
||||
len = iov_length(vq->iov, out);
|
||||
|
||||
if (len < VIRTIO_VSOCK_SKB_HEADROOM ||
|
||||
len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE + VIRTIO_VSOCK_SKB_HEADROOM)
|
||||
return NULL;
|
||||
|
||||
/* len contains both payload and hdr */
|
||||
skb = virtio_vsock_alloc_skb(len, GFP_KERNEL);
|
||||
if (!skb)
|
||||
|
@ -367,18 +371,15 @@ vhost_vsock_alloc_skb(struct vhost_virtqueue *vq,
|
|||
return skb;
|
||||
|
||||
/* The pkt is too big or the length in the header is invalid */
|
||||
if (payload_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE ||
|
||||
payload_len + sizeof(*hdr) > len) {
|
||||
if (payload_len + sizeof(*hdr) > len) {
|
||||
kfree_skb(skb);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
virtio_vsock_skb_rx_put(skb);
|
||||
virtio_vsock_skb_put(skb, payload_len);
|
||||
|
||||
nbytes = copy_from_iter(skb->data, payload_len, &iov_iter);
|
||||
if (nbytes != payload_len) {
|
||||
vq_err(vq, "Expected %zu byte payload, got %zu bytes\n",
|
||||
payload_len, nbytes);
|
||||
if (skb_copy_datagram_from_iter(skb, 0, &iov_iter, payload_len)) {
|
||||
vq_err(vq, "Failed to copy %zu byte payload\n", payload_len);
|
||||
kfree_skb(skb);
|
||||
return NULL;
|
||||
}
|
||||
|
|
|
@ -147,7 +147,7 @@ EXPORT_SYMBOL_GPL(virtio_config_changed);
|
|||
|
||||
/**
|
||||
* virtio_config_driver_disable - disable config change reporting by drivers
|
||||
* @dev: the device to reset
|
||||
* @dev: the device to disable
|
||||
*
|
||||
* This is only allowed to be called by a driver and disabling can't
|
||||
* be nested.
|
||||
|
@ -162,7 +162,7 @@ EXPORT_SYMBOL_GPL(virtio_config_driver_disable);
|
|||
|
||||
/**
|
||||
* virtio_config_driver_enable - enable config change reporting by drivers
|
||||
* @dev: the device to reset
|
||||
* @dev: the device to enable
|
||||
*
|
||||
* This is only allowed to be called by a driver and enabling can't
|
||||
* be nested.
|
||||
|
@ -512,7 +512,7 @@ out:
|
|||
* On error, the caller must call put_device on &@dev->dev (and not kfree),
|
||||
* as another code path may have obtained a reference to @dev.
|
||||
*
|
||||
* Returns: 0 on suceess, -error on failure
|
||||
* Returns: 0 on success, -error on failure
|
||||
*/
|
||||
int register_virtio_device(struct virtio_device *dev)
|
||||
{
|
||||
|
@ -536,6 +536,7 @@ int register_virtio_device(struct virtio_device *dev)
|
|||
goto out_ida_remove;
|
||||
|
||||
spin_lock_init(&dev->config_lock);
|
||||
dev->config_driver_disabled = false;
|
||||
dev->config_core_enabled = false;
|
||||
dev->config_change_pending = false;
|
||||
|
||||
|
|
|
@ -36,6 +36,8 @@ EXPORT_SYMBOL(virtio_dma_buf_export);
|
|||
|
||||
/**
|
||||
* virtio_dma_buf_attach - mandatory attach callback for virtio dma-bufs
|
||||
* @dma_buf: [in] buffer to attach
|
||||
* @attach: [in] attachment structure
|
||||
*/
|
||||
int virtio_dma_buf_attach(struct dma_buf *dma_buf,
|
||||
struct dma_buf_attachment *attach)
|
||||
|
|
|
@ -65,7 +65,6 @@
|
|||
#include <linux/platform_device.h>
|
||||
#include <linux/pm.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/virtio.h>
|
||||
#include <linux/virtio_config.h>
|
||||
#include <uapi/linux/virtio_mmio.h>
|
||||
|
@ -88,22 +87,8 @@ struct virtio_mmio_device {
|
|||
|
||||
void __iomem *base;
|
||||
unsigned long version;
|
||||
|
||||
/* a list of queues so we can dispatch IRQs */
|
||||
spinlock_t lock;
|
||||
struct list_head virtqueues;
|
||||
};
|
||||
|
||||
struct virtio_mmio_vq_info {
|
||||
/* the actual virtqueue */
|
||||
struct virtqueue *vq;
|
||||
|
||||
/* the list node for the virtqueues list */
|
||||
struct list_head node;
|
||||
};
|
||||
|
||||
|
||||
|
||||
/* Configuration interface */
|
||||
|
||||
static u64 vm_get_features(struct virtio_device *vdev)
|
||||
|
@ -300,9 +285,8 @@ static bool vm_notify_with_data(struct virtqueue *vq)
|
|||
static irqreturn_t vm_interrupt(int irq, void *opaque)
|
||||
{
|
||||
struct virtio_mmio_device *vm_dev = opaque;
|
||||
struct virtio_mmio_vq_info *info;
|
||||
struct virtqueue *vq;
|
||||
unsigned long status;
|
||||
unsigned long flags;
|
||||
irqreturn_t ret = IRQ_NONE;
|
||||
|
||||
/* Read and acknowledge interrupts */
|
||||
|
@ -315,10 +299,8 @@ static irqreturn_t vm_interrupt(int irq, void *opaque)
|
|||
}
|
||||
|
||||
if (likely(status & VIRTIO_MMIO_INT_VRING)) {
|
||||
spin_lock_irqsave(&vm_dev->lock, flags);
|
||||
list_for_each_entry(info, &vm_dev->virtqueues, node)
|
||||
ret |= vring_interrupt(irq, info->vq);
|
||||
spin_unlock_irqrestore(&vm_dev->lock, flags);
|
||||
virtio_device_for_each_vq(&vm_dev->vdev, vq)
|
||||
ret |= vring_interrupt(irq, vq);
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
@ -329,14 +311,8 @@ static irqreturn_t vm_interrupt(int irq, void *opaque)
|
|||
static void vm_del_vq(struct virtqueue *vq)
|
||||
{
|
||||
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vq->vdev);
|
||||
struct virtio_mmio_vq_info *info = vq->priv;
|
||||
unsigned long flags;
|
||||
unsigned int index = vq->index;
|
||||
|
||||
spin_lock_irqsave(&vm_dev->lock, flags);
|
||||
list_del(&info->node);
|
||||
spin_unlock_irqrestore(&vm_dev->lock, flags);
|
||||
|
||||
/* Select and deactivate the queue */
|
||||
writel(index, vm_dev->base + VIRTIO_MMIO_QUEUE_SEL);
|
||||
if (vm_dev->version == 1) {
|
||||
|
@ -347,8 +323,6 @@ static void vm_del_vq(struct virtqueue *vq)
|
|||
}
|
||||
|
||||
vring_del_virtqueue(vq);
|
||||
|
||||
kfree(info);
|
||||
}
|
||||
|
||||
static void vm_del_vqs(struct virtio_device *vdev)
|
||||
|
@ -375,9 +349,7 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned int in
|
|||
{
|
||||
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
|
||||
bool (*notify)(struct virtqueue *vq);
|
||||
struct virtio_mmio_vq_info *info;
|
||||
struct virtqueue *vq;
|
||||
unsigned long flags;
|
||||
unsigned int num;
|
||||
int err;
|
||||
|
||||
|
@ -399,13 +371,6 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned int in
|
|||
goto error_available;
|
||||
}
|
||||
|
||||
/* Allocate and fill out our active queue description */
|
||||
info = kmalloc(sizeof(*info), GFP_KERNEL);
|
||||
if (!info) {
|
||||
err = -ENOMEM;
|
||||
goto error_kmalloc;
|
||||
}
|
||||
|
||||
num = readl(vm_dev->base + VIRTIO_MMIO_QUEUE_NUM_MAX);
|
||||
if (num == 0) {
|
||||
err = -ENOENT;
|
||||
|
@ -463,13 +428,6 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned int in
|
|||
writel(1, vm_dev->base + VIRTIO_MMIO_QUEUE_READY);
|
||||
}
|
||||
|
||||
vq->priv = info;
|
||||
info->vq = vq;
|
||||
|
||||
spin_lock_irqsave(&vm_dev->lock, flags);
|
||||
list_add(&info->node, &vm_dev->virtqueues);
|
||||
spin_unlock_irqrestore(&vm_dev->lock, flags);
|
||||
|
||||
return vq;
|
||||
|
||||
error_bad_pfn:
|
||||
|
@ -481,8 +439,6 @@ error_new_virtqueue:
|
|||
writel(0, vm_dev->base + VIRTIO_MMIO_QUEUE_READY);
|
||||
WARN_ON(readl(vm_dev->base + VIRTIO_MMIO_QUEUE_READY));
|
||||
}
|
||||
kfree(info);
|
||||
error_kmalloc:
|
||||
error_available:
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
@ -627,8 +583,6 @@ static int virtio_mmio_probe(struct platform_device *pdev)
|
|||
vm_dev->vdev.dev.release = virtio_mmio_release_dev;
|
||||
vm_dev->vdev.config = &virtio_mmio_config_ops;
|
||||
vm_dev->pdev = pdev;
|
||||
INIT_LIST_HEAD(&vm_dev->virtqueues);
|
||||
spin_lock_init(&vm_dev->lock);
|
||||
|
||||
vm_dev->base = devm_platform_ioremap_resource(pdev, 0);
|
||||
if (IS_ERR(vm_dev->base)) {
|
||||
|
|
|
@ -2296,6 +2296,10 @@ static inline int virtqueue_add(struct virtqueue *_vq,
|
|||
* at the same time (except where noted).
|
||||
*
|
||||
* Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
|
||||
*
|
||||
* NB: ENOSPC is a special code that is only returned on an attempt to add a
|
||||
* buffer to a full VQ. It indicates that some buffers are outstanding and that
|
||||
* the operation can be retried after some buffers have been used.
|
||||
*/
|
||||
int virtqueue_add_sgs(struct virtqueue *_vq,
|
||||
struct scatterlist *sgs[],
|
||||
|
|
|
@ -28,19 +28,6 @@ struct virtio_vdpa_device {
struct virtio_device vdev;
struct vdpa_device *vdpa;
u64 features;

/* The lock to protect virtqueue list */
spinlock_t lock;
/* List of virtio_vdpa_vq_info */
struct list_head virtqueues;
};

struct virtio_vdpa_vq_info {
/* the actual virtqueue */
struct virtqueue *vq;

/* the list node for the virtqueues list */
struct list_head node;
};

static inline struct virtio_vdpa_device *

@ -135,9 +122,9 @@ static irqreturn_t virtio_vdpa_config_cb(void *private)

static irqreturn_t virtio_vdpa_virtqueue_cb(void *private)
{
struct virtio_vdpa_vq_info *info = private;
struct virtqueue *vq = private;

return vring_interrupt(0, info->vq);
return vring_interrupt(0, vq);
}

static struct virtqueue *

@ -145,18 +132,15 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index,
void (*callback)(struct virtqueue *vq),
const char *name, bool ctx)
{
struct virtio_vdpa_device *vd_dev = to_virtio_vdpa_device(vdev);
struct vdpa_device *vdpa = vd_get_vdpa(vdev);
struct device *dma_dev;
const struct vdpa_config_ops *ops = vdpa->config;
struct virtio_vdpa_vq_info *info;
bool (*notify)(struct virtqueue *vq) = virtio_vdpa_notify;
struct vdpa_callback cb;
struct virtqueue *vq;
u64 desc_addr, driver_addr, device_addr;
/* Assume split virtqueue, switch to packed if necessary */
struct vdpa_vq_state state = {0};
unsigned long flags;
u32 align, max_num, min_num = 1;
bool may_reduce_num = true;
int err;

@ -179,10 +163,6 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index,
if (ops->get_vq_ready(vdpa, index))
return ERR_PTR(-ENOENT);

/* Allocate and fill out our active queue description */
info = kmalloc(sizeof(*info), GFP_KERNEL);
if (!info)
return ERR_PTR(-ENOMEM);
if (ops->get_vq_size)
max_num = ops->get_vq_size(vdpa, index);
else

@ -217,7 +197,7 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index,

/* Setup virtqueue callback */
cb.callback = callback ? virtio_vdpa_virtqueue_cb : NULL;
cb.private = info;
cb.private = vq;
cb.trigger = NULL;
ops->set_vq_cb(vdpa, index, &cb);
ops->set_vq_num(vdpa, index, virtqueue_get_vring_size(vq));

@ -248,13 +228,6 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index,

ops->set_vq_ready(vdpa, index, 1);

vq->priv = info;
info->vq = vq;

spin_lock_irqsave(&vd_dev->lock, flags);
list_add(&info->node, &vd_dev->virtqueues);
spin_unlock_irqrestore(&vd_dev->lock, flags);

return vq;

err_vq:

@ -263,7 +236,6 @@ error_new_virtqueue:
ops->set_vq_ready(vdpa, index, 0);
/* VDPA driver should make sure vq is stopeed here */
WARN_ON(ops->get_vq_ready(vdpa, index));
kfree(info);
return ERR_PTR(err);
}

@ -272,20 +244,12 @@ static void virtio_vdpa_del_vq(struct virtqueue *vq)
struct virtio_vdpa_device *vd_dev = to_virtio_vdpa_device(vq->vdev);
struct vdpa_device *vdpa = vd_dev->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
struct virtio_vdpa_vq_info *info = vq->priv;
unsigned int index = vq->index;
unsigned long flags;

spin_lock_irqsave(&vd_dev->lock, flags);
list_del(&info->node);
spin_unlock_irqrestore(&vd_dev->lock, flags);

/* Select and deactivate the queue (best effort) */
ops->set_vq_ready(vdpa, index, 0);

vring_del_virtqueue(vq);

kfree(info);
}

static void virtio_vdpa_del_vqs(struct virtio_device *vdev)

@ -502,8 +466,6 @@ static int virtio_vdpa_probe(struct vdpa_device *vdpa)
vd_dev->vdev.dev.release = virtio_vdpa_release_dev;
vd_dev->vdev.config = &virtio_vdpa_config_ops;
vd_dev->vdpa = vdpa;
INIT_LIST_HEAD(&vd_dev->virtqueues);
spin_lock_init(&vd_dev->lock);

vd_dev->vdev.id.device = ops->get_device_id(vdpa);
if (vd_dev->vdev.id.device == 0)

@ -199,7 +199,7 @@ int virtio_device_reset_done(struct virtio_device *dev);
size_t virtio_max_dma_size(const struct virtio_device *vdev);

#define virtio_device_for_each_vq(vdev, vq) \
list_for_each_entry(vq, &vdev->vqs, list)
list_for_each_entry(vq, &(vdev)->vqs, list)

/**
* struct virtio_driver - operations for a virtio I/O driver

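The only functional change in this hunk is parenthesizing the vdev macro argument. A minimal illustration, assuming a hypothetical wrapper structure that embeds the virtio_device (as the transports do): with the old definition, passing an expression such as &t->vdev expanded to & &t->vdev->vqs, which applies -> to the embedded structure and fails to build; the parenthesized form expands to &(&t->vdev)->vqs and iterates the device's virtqueues as intended.

#include <linux/device.h>
#include <linux/virtio.h>

/* Hypothetical wrapper, mirroring how transports embed the virtio_device. */
struct my_transport {
        struct virtio_device vdev;
};

static void log_vqs(struct my_transport *t)
{
        struct virtqueue *vq;

        /* Expands to list_for_each_entry(vq, &(&t->vdev)->vqs, list). */
        virtio_device_for_each_vq(&t->vdev, vq)
                dev_dbg(&t->vdev.dev, "found vq %u\n", vq->index);
}
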
@ -47,31 +47,50 @@ static inline void virtio_vsock_skb_clear_tap_delivered(struct sk_buff *skb)
VIRTIO_VSOCK_SKB_CB(skb)->tap_delivered = false;
}

static inline void virtio_vsock_skb_rx_put(struct sk_buff *skb)
static inline void virtio_vsock_skb_put(struct sk_buff *skb, u32 len)
{
u32 len;
DEBUG_NET_WARN_ON_ONCE(skb->len);

len = le32_to_cpu(virtio_vsock_hdr(skb)->len);

if (len > 0)
if (skb_is_nonlinear(skb))
skb->len = len;
else
skb_put(skb, len);
}

static inline struct sk_buff *virtio_vsock_alloc_skb(unsigned int size, gfp_t mask)
static inline struct sk_buff *
__virtio_vsock_alloc_skb_with_frags(unsigned int header_len,
unsigned int data_len,
gfp_t mask)
{
struct sk_buff *skb;
int err;

if (size < VIRTIO_VSOCK_SKB_HEADROOM)
return NULL;

skb = alloc_skb(size, mask);
skb = alloc_skb_with_frags(header_len, data_len,
PAGE_ALLOC_COSTLY_ORDER, &err, mask);
if (!skb)
return NULL;

skb_reserve(skb, VIRTIO_VSOCK_SKB_HEADROOM);
skb->data_len = data_len;
return skb;
}

static inline struct sk_buff *
virtio_vsock_alloc_linear_skb(unsigned int size, gfp_t mask)
{
return __virtio_vsock_alloc_skb_with_frags(size, 0, mask);
}

static inline struct sk_buff *virtio_vsock_alloc_skb(unsigned int size, gfp_t mask)
{
if (size <= SKB_WITH_OVERHEAD(PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
return virtio_vsock_alloc_linear_skb(size, mask);

size -= VIRTIO_VSOCK_SKB_HEADROOM;
return __virtio_vsock_alloc_skb_with_frags(VIRTIO_VSOCK_SKB_HEADROOM,
size, mask);
}

static inline void
virtio_vsock_skb_queue_head(struct sk_buff_head *list, struct sk_buff *skb)
{

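A short usage sketch of the new allocators (illustrative only, not code from this series; build_tx_skb() is a hypothetical wrapper): above the linear threshold, virtio_vsock_alloc_skb() now returns a nonlinear skb, so callers account the payload with virtio_vsock_skb_put() and copy it with an iterator-aware helper instead of writing straight into skb->data.

#include <linux/skbuff.h>
#include <linux/socket.h>
#include <linux/virtio_vsock.h>

static struct sk_buff *build_tx_skb(struct msghdr *msg, size_t payload)
{
        struct sk_buff *skb;

        skb = virtio_vsock_alloc_skb(VIRTIO_VSOCK_SKB_HEADROOM + payload,
                                     GFP_KERNEL);
        if (!skb)
                return NULL;

        /* Accounts the payload length for both linear and nonlinear skbs. */
        virtio_vsock_skb_put(skb, payload);

        /* Copies into the linear area and/or the frags as needed. */
        if (skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, payload)) {
                kfree_skb(skb);
                return NULL;
        }
        return skb;
}
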
@ -111,7 +130,12 @@ static inline size_t virtio_vsock_skb_len(struct sk_buff *skb)
return (size_t)(skb_end_pointer(skb) - skb->head);
}

#define VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE (1024 * 4)
/* Dimension the RX SKB so that the entire thing fits exactly into
* a single 4KiB page. This avoids wasting memory due to alloc_skb()
* rounding up to the next page order and also means that we
* don't leave higher-order pages sitting around in the RX queue.
*/
#define VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE SKB_WITH_OVERHEAD(1024 * 4)
#define VIRTIO_VSOCK_MAX_BUF_SIZE 0xFFFFFFFFUL
#define VIRTIO_VSOCK_MAX_PKT_BUF_SIZE (1024 * 64)

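For reference, and assuming the generic definition in include/linux/skbuff.h (which this series does not change), SKB_WITH_OVERHEAD() is roughly

        SKB_WITH_OVERHEAD(X) = (X) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info))

so the new default sizes the data area such that the buffer plus the trailing skb_shared_info totals 4096 bytes, keeping the skb head within a single 4KiB allocation instead of rounding up to the next page order.
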
@ -175,9 +175,6 @@ int vringh_complete_multi_user(struct vringh *vrh,
const struct vring_used_elem used[],
unsigned num_used);

/* Pretend we've never seen descriptor (for easy error handling). */
void vringh_abandon_user(struct vringh *vrh, unsigned int num);

/* Do we need to fire the eventfd to notify the other side? */
int vringh_need_notify_user(struct vringh *vrh);

@ -235,10 +232,6 @@ int vringh_getdesc_kern(struct vringh *vrh,
u16 *head,
gfp_t gfp);

ssize_t vringh_iov_pull_kern(struct vringh_kiov *riov, void *dst, size_t len);
ssize_t vringh_iov_push_kern(struct vringh_kiov *wiov,
const void *src, size_t len);
void vringh_abandon_kern(struct vringh *vrh, unsigned int num);
int vringh_complete_kern(struct vringh *vrh, u16 head, u32 len);

bool vringh_notify_enable_kern(struct vringh *vrh);

@ -319,13 +312,8 @@ ssize_t vringh_iov_push_iotlb(struct vringh *vrh,
struct vringh_kiov *wiov,
const void *src, size_t len);

void vringh_abandon_iotlb(struct vringh *vrh, unsigned int num);

int vringh_complete_iotlb(struct vringh *vrh, u16 head, u32 len);

bool vringh_notify_enable_iotlb(struct vringh *vrh);
void vringh_notify_disable_iotlb(struct vringh *vrh);

int vringh_need_notify_iotlb(struct vringh *vrh);

#endif /* CONFIG_VHOST_IOTLB */

@ -242,4 +242,32 @@
#define VHOST_SET_FEATURES_ARRAY _IOW(VHOST_VIRTIO, 0x83, \
struct vhost_features_array)

/* fork_owner values for vhost */
#define VHOST_FORK_OWNER_KTHREAD 0
#define VHOST_FORK_OWNER_TASK 1

/**
* VHOST_SET_FORK_FROM_OWNER - Set the fork_owner flag for the vhost device,
* This ioctl must called before VHOST_SET_OWNER.
* Only available when CONFIG_VHOST_ENABLE_FORK_OWNER_CONTROL=y
*
* @param fork_owner: An 8-bit value that determines the vhost thread mode
*
* When fork_owner is set to VHOST_FORK_OWNER_TASK(default value):
* - Vhost will create vhost worker as tasks forked from the owner,
* inheriting all of the owner's attributes.
*
* When fork_owner is set to VHOST_FORK_OWNER_KTHREAD:
* - Vhost will create vhost workers as kernel threads.
*/
#define VHOST_SET_FORK_FROM_OWNER _IOW(VHOST_VIRTIO, 0x83, __u8)

/**
* VHOST_GET_FORK_OWNER - Get the current fork_owner flag for the vhost device.
* Only available when CONFIG_VHOST_ENABLE_FORK_OWNER_CONTROL=y
*
* @return: An 8-bit value indicating the current thread mode.
*/
#define VHOST_GET_FORK_FROM_OWNER _IOR(VHOST_VIRTIO, 0x84, __u8)

#endif

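A minimal userspace sketch of the new ioctls (illustrative only; the /dev/vhost-net node and the bare-bones error handling are assumptions, not part of this series). As documented above, the mode has to be set before VHOST_SET_OWNER:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/vhost.h>

int main(void)
{
        __u8 mode = VHOST_FORK_OWNER_KTHREAD;   /* legacy kthread workers */
        int fd = open("/dev/vhost-net", O_RDWR);

        if (fd < 0) {
                perror("open");
                return 1;
        }

        /* Only succeeds on kernels built with
         * CONFIG_VHOST_ENABLE_FORK_OWNER_CONTROL=y, and only before
         * VHOST_SET_OWNER has created the worker.
         */
        if (ioctl(fd, VHOST_SET_FORK_FROM_OWNER, &mode))
                perror("VHOST_SET_FORK_FROM_OWNER");

        if (ioctl(fd, VHOST_SET_OWNER))
                perror("VHOST_SET_OWNER");

        close(fd);
        return 0;
}
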
@ -145,7 +145,7 @@ struct vhost_task *vhost_task_create(bool (*fn)(void *),
tsk = copy_process(NULL, 0, NUMA_NO_NODE, &args);
if (IS_ERR(tsk)) {
kfree(vtsk);
return ERR_PTR(PTR_ERR(tsk));
return ERR_CAST(tsk);
}

vtsk->task = tsk;

@ -307,7 +307,7 @@ out_rcu:

static void virtio_vsock_rx_fill(struct virtio_vsock *vsock)
{
int total_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE + VIRTIO_VSOCK_SKB_HEADROOM;
int total_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE;
struct scatterlist pkt, *p;
struct virtqueue *vq;
struct sk_buff *skb;

@ -316,7 +316,7 @@ static void virtio_vsock_rx_fill(struct virtio_vsock *vsock)
vq = vsock->vqs[VSOCK_VQ_RX];

do {
skb = virtio_vsock_alloc_skb(total_len, GFP_KERNEL);
skb = virtio_vsock_alloc_linear_skb(total_len, GFP_KERNEL);
if (!skb)
break;

@ -624,8 +624,9 @@ static void virtio_transport_rx_work(struct work_struct *work)
do {
virtqueue_disable_cb(vq);
for (;;) {
unsigned int len, payload_len;
struct virtio_vsock_hdr *hdr;
struct sk_buff *skb;
unsigned int len;

if (!virtio_transport_more_replies(vsock)) {
/* Stop rx until the device processes already

@ -642,13 +643,22 @@ static void virtio_transport_rx_work(struct work_struct *work)
vsock->rx_buf_nr--;

/* Drop short/long packets */
if (unlikely(len < sizeof(struct virtio_vsock_hdr) ||
if (unlikely(len < sizeof(*hdr) ||
len > virtio_vsock_skb_len(skb))) {
kfree_skb(skb);
continue;
}

virtio_vsock_skb_rx_put(skb);
hdr = virtio_vsock_hdr(skb);
payload_len = le32_to_cpu(hdr->len);
if (unlikely(payload_len > len - sizeof(*hdr))) {
kfree_skb(skb);
continue;
}

if (payload_len)
virtio_vsock_skb_put(skb, payload_len);

virtio_transport_deliver_tap_pkt(skb);
virtio_transport_recv_pkt(&virtio_transport, skb);
}

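The ordering of the checks in the reworked loop above is what makes the later skb_put() safe: with len being the used length reported for the buffer, a packet is only kept when

        sizeof(*hdr) <= len <= virtio_vsock_skb_len(skb)   and   payload_len <= len - sizeof(*hdr)

so virtio_vsock_skb_put(skb, payload_len) can never expose more bytes than the device actually wrote.
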
@ -109,7 +109,8 @@ static int virtio_transport_fill_skb(struct sk_buff *skb,
return __zerocopy_sg_from_iter(info->msg, NULL, skb,
&info->msg->msg_iter, len, NULL);

return memcpy_from_msg(skb_put(skb, len), info->msg, len);
virtio_vsock_skb_put(skb, len);
return skb_copy_datagram_from_iter(skb, 0, &info->msg->msg_iter, len);
}

static void virtio_transport_init_hdr(struct sk_buff *skb,