linux/drivers/s390/cio/vfio_ccw_drv.c

493 lines
12 KiB
C
Raw Permalink Normal View History

// SPDX-License-Identifier: GPL-2.0
/*
* VFIO based Physical Subchannel device driver
*
* Copyright IBM Corp. 2017
* Copyright Red Hat, Inc. 2019
*
* Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
* Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
* Cornelia Huck <cohuck@redhat.com>
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/mdev.h>
#include <asm/isc.h>
#include "chp.h"
#include "ioasm.h"
#include "css.h"
#include "vfio_ccw_private.h"
struct workqueue_struct *vfio_ccw_work_q;
struct kmem_cache *vfio_ccw_io_region;
struct kmem_cache *vfio_ccw_cmd_region;
struct kmem_cache *vfio_ccw_schib_region;
struct kmem_cache *vfio_ccw_crw_region;
debug_info_t *vfio_ccw_debug_msg_id;
debug_info_t *vfio_ccw_debug_trace_id;
/*
* Helpers
*/
int vfio_ccw_sch_quiesce(struct subchannel *sch)
{
struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev);
struct vfio_ccw_private *private = dev_get_drvdata(&parent->dev);
DECLARE_COMPLETION_ONSTACK(completion);
int iretry, ret = 0;
/*
* Probably an impossible situation, after being called through
* FSM callbacks. But in the event it did, register a warning
* and return as if things were fine.
*/
if (WARN_ON(!private))
return 0;
iretry = 255;
do {
ret = cio_cancel_halt_clear(sch, &iretry);
if (ret == -EIO) {
pr_err("vfio_ccw: could not quiesce subchannel 0.%x.%04x!\n",
sch->schid.ssid, sch->schid.sch_no);
break;
}
/*
* Flush all I/O and wait for
* cancel/halt/clear completion.
*/
private->completion = &completion;
spin_unlock_irq(&sch->lock);
if (ret == -EBUSY)
wait_for_completion_timeout(&completion, 3*HZ);
private->completion = NULL;
flush_workqueue(vfio_ccw_work_q);
spin_lock_irq(&sch->lock);
ret = cio_disable_subchannel(sch);
} while (ret == -EBUSY);
return ret;
}
void vfio_ccw_sch_io_todo(struct work_struct *work)
{
struct vfio_ccw_private *private;
struct irb *irb;
bool is_final;
bool cp_is_finished = false;
private = container_of(work, struct vfio_ccw_private, io_work);
irb = &private->irb;
is_final = !(scsw_actl(&irb->scsw) &
(SCSW_ACTL_DEVACT | SCSW_ACTL_SCHACT));
if (scsw_is_solicited(&irb->scsw)) {
cp_update_scsw(&private->cp, &irb->scsw);
if (is_final && private->state == VFIO_CCW_STATE_CP_PENDING) {
cp_free(&private->cp);
cp_is_finished = true;
}
}
mutex_lock(&private->io_mutex);
memcpy(private->io_region->irb_area, irb, sizeof(*irb));
mutex_unlock(&private->io_mutex);
/*
* Reset to IDLE only if processing of a channel program
* has finished. Do not overwrite a possible processing
* state if the interrupt was unsolicited, or if the final
* interrupt was for HSCH or CSCH.
*/
if (cp_is_finished)
private->state = VFIO_CCW_STATE_IDLE;
if (private->io_trigger)
eventfd_signal(private->io_trigger);
}
void vfio_ccw_crw_todo(struct work_struct *work)
{
struct vfio_ccw_private *private;
private = container_of(work, struct vfio_ccw_private, crw_work);
if (!list_empty(&private->crw) && private->crw_trigger)
eventfd_signal(private->crw_trigger);
}
/*
* Css driver callbacks
*/
static void vfio_ccw_sch_irq(struct subchannel *sch)
{
struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev);
struct vfio_ccw_private *private = dev_get_drvdata(&parent->dev);
/*
* The subchannel should still be disabled at this point,
* so an interrupt would be quite surprising. As with an
* interrupt while the FSM is closed, let's attempt to
* disable the subchannel again.
*/
if (!private) {
VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: unexpected interrupt\n",
sch->schid.cssid, sch->schid.ssid,
sch->schid.sch_no);
cio_disable_subchannel(sch);
return;
}
inc_irq_stat(IRQIO_CIO);
vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_INTERRUPT);
}
static void vfio_ccw_free_parent(struct device *dev)
{
struct vfio_ccw_parent *parent = container_of(dev, struct vfio_ccw_parent, dev);
kfree(parent);
}
static int vfio_ccw_sch_probe(struct subchannel *sch)
{
struct pmcw *pmcw = &sch->schib.pmcw;
struct vfio_ccw_parent *parent;
int ret = -ENOMEM;
if (pmcw->qf) {
dev_warn(&sch->dev, "vfio: ccw: does not support QDIO: %s\n",
dev_name(&sch->dev));
return -ENODEV;
}
s390/vfio-ccw: Make mdev_types not look like a fake flex array The vfio-ccw driver and the vfio parent device provided by it (parent) support just a single mdev_type, and this is not likely to change any time soon. To match the mdev interfaces nicely initially the choice was made that mdev_types (which gets passed into mdev_register_parent()) shall be an array of pointers to struct mdev_type with a single element, and to make things worse it ended up being the last member. Now the problem with that is that before C99 the usual way to get something similar to a flexible array member was to use a trailing array of size 0 or 1. This is what I called fake flex array. For a while now the community is trying to get rid of fake flex arrays. And while mdev_types was not a fake flex array but an array of size one, because it can easily be and probably was mistaken for a fake flex array it got converted into a real C99 flex array with a compile time known constant size of one. As per [1] it was established that "only fake flexible arrays should be transformed into C99 flex-array members". Since IMHO the entire point of flex arrays is being flexible about the array size at run time, a C99 flex array is a poor fit for mdev_types. But an array of a size one is a poor fit as well for the reason stated above, let us try to get rid of the flex array without introducing back the one sized array. So, lets make mdev_types a pointer to struct mdev_type and pass in the address of that pointer as the 4th formal parameter of mdev_register_parent(). [1] https://lore.kernel.org/lkml/85863d7a-2d8b-4c1b-b76a-e2f40834a7a8@embeddedor.com/ Signed-off-by: Halil Pasic <pasic@linux.ibm.com> Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com> Tested-by: Eric Farman <farman@linux.ibm.com> Link: https://lore.kernel.org/r/20250217100614.3043620-3-pasic@linux.ibm.com Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
2025-02-17 11:06:14 +01:00
parent = kzalloc(sizeof(*parent), GFP_KERNEL);
if (!parent)
return -ENOMEM;
dev_set_name(&parent->dev, "parent");
parent->dev.parent = &sch->dev;
parent->dev.release = &vfio_ccw_free_parent;
ret = device_register(&parent->dev);
if (ret)
goto out_free;
dev_set_drvdata(&sch->dev, parent);
parent->mdev_type.sysfs_name = "io";
parent->mdev_type.pretty_name = "I/O subchannel (Non-QDIO)";
s390/vfio-ccw: Make mdev_types not look like a fake flex array The vfio-ccw driver and the vfio parent device provided by it (parent) support just a single mdev_type, and this is not likely to change any time soon. To match the mdev interfaces nicely initially the choice was made that mdev_types (which gets passed into mdev_register_parent()) shall be an array of pointers to struct mdev_type with a single element, and to make things worse it ended up being the last member. Now the problem with that is that before C99 the usual way to get something similar to a flexible array member was to use a trailing array of size 0 or 1. This is what I called fake flex array. For a while now the community is trying to get rid of fake flex arrays. And while mdev_types was not a fake flex array but an array of size one, because it can easily be and probably was mistaken for a fake flex array it got converted into a real C99 flex array with a compile time known constant size of one. As per [1] it was established that "only fake flexible arrays should be transformed into C99 flex-array members". Since IMHO the entire point of flex arrays is being flexible about the array size at run time, a C99 flex array is a poor fit for mdev_types. But an array of a size one is a poor fit as well for the reason stated above, let us try to get rid of the flex array without introducing back the one sized array. So, lets make mdev_types a pointer to struct mdev_type and pass in the address of that pointer as the 4th formal parameter of mdev_register_parent(). [1] https://lore.kernel.org/lkml/85863d7a-2d8b-4c1b-b76a-e2f40834a7a8@embeddedor.com/ Signed-off-by: Halil Pasic <pasic@linux.ibm.com> Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com> Tested-by: Eric Farman <farman@linux.ibm.com> Link: https://lore.kernel.org/r/20250217100614.3043620-3-pasic@linux.ibm.com Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
2025-02-17 11:06:14 +01:00
parent->mdev_types = &parent->mdev_type;
ret = mdev_register_parent(&parent->parent, &sch->dev,
&vfio_ccw_mdev_driver,
s390/vfio-ccw: Make mdev_types not look like a fake flex array The vfio-ccw driver and the vfio parent device provided by it (parent) support just a single mdev_type, and this is not likely to change any time soon. To match the mdev interfaces nicely initially the choice was made that mdev_types (which gets passed into mdev_register_parent()) shall be an array of pointers to struct mdev_type with a single element, and to make things worse it ended up being the last member. Now the problem with that is that before C99 the usual way to get something similar to a flexible array member was to use a trailing array of size 0 or 1. This is what I called fake flex array. For a while now the community is trying to get rid of fake flex arrays. And while mdev_types was not a fake flex array but an array of size one, because it can easily be and probably was mistaken for a fake flex array it got converted into a real C99 flex array with a compile time known constant size of one. As per [1] it was established that "only fake flexible arrays should be transformed into C99 flex-array members". Since IMHO the entire point of flex arrays is being flexible about the array size at run time, a C99 flex array is a poor fit for mdev_types. But an array of a size one is a poor fit as well for the reason stated above, let us try to get rid of the flex array without introducing back the one sized array. So, lets make mdev_types a pointer to struct mdev_type and pass in the address of that pointer as the 4th formal parameter of mdev_register_parent(). [1] https://lore.kernel.org/lkml/85863d7a-2d8b-4c1b-b76a-e2f40834a7a8@embeddedor.com/ Signed-off-by: Halil Pasic <pasic@linux.ibm.com> Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com> Tested-by: Eric Farman <farman@linux.ibm.com> Link: https://lore.kernel.org/r/20250217100614.3043620-3-pasic@linux.ibm.com Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
2025-02-17 11:06:14 +01:00
&parent->mdev_types, 1);
if (ret)
goto out_unreg;
VFIO_CCW_MSG_EVENT(4, "bound to subchannel %x.%x.%04x\n",
sch->schid.cssid, sch->schid.ssid,
sch->schid.sch_no);
return 0;
out_unreg:
device_del(&parent->dev);
out_free:
put_device(&parent->dev);
dev_set_drvdata(&sch->dev, NULL);
return ret;
}
static void vfio_ccw_sch_remove(struct subchannel *sch)
{
struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev);
mdev_unregister_parent(&parent->parent);
device_unregister(&parent->dev);
dev_set_drvdata(&sch->dev, NULL);
VFIO_CCW_MSG_EVENT(4, "unbound from subchannel %x.%x.%04x\n",
sch->schid.cssid, sch->schid.ssid,
sch->schid.sch_no);
}
static void vfio_ccw_sch_shutdown(struct subchannel *sch)
{
struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev);
struct vfio_ccw_private *private = dev_get_drvdata(&parent->dev);
if (!private)
return;
vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_CLOSE);
vfio/ccw: Move FSM open/close to MDEV open/close Part of the confusion that has existed is the FSM lifecycle of subchannels between the common CSS driver and the vfio-ccw driver. During configuration, the FSM state goes from NOT_OPER to STANDBY to IDLE, but then back to NOT_OPER. For example: vfio_ccw_sch_probe: VFIO_CCW_STATE_NOT_OPER vfio_ccw_sch_probe: VFIO_CCW_STATE_STANDBY vfio_ccw_mdev_probe: VFIO_CCW_STATE_IDLE vfio_ccw_mdev_remove: VFIO_CCW_STATE_NOT_OPER vfio_ccw_sch_remove: VFIO_CCW_STATE_NOT_OPER vfio_ccw_sch_shutdown: VFIO_CCW_STATE_NOT_OPER Rearrange the open/close events to align with the mdev open/close, to better manage the memory and state of the devices as time progresses. Specifically, make mdev_open() perform the FSM open, and mdev_close() perform the FSM close instead of reset (which is both close and open). This makes the NOT_OPER state a dead-end path, indicating the device is probably not recoverable without fully probing and re-configuring the device. This has the nice side-effect of removing a number of special-cases where the FSM state is managed outside of the FSM itself (such as the aforementioned mdev_close() routine). Suggested-by: Jason Gunthorpe <jgg@nvidia.com> Signed-off-by: Eric Farman <farman@linux.ibm.com> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com> Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com> Link: https://lore.kernel.org/r/20220707135737.720765-12-farman@linux.ibm.com Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2022-07-07 15:57:37 +02:00
vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_NOT_OPER);
}
/**
* vfio_ccw_sch_event - process subchannel event
* @sch: subchannel
* @process: non-zero if function is called in process context
*
* An unspecified event occurred for this subchannel. Adjust data according
* to the current operational state of the subchannel. Return zero when the
* event has been handled sufficiently or -EAGAIN when this function should
* be called again in process context.
*/
static int vfio_ccw_sch_event(struct subchannel *sch, int process)
{
struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev);
struct vfio_ccw_private *private = dev_get_drvdata(&parent->dev);
unsigned long flags;
int rc = -EAGAIN;
spin_lock_irqsave(&sch->lock, flags);
if (!device_is_registered(&sch->dev))
goto out_unlock;
if (work_pending(&sch->todo_work))
goto out_unlock;
rc = 0;
if (cio_update_schib(sch)) {
if (private)
vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_NOT_OPER);
}
out_unlock:
spin_unlock_irqrestore(&sch->lock, flags);
return rc;
}
static void vfio_ccw_queue_crw(struct vfio_ccw_private *private,
unsigned int rsc,
unsigned int erc,
unsigned int rsid)
{
struct vfio_ccw_crw *crw;
/*
* If unable to allocate a CRW, just drop the event and
* carry on. The guest will either see a later one or
* learn when it issues its own store subchannel.
*/
crw = kzalloc(sizeof(*crw), GFP_ATOMIC);
if (!crw)
return;
/*
* Build the CRW based on the inputs given to us.
*/
crw->crw.rsc = rsc;
crw->crw.erc = erc;
crw->crw.rsid = rsid;
list_add_tail(&crw->next, &private->crw);
queue_work(vfio_ccw_work_q, &private->crw_work);
}
static int vfio_ccw_chp_event(struct subchannel *sch,
struct chp_link *link, int event)
{
struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev);
struct vfio_ccw_private *private = dev_get_drvdata(&parent->dev);
int mask = chp_ssd_get_mask(&sch->ssd_info, link);
int retry = 255;
if (!private || !mask)
return 0;
trace_vfio_ccw_chp_event(sch->schid, mask, event);
VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: mask=0x%x event=%d\n",
sch->schid.cssid,
sch->schid.ssid, sch->schid.sch_no,
mask, event);
if (cio_update_schib(sch))
return -ENODEV;
switch (event) {
case CHP_VARY_OFF:
/* Path logically turned off */
sch->opm &= ~mask;
sch->lpm &= ~mask;
if (sch->schib.pmcw.lpum & mask)
cio_cancel_halt_clear(sch, &retry);
break;
case CHP_OFFLINE:
/* Path is gone */
if (sch->schib.pmcw.lpum & mask)
cio_cancel_halt_clear(sch, &retry);
vfio_ccw_queue_crw(private, CRW_RSC_CPATH, CRW_ERC_PERRN,
link->chpid.id);
break;
case CHP_VARY_ON:
/* Path logically turned on */
sch->opm |= mask;
sch->lpm |= mask;
break;
case CHP_ONLINE:
/* Path became available */
sch->lpm |= mask & sch->opm;
vfio_ccw_queue_crw(private, CRW_RSC_CPATH, CRW_ERC_INIT,
link->chpid.id);
break;
}
return 0;
}
static struct css_device_id vfio_ccw_sch_ids[] = {
{ .match_flags = 0x1, .type = SUBCHANNEL_TYPE_IO, },
{ /* end of list */ },
};
MODULE_DEVICE_TABLE(css, vfio_ccw_sch_ids);
static struct css_driver vfio_ccw_sch_driver = {
.drv = {
.name = "vfio_ccw",
.owner = THIS_MODULE,
},
.subchannel_type = vfio_ccw_sch_ids,
.irq = vfio_ccw_sch_irq,
.probe = vfio_ccw_sch_probe,
.remove = vfio_ccw_sch_remove,
.shutdown = vfio_ccw_sch_shutdown,
.sch_event = vfio_ccw_sch_event,
.chp_event = vfio_ccw_chp_event,
};
static int __init vfio_ccw_debug_init(void)
{
vfio_ccw_debug_msg_id = debug_register("vfio_ccw_msg", 16, 1,
11 * sizeof(long));
if (!vfio_ccw_debug_msg_id)
goto out_unregister;
debug_register_view(vfio_ccw_debug_msg_id, &debug_sprintf_view);
debug_set_level(vfio_ccw_debug_msg_id, 2);
vfio_ccw_debug_trace_id = debug_register("vfio_ccw_trace", 16, 1, 16);
if (!vfio_ccw_debug_trace_id)
goto out_unregister;
debug_register_view(vfio_ccw_debug_trace_id, &debug_hex_ascii_view);
debug_set_level(vfio_ccw_debug_trace_id, 2);
return 0;
out_unregister:
debug_unregister(vfio_ccw_debug_msg_id);
debug_unregister(vfio_ccw_debug_trace_id);
return -1;
}
static void vfio_ccw_debug_exit(void)
{
debug_unregister(vfio_ccw_debug_msg_id);
debug_unregister(vfio_ccw_debug_trace_id);
}
static void vfio_ccw_destroy_regions(void)
{
kmem_cache_destroy(vfio_ccw_crw_region);
kmem_cache_destroy(vfio_ccw_schib_region);
kmem_cache_destroy(vfio_ccw_cmd_region);
kmem_cache_destroy(vfio_ccw_io_region);
}
static int __init vfio_ccw_sch_init(void)
{
int ret;
ret = vfio_ccw_debug_init();
if (ret)
return ret;
vfio_ccw_work_q = create_singlethread_workqueue("vfio-ccw");
if (!vfio_ccw_work_q) {
ret = -ENOMEM;
goto out_regions;
}
s390/cio: Refactor alloc of ccw_io_region If I attach a vfio-ccw device to my guest, I get the following warning on the host when the host kernel is CONFIG_HARDENED_USERCOPY=y [250757.595325] Bad or missing usercopy whitelist? Kernel memory overwrite attempt detected to SLUB object 'dma-kmalloc-512' (offset 64, size 124)! [250757.595365] WARNING: CPU: 2 PID: 10958 at mm/usercopy.c:81 usercopy_warn+0xac/0xd8 [250757.595369] Modules linked in: kvm vhost_net vhost tap xt_CHECKSUM iptable_mangle ipt_MASQUERADE iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack libcrc32c devlink tun bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables sunrpc dm_multipath s390_trng crc32_vx_s390 ghash_s390 prng aes_s390 des_s390 des_generic sha512_s390 sha1_s390 eadm_sch tape_3590 tape tape_class qeth_l2 qeth ccwgroup vfio_ccw vfio_mdev zcrypt_cex4 mdev vfio_iommu_type1 zcrypt vfio sha256_s390 sha_common zfcp scsi_transport_fc qdio dasd_eckd_mod dasd_mod [250757.595424] CPU: 2 PID: 10958 Comm: CPU 2/KVM Not tainted 4.18.0-derp #2 [250757.595426] Hardware name: IBM 3906 M05 780 (LPAR) ...snip regs... [250757.595523] Call Trace: [250757.595529] ([<0000000000349210>] usercopy_warn+0xa8/0xd8) [250757.595535] [<000000000032daaa>] __check_heap_object+0xfa/0x160 [250757.595540] [<0000000000349396>] __check_object_size+0x156/0x1d0 [250757.595547] [<000003ff80332d04>] vfio_ccw_mdev_write+0x74/0x148 [vfio_ccw] [250757.595552] [<000000000034ed12>] __vfs_write+0x3a/0x188 [250757.595556] [<000000000034f040>] vfs_write+0xa8/0x1b8 [250757.595559] [<000000000034f4e6>] ksys_pwrite64+0x86/0xc0 [250757.595568] [<00000000008959a0>] system_call+0xdc/0x2b0 [250757.595570] Last Breaking-Event-Address: [250757.595573] [<0000000000349210>] usercopy_warn+0xa8/0xd8 While vfio_ccw_mdev_{write|read} validates that the input position/count does not run over the ccw_io_region struct, the usercopy code that does copy_{to|from}_user doesn't necessarily know this. It sees the variable length and gets worried that it's affecting a normal kmalloc'd struct, and generates the above warning. Adjust how the ccw_io_region is alloc'd with a whitelist to remove this warning. The boundary checking will continue to do its thing. Signed-off-by: Eric Farman <farman@linux.ibm.com> Message-Id: <20180921204013.95804-3-farman@linux.ibm.com> Signed-off-by: Cornelia Huck <cohuck@redhat.com>
2018-09-21 22:40:13 +02:00
vfio_ccw_io_region = kmem_cache_create_usercopy("vfio_ccw_io_region",
sizeof(struct ccw_io_region), 0,
SLAB_ACCOUNT, 0,
sizeof(struct ccw_io_region), NULL);
if (!vfio_ccw_io_region) {
ret = -ENOMEM;
goto out_regions;
}
vfio_ccw_cmd_region = kmem_cache_create_usercopy("vfio_ccw_cmd_region",
sizeof(struct ccw_cmd_region), 0,
SLAB_ACCOUNT, 0,
sizeof(struct ccw_cmd_region), NULL);
if (!vfio_ccw_cmd_region) {
ret = -ENOMEM;
goto out_regions;
}
s390/cio: Refactor alloc of ccw_io_region If I attach a vfio-ccw device to my guest, I get the following warning on the host when the host kernel is CONFIG_HARDENED_USERCOPY=y [250757.595325] Bad or missing usercopy whitelist? Kernel memory overwrite attempt detected to SLUB object 'dma-kmalloc-512' (offset 64, size 124)! [250757.595365] WARNING: CPU: 2 PID: 10958 at mm/usercopy.c:81 usercopy_warn+0xac/0xd8 [250757.595369] Modules linked in: kvm vhost_net vhost tap xt_CHECKSUM iptable_mangle ipt_MASQUERADE iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack libcrc32c devlink tun bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables sunrpc dm_multipath s390_trng crc32_vx_s390 ghash_s390 prng aes_s390 des_s390 des_generic sha512_s390 sha1_s390 eadm_sch tape_3590 tape tape_class qeth_l2 qeth ccwgroup vfio_ccw vfio_mdev zcrypt_cex4 mdev vfio_iommu_type1 zcrypt vfio sha256_s390 sha_common zfcp scsi_transport_fc qdio dasd_eckd_mod dasd_mod [250757.595424] CPU: 2 PID: 10958 Comm: CPU 2/KVM Not tainted 4.18.0-derp #2 [250757.595426] Hardware name: IBM 3906 M05 780 (LPAR) ...snip regs... [250757.595523] Call Trace: [250757.595529] ([<0000000000349210>] usercopy_warn+0xa8/0xd8) [250757.595535] [<000000000032daaa>] __check_heap_object+0xfa/0x160 [250757.595540] [<0000000000349396>] __check_object_size+0x156/0x1d0 [250757.595547] [<000003ff80332d04>] vfio_ccw_mdev_write+0x74/0x148 [vfio_ccw] [250757.595552] [<000000000034ed12>] __vfs_write+0x3a/0x188 [250757.595556] [<000000000034f040>] vfs_write+0xa8/0x1b8 [250757.595559] [<000000000034f4e6>] ksys_pwrite64+0x86/0xc0 [250757.595568] [<00000000008959a0>] system_call+0xdc/0x2b0 [250757.595570] Last Breaking-Event-Address: [250757.595573] [<0000000000349210>] usercopy_warn+0xa8/0xd8 While vfio_ccw_mdev_{write|read} validates that the input position/count does not run over the ccw_io_region struct, the usercopy code that does copy_{to|from}_user doesn't necessarily know this. It sees the variable length and gets worried that it's affecting a normal kmalloc'd struct, and generates the above warning. Adjust how the ccw_io_region is alloc'd with a whitelist to remove this warning. The boundary checking will continue to do its thing. Signed-off-by: Eric Farman <farman@linux.ibm.com> Message-Id: <20180921204013.95804-3-farman@linux.ibm.com> Signed-off-by: Cornelia Huck <cohuck@redhat.com>
2018-09-21 22:40:13 +02:00
vfio_ccw_schib_region = kmem_cache_create_usercopy("vfio_ccw_schib_region",
sizeof(struct ccw_schib_region), 0,
SLAB_ACCOUNT, 0,
sizeof(struct ccw_schib_region), NULL);
if (!vfio_ccw_schib_region) {
ret = -ENOMEM;
goto out_regions;
}
vfio_ccw_crw_region = kmem_cache_create_usercopy("vfio_ccw_crw_region",
sizeof(struct ccw_crw_region), 0,
SLAB_ACCOUNT, 0,
sizeof(struct ccw_crw_region), NULL);
if (!vfio_ccw_crw_region) {
ret = -ENOMEM;
goto out_regions;
}
ret = mdev_register_driver(&vfio_ccw_mdev_driver);
if (ret)
goto out_regions;
isc_register(VFIO_CCW_ISC);
ret = css_driver_register(&vfio_ccw_sch_driver);
if (ret) {
isc_unregister(VFIO_CCW_ISC);
goto out_driver;
}
return ret;
out_driver:
mdev_unregister_driver(&vfio_ccw_mdev_driver);
out_regions:
vfio_ccw_destroy_regions();
destroy_workqueue(vfio_ccw_work_q);
vfio_ccw_debug_exit();
return ret;
}
static void __exit vfio_ccw_sch_exit(void)
{
css_driver_unregister(&vfio_ccw_sch_driver);
mdev_unregister_driver(&vfio_ccw_mdev_driver);
isc_unregister(VFIO_CCW_ISC);
vfio_ccw_destroy_regions();
destroy_workqueue(vfio_ccw_work_q);
vfio_ccw_debug_exit();
}
module_init(vfio_ccw_sch_init);
module_exit(vfio_ccw_sch_exit);
MODULE_DESCRIPTION("VFIO based Subchannel device driver");
MODULE_LICENSE("GPL v2");