2019-06-03 07:44:50 +02:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-only
|
2015-12-01 15:02:35 +01:00
|
|
|
/*
|
|
|
|
* Copyright (C) 2015, 2016 ARM Ltd.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/uaccess.h>
|
|
|
|
#include <linux/interrupt.h>
|
|
|
|
#include <linux/cpu.h>
|
|
|
|
#include <linux/kvm_host.h>
|
|
|
|
#include <kvm/arm_vgic.h>
|
2019-08-23 11:34:16 +01:00
|
|
|
#include <asm/kvm_emulate.h>
|
2015-12-01 15:02:35 +01:00
|
|
|
#include <asm/kvm_mmu.h>
|
|
|
|
#include "vgic.h"
|
|
|
|
|
2015-12-21 18:09:38 +01:00
|
|
|
/*
|
|
|
|
* Initialization rules: there are multiple stages to the vgic
|
2017-03-18 13:40:37 +01:00
|
|
|
* initialization, both for the distributor and the CPU interfaces. The basic
|
|
|
|
* idea is that even though the VGIC is not functional or not requested from
|
|
|
|
* user space, the critical path of the run loop can still call VGIC functions
|
|
|
|
* that just won't do anything, without them having to check additional
|
|
|
|
* initialization flags to ensure they don't look at uninitialized data
|
|
|
|
* structures.
|
2015-12-21 18:09:38 +01:00
|
|
|
*
|
|
|
|
* Distributor:
|
|
|
|
*
|
|
|
|
* - kvm_vgic_early_init(): initialization of static data that doesn't
|
|
|
|
* depend on any sizing information or emulation type. No allocation
|
|
|
|
* is allowed there.
|
|
|
|
*
|
|
|
|
* - vgic_init(): allocation and initialization of the generic data
|
|
|
|
* structures that depend on sizing information (number of CPUs,
|
|
|
|
* number of interrupts). Also initializes the vcpu specific data
|
|
|
|
* structures. Can be executed lazily for GICv2.
|
|
|
|
*
|
|
|
|
* CPU Interface:
|
|
|
|
*
|
2025-02-12 18:25:58 +00:00
|
|
|
* - kvm_vgic_vcpu_init(): initialization of static data that doesn't depend
|
|
|
|
* on any sizing information. Private interrupts are allocated if not
|
|
|
|
* already allocated at vgic-creation time.
|
2015-12-21 18:09:38 +01:00
|
|
|
*/
|
|
|
|
|
|
|
|
/* EARLY INIT */
|
|
|
|
|
2017-03-18 13:40:37 +01:00
|
|
|
/**
 * kvm_vgic_early_init() - Initialize static VGIC VCPU data structures
 * @kvm: The VM whose VGIC distributor should be initialized
 *
 * Only do initialization of static structures that don't require any
 * allocation or sizing information from userspace. vgic_init() called
 * kvm_vgic_dist_init() which takes care of the rest.
 */
void kvm_vgic_early_init(struct kvm *kvm)
{
	struct vgic_dist *dist = &kvm->arch.vgic;

	/*
	 * The LPI xarray's internal lock must be IRQ-safe, hence
	 * XA_FLAGS_LOCK_IRQ. No allocation happens here.
	 */
	xa_init_flags(&dist->lpi_xa, XA_FLAGS_LOCK_IRQ);
}
|
|
|
|
|
2015-12-21 14:50:50 +01:00
|
|
|
/* CREATION */
|
|
|
|
|
2025-02-12 18:25:58 +00:00
|
|
|
static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type);
|
|
|
|
|
2015-12-21 14:50:50 +01:00
|
|
|
/**
 * kvm_vgic_create: triggered by the instantiation of the VGIC device by
 * user space, either through the legacy KVM_CREATE_IRQCHIP ioctl (v2 only)
 * or through the generic KVM_CREATE_DEVICE API ioctl.
 * irqchip_in_kernel() tells you if this function succeeded or not.
 * @kvm: kvm struct pointer
 * @type: KVM_DEV_TYPE_ARM_VGIC_V[23]
 *
 * Return: 0 on success; -ENODEV, -EBUSY, -EEXIST, -E2BIG or -ENOMEM on
 * failure.
 */
int kvm_vgic_create(struct kvm *kvm, u32 type)
{
	struct kvm_vcpu *vcpu;
	unsigned long i;
	int ret;

	/*
	 * This function is also called by the KVM_CREATE_IRQCHIP handler,
	 * which had no chance yet to check the availability of the GICv2
	 * emulation. So check this here again. KVM_CREATE_DEVICE does
	 * the proper checks already.
	 */
	if (type == KVM_DEV_TYPE_ARM_VGIC_V2 &&
		!kvm_vgic_global_state.can_emulate_gicv2)
		return -ENODEV;

	/*
	 * Ensure mutual exclusion with vCPU creation and any vCPU ioctls by:
	 *
	 *  - Holding kvm->lock to prevent KVM_CREATE_VCPU from reaching
	 *    kvm_arch_vcpu_precreate() and ensuring created_vcpus is stable.
	 *    This alone is insufficient, as kvm_vm_ioctl_create_vcpu() drops
	 *    the kvm->lock before completing the vCPU creation.
	 */
	lockdep_assert_held(&kvm->lock);

	/*
	 *  - Acquiring the vCPU mutex for every *online* vCPU to prevent
	 *    concurrent vCPU ioctls for vCPUs already visible to userspace.
	 */
	ret = -EBUSY;
	if (kvm_trylock_all_vcpus(kvm))
		return ret;

	/*
	 *  - Taking the config_lock which protects VGIC data structures such
	 *    as the per-vCPU arrays of private IRQs (SGIs, PPIs).
	 */
	mutex_lock(&kvm->arch.config_lock);

	/*
	 *  - Bailing on the entire thing if a vCPU is in the middle of creation,
	 *    dropped the kvm->lock, but hasn't reached kvm_arch_vcpu_create().
	 *
	 * The whole combination of this guarantees that no vCPU can get into
	 * KVM with a VGIC configuration inconsistent with the VM's VGIC.
	 */
	/* ret is still -EBUSY for the two bail-out paths below */
	if (kvm->created_vcpus != atomic_read(&kvm->online_vcpus))
		goto out_unlock;

	/* Only one in-kernel irqchip per VM */
	if (irqchip_in_kernel(kvm)) {
		ret = -EEXIST;
		goto out_unlock;
	}

	/* Too late to add a VGIC once a vCPU has entered the guest */
	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (vcpu_has_run_once(vcpu))
			goto out_unlock;
	}
	ret = 0;

	if (type == KVM_DEV_TYPE_ARM_VGIC_V2)
		kvm->max_vcpus = VGIC_V2_MAX_CPUS;
	else
		kvm->max_vcpus = VGIC_V3_MAX_CPUS;

	/* Reject VMs that already exceed the emulation's vCPU limit */
	if (atomic_read(&kvm->online_vcpus) > kvm->max_vcpus) {
		ret = -E2BIG;
		goto out_unlock;
	}

	/* Allocate the per-vCPU private interrupt (SGI/PPI) arrays */
	kvm_for_each_vcpu(i, vcpu, kvm) {
		ret = vgic_allocate_private_irqs_locked(vcpu, type);
		if (ret)
			break;
	}

	if (ret) {
		/*
		 * Roll back every allocation made above; kfree(NULL) is a
		 * no-op for vCPUs that were never reached. No vCPU can have
		 * allocated its array earlier, since kvm_vgic_vcpu_init()
		 * bails out when no in-kernel irqchip exists.
		 */
		kvm_for_each_vcpu(i, vcpu, kvm) {
			struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;

			kfree(vgic_cpu->private_irqs);
			vgic_cpu->private_irqs = NULL;
		}

		goto out_unlock;
	}

	/* Point of no return: publish the irqchip to the rest of KVM */
	kvm->arch.vgic.in_kernel = true;
	kvm->arch.vgic.vgic_model = type;
	kvm->arch.vgic.implementation_rev = KVM_VGIC_IMP_REV_LATEST;

	kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;

	if (type == KVM_DEV_TYPE_ARM_VGIC_V2)
		kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
	else
		INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions);

	if (type == KVM_DEV_TYPE_ARM_VGIC_V3)
		kvm->arch.vgic.nassgicap = system_supports_direct_sgis();

out_unlock:
	mutex_unlock(&kvm->arch.config_lock);
	kvm_unlock_all_vcpus(kvm);
	return ret;
}
|
|
|
|
|
2015-12-21 18:09:38 +01:00
|
|
|
/* INIT/DESTROY */
|
|
|
|
|
|
|
|
/**
|
|
|
|
* kvm_vgic_dist_init: initialize the dist data structures
|
|
|
|
* @kvm: kvm struct pointer
|
|
|
|
* @nr_spis: number of spis, frozen by caller
|
|
|
|
*/
|
|
|
|
static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
|
|
|
|
{
|
|
|
|
struct vgic_dist *dist = &kvm->arch.vgic;
|
|
|
|
struct kvm_vcpu *vcpu0 = kvm_get_vcpu(kvm, 0);
|
|
|
|
int i;
|
|
|
|
|
2021-09-07 20:31:11 +08:00
|
|
|
dist->spis = kcalloc(nr_spis, sizeof(struct vgic_irq), GFP_KERNEL_ACCOUNT);
|
2015-12-21 18:09:38 +01:00
|
|
|
if (!dist->spis)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* In the following code we do not take the irq struct lock since
|
|
|
|
* no other action on irq structs can happen while the VGIC is
|
|
|
|
* not initialized yet:
|
|
|
|
* If someone wants to inject an interrupt or does a MMIO access, we
|
|
|
|
* require prior initialization in case of a virtual GICv3 or trigger
|
|
|
|
* initialization when using a virtual GICv2.
|
|
|
|
*/
|
|
|
|
for (i = 0; i < nr_spis; i++) {
|
|
|
|
struct vgic_irq *irq = &dist->spis[i];
|
|
|
|
|
|
|
|
irq->intid = i + VGIC_NR_PRIVATE_IRQS;
|
|
|
|
INIT_LIST_HEAD(&irq->ap_list);
|
2019-01-07 15:06:15 +00:00
|
|
|
raw_spin_lock_init(&irq->irq_lock);
|
2015-12-21 18:09:38 +01:00
|
|
|
irq->vcpu = NULL;
|
|
|
|
irq->target_vcpu = vcpu0;
|
2016-07-15 12:43:27 +01:00
|
|
|
kref_init(&irq->refcount);
|
2019-08-23 11:34:16 +01:00
|
|
|
switch (dist->vgic_model) {
|
|
|
|
case KVM_DEV_TYPE_ARM_VGIC_V2:
|
2015-12-21 18:09:38 +01:00
|
|
|
irq->targets = 0;
|
2018-07-16 15:06:21 +02:00
|
|
|
irq->group = 0;
|
2019-08-23 11:34:16 +01:00
|
|
|
break;
|
|
|
|
case KVM_DEV_TYPE_ARM_VGIC_V3:
|
2015-12-21 18:09:38 +01:00
|
|
|
irq->mpidr = 0;
|
2018-07-16 15:06:21 +02:00
|
|
|
irq->group = 1;
|
2019-08-23 11:34:16 +01:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
kfree(dist->spis);
|
2019-11-28 14:38:48 +08:00
|
|
|
dist->spis = NULL;
|
2019-08-23 11:34:16 +01:00
|
|
|
return -EINVAL;
|
2018-07-16 15:06:21 +02:00
|
|
|
}
|
2015-12-21 18:09:38 +01:00
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2025-02-25 17:29:24 +00:00
|
|
|
/* Default GICv3 Maintenance Interrupt INTID, as per SBSA */
|
|
|
|
#define DEFAULT_MI_INTID 25
|
|
|
|
|
|
|
|
int kvm_vgic_vcpu_nv_init(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
guard(mutex)(&vcpu->kvm->arch.config_lock);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Matching the tradition established with the timers, provide
|
|
|
|
* a default PPI for the maintenance interrupt. It makes
|
|
|
|
* things easier to reason about.
|
|
|
|
*/
|
|
|
|
if (vcpu->kvm->arch.vgic.mi_intid == 0)
|
|
|
|
vcpu->kvm->arch.vgic.mi_intid = DEFAULT_MI_INTID;
|
|
|
|
ret = kvm_vgic_set_owner(vcpu, vcpu->kvm->arch.vgic.mi_intid, vcpu);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2025-02-12 18:25:58 +00:00
|
|
|
/*
 * Allocate and initialize this vCPU's private interrupt (SGI/PPI) array.
 * Idempotent: returns 0 immediately if the array already exists.
 * Caller must hold the VM's config_lock.
 */
static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type)
{
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
	struct vgic_irq *irqs;
	int intid;

	lockdep_assert_held(&vcpu->kvm->arch.config_lock);

	/* Already allocated by an earlier caller */
	if (vgic_cpu->private_irqs)
		return 0;

	irqs = kcalloc(VGIC_NR_PRIVATE_IRQS, sizeof(struct vgic_irq),
		       GFP_KERNEL_ACCOUNT);
	if (!irqs)
		return -ENOMEM;

	vgic_cpu->private_irqs = irqs;

	/*
	 * Enable and configure all SGIs to be edge-triggered and
	 * configure all PPIs as level-triggered.
	 */
	for (intid = 0; intid < VGIC_NR_PRIVATE_IRQS; intid++) {
		struct vgic_irq *irq = &irqs[intid];

		INIT_LIST_HEAD(&irq->ap_list);
		raw_spin_lock_init(&irq->irq_lock);
		kref_init(&irq->refcount);
		irq->intid = intid;
		irq->vcpu = NULL;
		irq->target_vcpu = vcpu;

		if (vgic_irq_is_sgi(intid)) {
			/* SGIs: always enabled, edge-triggered */
			irq->enabled = 1;
			irq->config = VGIC_CONFIG_EDGE;
		} else {
			/* PPIs: level-triggered */
			irq->config = VGIC_CONFIG_LEVEL;
		}

		if (type == KVM_DEV_TYPE_ARM_VGIC_V3) {
			irq->group = 1;
			irq->mpidr = kvm_vcpu_get_mpidr_aff(vcpu);
		} else if (type == KVM_DEV_TYPE_ARM_VGIC_V2) {
			irq->group = 0;
			irq->targets = BIT(vcpu->vcpu_id);
		}
	}

	return 0;
}
|
|
|
|
|
2025-02-12 18:25:58 +00:00
|
|
|
/* Locking wrapper around vgic_allocate_private_irqs_locked(). */
static int vgic_allocate_private_irqs(struct kvm_vcpu *vcpu, u32 type)
{
	struct kvm *kvm = vcpu->kvm;
	int err;

	mutex_lock(&kvm->arch.config_lock);
	err = vgic_allocate_private_irqs_locked(vcpu, type);
	mutex_unlock(&kvm->arch.config_lock);

	return err;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* kvm_vgic_vcpu_init() - Initialize static VGIC VCPU data
|
|
|
|
* structures and register VCPU-specific KVM iodevs
|
|
|
|
*
|
|
|
|
* @vcpu: pointer to the VCPU being created and initialized
|
|
|
|
*
|
|
|
|
* Only do initialization, but do not actually enable the
|
|
|
|
* VGIC CPU interface
|
|
|
|
*/
|
|
|
|
int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
|
|
|
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
|
|
|
|
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF;
|
|
|
|
|
|
|
|
INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
|
|
|
|
raw_spin_lock_init(&vgic_cpu->ap_list_lock);
|
|
|
|
atomic_set(&vgic_cpu->vgic_v3.its_vpe.vlpi_count, 0);
|
|
|
|
|
2017-05-08 12:30:24 +02:00
|
|
|
if (!irqchip_in_kernel(vcpu->kvm))
|
|
|
|
return 0;
|
|
|
|
|
2025-02-12 18:25:58 +00:00
|
|
|
ret = vgic_allocate_private_irqs(vcpu, dist->vgic_model);
|
KVM: arm64: vgic: Allocate private interrupts on demand
Private interrupts are currently part of the CPU interface structure
that is part of each and every vcpu we create.
Currently, we have 32 of them per vcpu, resulting in a per-vcpu array
that is just shy of 4kB. On its own, that's no big deal, but it gets
in the way of other things:
- each vcpu gets mapped at EL2 on nVHE/hVHE configurations. This
requires memory that is physically contiguous. However, the EL2
code has no purpose looking at the interrupt structures and
could do without them being mapped.
- supporting features such as EPPIs, which extend the number of
private interrupts past the 32 limit would make the array
even larger, even for VMs that do not use the EPPI feature.
Address these issues by moving the private interrupt array outside
of the vcpu, and replace it with a simple pointer. We take this
opportunity to make it obvious what gets initialised when, as
that path was remarkably opaque, and tighten the locking.
Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Link: https://lore.kernel.org/r/20240502154545.3012089-1-maz@kernel.org
Signed-off-by: Marc Zyngier <maz@kernel.org>
2024-05-02 16:45:45 +01:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
2017-05-08 12:30:24 +02:00
|
|
|
/*
|
|
|
|
* If we are creating a VCPU with a GICv3 we must also register the
|
|
|
|
* KVM io device for the redistributor that belongs to this VCPU.
|
|
|
|
*/
|
2017-05-17 13:12:51 +02:00
|
|
|
if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
|
KVM: arm64: vgic: Fix a circular locking issue
Lockdep reports a circular lock dependency between the srcu and the
config_lock:
[ 262.179917] -> #1 (&kvm->srcu){.+.+}-{0:0}:
[ 262.182010] __synchronize_srcu+0xb0/0x224
[ 262.183422] synchronize_srcu_expedited+0x24/0x34
[ 262.184554] kvm_io_bus_register_dev+0x324/0x50c
[ 262.185650] vgic_register_redist_iodev+0x254/0x398
[ 262.186740] vgic_v3_set_redist_base+0x3b0/0x724
[ 262.188087] kvm_vgic_addr+0x364/0x600
[ 262.189189] vgic_set_common_attr+0x90/0x544
[ 262.190278] vgic_v3_set_attr+0x74/0x9c
[ 262.191432] kvm_device_ioctl+0x2a0/0x4e4
[ 262.192515] __arm64_sys_ioctl+0x7ac/0x1ba8
[ 262.193612] invoke_syscall.constprop.0+0x70/0x1e0
[ 262.195006] do_el0_svc+0xe4/0x2d4
[ 262.195929] el0_svc+0x44/0x8c
[ 262.196917] el0t_64_sync_handler+0xf4/0x120
[ 262.198238] el0t_64_sync+0x190/0x194
[ 262.199224]
[ 262.199224] -> #0 (&kvm->arch.config_lock){+.+.}-{3:3}:
[ 262.201094] __lock_acquire+0x2b70/0x626c
[ 262.202245] lock_acquire+0x454/0x778
[ 262.203132] __mutex_lock+0x190/0x8b4
[ 262.204023] mutex_lock_nested+0x24/0x30
[ 262.205100] vgic_mmio_write_v3_misc+0x5c/0x2a0
[ 262.206178] dispatch_mmio_write+0xd8/0x258
[ 262.207498] __kvm_io_bus_write+0x1e0/0x350
[ 262.208582] kvm_io_bus_write+0xe0/0x1cc
[ 262.209653] io_mem_abort+0x2ac/0x6d8
[ 262.210569] kvm_handle_guest_abort+0x9b8/0x1f88
[ 262.211937] handle_exit+0xc4/0x39c
[ 262.212971] kvm_arch_vcpu_ioctl_run+0x90c/0x1c04
[ 262.214154] kvm_vcpu_ioctl+0x450/0x12f8
[ 262.215233] __arm64_sys_ioctl+0x7ac/0x1ba8
[ 262.216402] invoke_syscall.constprop.0+0x70/0x1e0
[ 262.217774] do_el0_svc+0xe4/0x2d4
[ 262.218758] el0_svc+0x44/0x8c
[ 262.219941] el0t_64_sync_handler+0xf4/0x120
[ 262.221110] el0t_64_sync+0x190/0x194
Note that the current report, which can be triggered by the vgic_irq
kselftest, is a triple chain that includes slots_lock, but after
inverting the slots_lock/config_lock dependency, the actual problem
reported above remains.
In several places, the vgic code calls kvm_io_bus_register_dev(), which
synchronizes the srcu, while holding config_lock (#1). And the MMIO
handler takes the config_lock while holding the srcu read lock (#0).
Break dependency #1, by registering the distributor and redistributors
without holding config_lock. The ITS also uses kvm_io_bus_register_dev()
but already relies on slots_lock to serialize calls.
The distributor iodev is created on the first KVM_RUN call. Multiple
threads will race for vgic initialization, and only the first one will
see !vgic_ready() under the lock. To serialize those threads, rely on
slots_lock rather than config_lock.
Redistributors are created earlier, through KVM_DEV_ARM_VGIC_GRP_ADDR
ioctls and vCPU creation. Similarly, serialize the iodev creation with
slots_lock, and the rest with config_lock.
Fixes: f00327731131 ("KVM: arm64: Use config_lock to protect vgic state")
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230518100914.2837292-2-jean-philippe@linaro.org
2023-05-18 11:09:15 +01:00
|
|
|
mutex_lock(&vcpu->kvm->slots_lock);
|
2017-05-08 12:30:24 +02:00
|
|
|
ret = vgic_register_redist_iodev(vcpu);
|
KVM: arm64: vgic: Fix a circular locking issue
Lockdep reports a circular lock dependency between the srcu and the
config_lock:
[ 262.179917] -> #1 (&kvm->srcu){.+.+}-{0:0}:
[ 262.182010] __synchronize_srcu+0xb0/0x224
[ 262.183422] synchronize_srcu_expedited+0x24/0x34
[ 262.184554] kvm_io_bus_register_dev+0x324/0x50c
[ 262.185650] vgic_register_redist_iodev+0x254/0x398
[ 262.186740] vgic_v3_set_redist_base+0x3b0/0x724
[ 262.188087] kvm_vgic_addr+0x364/0x600
[ 262.189189] vgic_set_common_attr+0x90/0x544
[ 262.190278] vgic_v3_set_attr+0x74/0x9c
[ 262.191432] kvm_device_ioctl+0x2a0/0x4e4
[ 262.192515] __arm64_sys_ioctl+0x7ac/0x1ba8
[ 262.193612] invoke_syscall.constprop.0+0x70/0x1e0
[ 262.195006] do_el0_svc+0xe4/0x2d4
[ 262.195929] el0_svc+0x44/0x8c
[ 262.196917] el0t_64_sync_handler+0xf4/0x120
[ 262.198238] el0t_64_sync+0x190/0x194
[ 262.199224]
[ 262.199224] -> #0 (&kvm->arch.config_lock){+.+.}-{3:3}:
[ 262.201094] __lock_acquire+0x2b70/0x626c
[ 262.202245] lock_acquire+0x454/0x778
[ 262.203132] __mutex_lock+0x190/0x8b4
[ 262.204023] mutex_lock_nested+0x24/0x30
[ 262.205100] vgic_mmio_write_v3_misc+0x5c/0x2a0
[ 262.206178] dispatch_mmio_write+0xd8/0x258
[ 262.207498] __kvm_io_bus_write+0x1e0/0x350
[ 262.208582] kvm_io_bus_write+0xe0/0x1cc
[ 262.209653] io_mem_abort+0x2ac/0x6d8
[ 262.210569] kvm_handle_guest_abort+0x9b8/0x1f88
[ 262.211937] handle_exit+0xc4/0x39c
[ 262.212971] kvm_arch_vcpu_ioctl_run+0x90c/0x1c04
[ 262.214154] kvm_vcpu_ioctl+0x450/0x12f8
[ 262.215233] __arm64_sys_ioctl+0x7ac/0x1ba8
[ 262.216402] invoke_syscall.constprop.0+0x70/0x1e0
[ 262.217774] do_el0_svc+0xe4/0x2d4
[ 262.218758] el0_svc+0x44/0x8c
[ 262.219941] el0t_64_sync_handler+0xf4/0x120
[ 262.221110] el0t_64_sync+0x190/0x194
Note that the current report, which can be triggered by the vgic_irq
kselftest, is a triple chain that includes slots_lock, but after
inverting the slots_lock/config_lock dependency, the actual problem
reported above remains.
In several places, the vgic code calls kvm_io_bus_register_dev(), which
synchronizes the srcu, while holding config_lock (#1). And the MMIO
handler takes the config_lock while holding the srcu read lock (#0).
Break dependency #1, by registering the distributor and redistributors
without holding config_lock. The ITS also uses kvm_io_bus_register_dev()
but already relies on slots_lock to serialize calls.
The distributor iodev is created on the first KVM_RUN call. Multiple
threads will race for vgic initialization, and only the first one will
see !vgic_ready() under the lock. To serialize those threads, rely on
slots_lock rather than config_lock.
Redistributors are created earlier, through KVM_DEV_ARM_VGIC_GRP_ADDR
ioctls and vCPU creation. Similarly, serialize the iodev creation with
slots_lock, and the rest with config_lock.
Fixes: f00327731131 ("KVM: arm64: Use config_lock to protect vgic state")
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230518100914.2837292-2-jean-philippe@linaro.org
2023-05-18 11:09:15 +01:00
|
|
|
mutex_unlock(&vcpu->kvm->slots_lock);
|
2017-05-17 13:12:51 +02:00
|
|
|
}
|
2017-05-08 12:30:24 +02:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2017-05-08 12:09:13 +02:00
|
|
|
static void kvm_vgic_vcpu_enable(struct kvm_vcpu *vcpu)
|
2015-12-21 18:09:38 +01:00
|
|
|
{
|
|
|
|
if (kvm_vgic_global_state.type == VGIC_V2)
|
|
|
|
vgic_v2_enable(vcpu);
|
|
|
|
else
|
|
|
|
vgic_v3_enable(vcpu);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* vgic_init: allocates and initializes dist and vcpu data structures
|
|
|
|
* depending on two dimensioning parameters:
|
|
|
|
* - the number of spis
|
|
|
|
* - the number of vcpus
|
|
|
|
* The function is generally called when nr_spis has been explicitly set
|
|
|
|
* by the guest through the KVM DEVICE API. If not nr_spis is set to 256.
|
|
|
|
* vgic_initialized() returns true when this function has succeeded.
|
|
|
|
*/
|
|
|
|
int vgic_init(struct kvm *kvm)
|
|
|
|
{
|
|
|
|
struct vgic_dist *dist = &kvm->arch.vgic;
|
|
|
|
struct kvm_vcpu *vcpu;
|
2025-02-12 18:25:58 +00:00
|
|
|
int ret = 0;
|
2021-11-16 16:04:02 +00:00
|
|
|
unsigned long idx;
|
2015-12-21 18:09:38 +01:00
|
|
|
|
2023-03-27 16:47:47 +00:00
|
|
|
lockdep_assert_held(&kvm->arch.config_lock);
|
|
|
|
|
2015-12-21 18:09:38 +01:00
|
|
|
if (vgic_initialized(kvm))
|
|
|
|
return 0;
|
|
|
|
|
2018-07-03 22:54:14 +02:00
|
|
|
/* Are we also in the middle of creating a VCPU? */
|
|
|
|
if (kvm->created_vcpus != atomic_read(&kvm->online_vcpus))
|
|
|
|
return -EBUSY;
|
|
|
|
|
2015-12-21 18:09:38 +01:00
|
|
|
/* freeze the number of spis */
|
|
|
|
if (!dist->nr_spis)
|
|
|
|
dist->nr_spis = VGIC_NR_IRQS_LEGACY - VGIC_NR_PRIVATE_IRQS;
|
|
|
|
|
|
|
|
ret = kvm_vgic_dist_init(kvm, dist->nr_spis);
|
|
|
|
if (ret)
|
|
|
|
goto out;
|
|
|
|
|
2020-04-24 15:30:30 +01:00
|
|
|
/*
|
2025-07-23 23:28:00 -07:00
|
|
|
* Ensure vPEs are allocated if direct IRQ injection (e.g. vSGIs,
|
|
|
|
* vLPIs) is supported.
|
2020-04-24 15:30:30 +01:00
|
|
|
*/
|
2025-07-23 23:28:00 -07:00
|
|
|
if (vgic_supports_direct_irqs(kvm)) {
|
2018-01-12 11:40:21 +01:00
|
|
|
ret = vgic_v4_init(kvm);
|
|
|
|
if (ret)
|
|
|
|
goto out;
|
|
|
|
}
|
2017-10-27 15:28:38 +01:00
|
|
|
|
2021-11-16 16:04:02 +00:00
|
|
|
kvm_for_each_vcpu(idx, vcpu, kvm)
|
2017-05-08 12:09:13 +02:00
|
|
|
kvm_vgic_vcpu_enable(vcpu);
|
2015-12-21 18:09:38 +01:00
|
|
|
|
2016-07-22 16:20:41 +00:00
|
|
|
ret = kvm_vgic_setup_default_irq_routing(kvm);
|
|
|
|
if (ret)
|
|
|
|
goto out;
|
|
|
|
|
2017-01-17 23:09:13 +01:00
|
|
|
vgic_debug_init(kvm);
|
2015-12-21 18:09:38 +01:00
|
|
|
dist->initialized = true;
|
|
|
|
out:
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void kvm_vgic_dist_destroy(struct kvm *kvm)
|
|
|
|
{
|
|
|
|
struct vgic_dist *dist = &kvm->arch.vgic;
|
2018-05-22 09:55:08 +02:00
|
|
|
struct vgic_redist_region *rdreg, *next;
|
2015-12-21 18:09:38 +01:00
|
|
|
|
|
|
|
dist->ready = false;
|
|
|
|
dist->initialized = false;
|
|
|
|
|
|
|
|
kfree(dist->spis);
|
2018-05-22 09:55:06 +02:00
|
|
|
dist->spis = NULL;
|
2015-12-21 18:09:38 +01:00
|
|
|
dist->nr_spis = 0;
|
2021-04-05 18:39:36 +02:00
|
|
|
dist->vgic_dist_base = VGIC_ADDR_UNDEF;
|
2017-10-27 15:28:38 +01:00
|
|
|
|
2021-04-05 18:39:36 +02:00
|
|
|
if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
|
2021-04-05 18:39:39 +02:00
|
|
|
list_for_each_entry_safe(rdreg, next, &dist->rd_regions, list)
|
2024-06-05 18:56:37 +01:00
|
|
|
vgic_v3_free_redist_region(kvm, rdreg);
|
2018-05-22 09:55:08 +02:00
|
|
|
INIT_LIST_HEAD(&dist->rd_regions);
|
2021-04-05 18:39:36 +02:00
|
|
|
} else {
|
|
|
|
dist->vgic_cpu_base = VGIC_ADDR_UNDEF;
|
2018-05-22 09:55:08 +02:00
|
|
|
}
|
|
|
|
|
2025-07-23 23:28:00 -07:00
|
|
|
if (vgic_supports_direct_irqs(kvm))
|
2017-10-27 15:28:38 +01:00
|
|
|
vgic_v4_teardown(kvm);
|
2024-02-21 05:42:44 +00:00
|
|
|
|
|
|
|
xa_destroy(&dist->lpi_xa);
|
2015-12-21 18:09:38 +01:00
|
|
|
}
|
|
|
|
|
2023-12-07 15:11:58 +00:00
|
|
|
static void __kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
|
2015-12-21 18:09:38 +01:00
|
|
|
{
|
|
|
|
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
|
|
|
|
|
2020-04-14 11:03:47 +08:00
|
|
|
/*
|
|
|
|
* Retire all pending LPIs on this vcpu anyway as we're
|
|
|
|
* going to destroy it.
|
|
|
|
*/
|
|
|
|
vgic_flush_pending_lpis(vcpu);
|
|
|
|
|
2015-12-21 18:09:38 +01:00
|
|
|
INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
|
KVM: arm64: vgic: Allocate private interrupts on demand
Private interrupts are currently part of the CPU interface structure
that is part of each and every vcpu we create.
Currently, we have 32 of them per vcpu, resulting in a per-vcpu array
that is just shy of 4kB. On its own, that's no big deal, but it gets
in the way of other things:
- each vcpu gets mapped at EL2 on nVHE/hVHE configurations. This
requires memory that is physically contiguous. However, the EL2
code has no purpose looking at the interrupt structures and
could do without them being mapped.
- supporting features such as EPPIs, which extend the number of
private interrupts past the 32 limit would make the array
even larger, even for VMs that do not use the EPPI feature.
Address these issues by moving the private interrupt array outside
of the vcpu, and replace it with a simple pointer. We take this
opportunity to make it obvious what gets initialised when, as
that path was remarkably opaque, and tighten the locking.
Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Link: https://lore.kernel.org/r/20240502154545.3012089-1-maz@kernel.org
Signed-off-by: Marc Zyngier <maz@kernel.org>
2024-05-02 16:45:45 +01:00
|
|
|
kfree(vgic_cpu->private_irqs);
|
|
|
|
vgic_cpu->private_irqs = NULL;
|
|
|
|
|
2024-10-07 22:39:09 +00:00
|
|
|
if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
|
|
|
|
/*
|
|
|
|
* If this vCPU is being destroyed because of a failed creation
|
|
|
|
* then unregister the redistributor to avoid leaving behind a
|
|
|
|
* dangling pointer to the vCPU struct.
|
|
|
|
*
|
|
|
|
* vCPUs that have been successfully created (i.e. added to
|
|
|
|
* kvm->vcpu_array) get unregistered in kvm_vgic_destroy(), as
|
|
|
|
* this function gets called while holding kvm->arch.config_lock
|
|
|
|
* in the VM teardown path and would otherwise introduce a lock
|
|
|
|
* inversion w.r.t. kvm->srcu.
|
|
|
|
*
|
|
|
|
* vCPUs that failed creation are torn down outside of the
|
|
|
|
* kvm->arch.config_lock and do not get unregistered in
|
|
|
|
* kvm_vgic_destroy(), meaning it is both safe and necessary to
|
|
|
|
* do so here.
|
|
|
|
*/
|
|
|
|
if (kvm_get_vcpu_by_id(vcpu->kvm, vcpu->vcpu_id) != vcpu)
|
|
|
|
vgic_unregister_redist_iodev(vcpu);
|
|
|
|
|
2023-12-07 15:11:59 +00:00
|
|
|
vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF;
|
2024-10-07 22:39:09 +00:00
|
|
|
}
|
2015-12-21 18:09:38 +01:00
|
|
|
}
|
|
|
|
|
2023-12-07 15:11:58 +00:00
|
|
|
void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
|
|
|
struct kvm *kvm = vcpu->kvm;
|
|
|
|
|
|
|
|
mutex_lock(&kvm->slots_lock);
|
|
|
|
__kvm_vgic_vcpu_destroy(vcpu);
|
|
|
|
mutex_unlock(&kvm->slots_lock);
|
|
|
|
}
|
|
|
|
|
2023-12-07 15:11:57 +00:00
|
|
|
void kvm_vgic_destroy(struct kvm *kvm)
|
2015-12-21 18:09:38 +01:00
|
|
|
{
|
|
|
|
struct kvm_vcpu *vcpu;
|
2021-11-16 16:04:02 +00:00
|
|
|
unsigned long i;
|
2015-12-21 18:09:38 +01:00
|
|
|
|
2023-12-07 15:11:57 +00:00
|
|
|
mutex_lock(&kvm->slots_lock);
|
2024-08-08 10:15:46 +01:00
|
|
|
mutex_lock(&kvm->arch.config_lock);
|
2023-03-27 16:47:47 +00:00
|
|
|
|
2017-01-17 23:09:13 +01:00
|
|
|
vgic_debug_destroy(kvm);
|
|
|
|
|
2015-12-21 18:09:38 +01:00
|
|
|
kvm_for_each_vcpu(i, vcpu, kvm)
|
2023-12-07 15:11:58 +00:00
|
|
|
__kvm_vgic_vcpu_destroy(vcpu);
|
2020-04-14 11:03:47 +08:00
|
|
|
|
|
|
|
kvm_vgic_dist_destroy(kvm);
|
2015-12-21 18:09:38 +01:00
|
|
|
|
2023-03-27 16:47:47 +00:00
|
|
|
mutex_unlock(&kvm->arch.config_lock);
|
2024-08-19 13:50:45 +01:00
|
|
|
|
|
|
|
if (kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
|
|
|
|
kvm_for_each_vcpu(i, vcpu, kvm)
|
|
|
|
vgic_unregister_redist_iodev(vcpu);
|
|
|
|
|
2023-12-07 15:11:57 +00:00
|
|
|
mutex_unlock(&kvm->slots_lock);
|
2017-01-12 09:21:56 +00:00
|
|
|
}
|
|
|
|
|
2015-12-21 18:09:38 +01:00
|
|
|
/**
|
|
|
|
* vgic_lazy_init: Lazy init is only allowed if the GIC exposed to the guest
|
2023-05-18 11:09:18 +01:00
|
|
|
* is a GICv2. A GICv3 must be explicitly initialized by userspace using the
|
2015-12-21 18:09:38 +01:00
|
|
|
* KVM_DEV_ARM_VGIC_GRP_CTRL KVM_DEVICE group.
|
|
|
|
* @kvm: kvm struct pointer
|
|
|
|
*/
|
|
|
|
int vgic_lazy_init(struct kvm *kvm)
|
|
|
|
{
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
if (unlikely(!vgic_initialized(kvm))) {
|
|
|
|
/*
|
|
|
|
* We only provide the automatic initialization of the VGIC
|
|
|
|
* for the legacy case of a GICv2. Any other type must
|
|
|
|
* be explicitly initialized once setup with the respective
|
|
|
|
* KVM device call.
|
|
|
|
*/
|
|
|
|
if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2)
|
|
|
|
return -EBUSY;
|
|
|
|
|
2023-03-27 16:47:47 +00:00
|
|
|
mutex_lock(&kvm->arch.config_lock);
|
2015-12-21 18:09:38 +01:00
|
|
|
ret = vgic_init(kvm);
|
2023-03-27 16:47:47 +00:00
|
|
|
mutex_unlock(&kvm->arch.config_lock);
|
2015-12-21 18:09:38 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2015-12-21 15:04:42 +01:00
|
|
|
/* RESOURCE MAPPING */
|
|
|
|
|
|
|
|
/**
|
2024-01-17 15:07:12 -08:00
|
|
|
* kvm_vgic_map_resources - map the MMIO regions
|
|
|
|
* @kvm: kvm struct pointer
|
|
|
|
*
|
2015-12-21 15:04:42 +01:00
|
|
|
* Map the MMIO regions depending on the VGIC model exposed to the guest
|
|
|
|
* called on the first VCPU run.
|
|
|
|
* Also map the virtual CPU interface into the VM.
|
2020-12-01 15:01:56 +00:00
|
|
|
* v2 calls vgic_init() if not already done.
|
|
|
|
* v3 and derivatives return an error if the VGIC is not initialized.
|
2015-12-21 15:04:42 +01:00
|
|
|
* vgic_ready() returns true if this function has succeeded.
|
|
|
|
*/
|
|
|
|
int kvm_vgic_map_resources(struct kvm *kvm)
|
|
|
|
{
|
|
|
|
struct vgic_dist *dist = &kvm->arch.vgic;
|
2023-06-07 15:38:44 +01:00
|
|
|
enum vgic_type type;
|
KVM: arm64: vgic: Fix a circular locking issue
Lockdep reports a circular lock dependency between the srcu and the
config_lock:
[ 262.179917] -> #1 (&kvm->srcu){.+.+}-{0:0}:
[ 262.182010] __synchronize_srcu+0xb0/0x224
[ 262.183422] synchronize_srcu_expedited+0x24/0x34
[ 262.184554] kvm_io_bus_register_dev+0x324/0x50c
[ 262.185650] vgic_register_redist_iodev+0x254/0x398
[ 262.186740] vgic_v3_set_redist_base+0x3b0/0x724
[ 262.188087] kvm_vgic_addr+0x364/0x600
[ 262.189189] vgic_set_common_attr+0x90/0x544
[ 262.190278] vgic_v3_set_attr+0x74/0x9c
[ 262.191432] kvm_device_ioctl+0x2a0/0x4e4
[ 262.192515] __arm64_sys_ioctl+0x7ac/0x1ba8
[ 262.193612] invoke_syscall.constprop.0+0x70/0x1e0
[ 262.195006] do_el0_svc+0xe4/0x2d4
[ 262.195929] el0_svc+0x44/0x8c
[ 262.196917] el0t_64_sync_handler+0xf4/0x120
[ 262.198238] el0t_64_sync+0x190/0x194
[ 262.199224]
[ 262.199224] -> #0 (&kvm->arch.config_lock){+.+.}-{3:3}:
[ 262.201094] __lock_acquire+0x2b70/0x626c
[ 262.202245] lock_acquire+0x454/0x778
[ 262.203132] __mutex_lock+0x190/0x8b4
[ 262.204023] mutex_lock_nested+0x24/0x30
[ 262.205100] vgic_mmio_write_v3_misc+0x5c/0x2a0
[ 262.206178] dispatch_mmio_write+0xd8/0x258
[ 262.207498] __kvm_io_bus_write+0x1e0/0x350
[ 262.208582] kvm_io_bus_write+0xe0/0x1cc
[ 262.209653] io_mem_abort+0x2ac/0x6d8
[ 262.210569] kvm_handle_guest_abort+0x9b8/0x1f88
[ 262.211937] handle_exit+0xc4/0x39c
[ 262.212971] kvm_arch_vcpu_ioctl_run+0x90c/0x1c04
[ 262.214154] kvm_vcpu_ioctl+0x450/0x12f8
[ 262.215233] __arm64_sys_ioctl+0x7ac/0x1ba8
[ 262.216402] invoke_syscall.constprop.0+0x70/0x1e0
[ 262.217774] do_el0_svc+0xe4/0x2d4
[ 262.218758] el0_svc+0x44/0x8c
[ 262.219941] el0t_64_sync_handler+0xf4/0x120
[ 262.221110] el0t_64_sync+0x190/0x194
Note that the current report, which can be triggered by the vgic_irq
kselftest, is a triple chain that includes slots_lock, but after
inverting the slots_lock/config_lock dependency, the actual problem
reported above remains.
In several places, the vgic code calls kvm_io_bus_register_dev(), which
synchronizes the srcu, while holding config_lock (#1). And the MMIO
handler takes the config_lock while holding the srcu read lock (#0).
Break dependency #1, by registering the distributor and redistributors
without holding config_lock. The ITS also uses kvm_io_bus_register_dev()
but already relies on slots_lock to serialize calls.
The distributor iodev is created on the first KVM_RUN call. Multiple
threads will race for vgic initialization, and only the first one will
see !vgic_ready() under the lock. To serialize those threads, rely on
slots_lock rather than config_lock.
Redistributors are created earlier, through KVM_DEV_ARM_VGIC_GRP_ADDR
ioctls and vCPU creation. Similarly, serialize the iodev creation with
slots_lock, and the rest with config_lock.
Fixes: f00327731131 ("KVM: arm64: Use config_lock to protect vgic state")
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230518100914.2837292-2-jean-philippe@linaro.org
2023-05-18 11:09:15 +01:00
|
|
|
gpa_t dist_base;
|
2015-12-21 15:04:42 +01:00
|
|
|
int ret = 0;
|
|
|
|
|
2020-12-01 15:01:55 +00:00
|
|
|
if (likely(vgic_ready(kvm)))
|
|
|
|
return 0;
|
|
|
|
|
KVM: arm64: vgic: Fix a circular locking issue
Lockdep reports a circular lock dependency between the srcu and the
config_lock:
[ 262.179917] -> #1 (&kvm->srcu){.+.+}-{0:0}:
[ 262.182010] __synchronize_srcu+0xb0/0x224
[ 262.183422] synchronize_srcu_expedited+0x24/0x34
[ 262.184554] kvm_io_bus_register_dev+0x324/0x50c
[ 262.185650] vgic_register_redist_iodev+0x254/0x398
[ 262.186740] vgic_v3_set_redist_base+0x3b0/0x724
[ 262.188087] kvm_vgic_addr+0x364/0x600
[ 262.189189] vgic_set_common_attr+0x90/0x544
[ 262.190278] vgic_v3_set_attr+0x74/0x9c
[ 262.191432] kvm_device_ioctl+0x2a0/0x4e4
[ 262.192515] __arm64_sys_ioctl+0x7ac/0x1ba8
[ 262.193612] invoke_syscall.constprop.0+0x70/0x1e0
[ 262.195006] do_el0_svc+0xe4/0x2d4
[ 262.195929] el0_svc+0x44/0x8c
[ 262.196917] el0t_64_sync_handler+0xf4/0x120
[ 262.198238] el0t_64_sync+0x190/0x194
[ 262.199224]
[ 262.199224] -> #0 (&kvm->arch.config_lock){+.+.}-{3:3}:
[ 262.201094] __lock_acquire+0x2b70/0x626c
[ 262.202245] lock_acquire+0x454/0x778
[ 262.203132] __mutex_lock+0x190/0x8b4
[ 262.204023] mutex_lock_nested+0x24/0x30
[ 262.205100] vgic_mmio_write_v3_misc+0x5c/0x2a0
[ 262.206178] dispatch_mmio_write+0xd8/0x258
[ 262.207498] __kvm_io_bus_write+0x1e0/0x350
[ 262.208582] kvm_io_bus_write+0xe0/0x1cc
[ 262.209653] io_mem_abort+0x2ac/0x6d8
[ 262.210569] kvm_handle_guest_abort+0x9b8/0x1f88
[ 262.211937] handle_exit+0xc4/0x39c
[ 262.212971] kvm_arch_vcpu_ioctl_run+0x90c/0x1c04
[ 262.214154] kvm_vcpu_ioctl+0x450/0x12f8
[ 262.215233] __arm64_sys_ioctl+0x7ac/0x1ba8
[ 262.216402] invoke_syscall.constprop.0+0x70/0x1e0
[ 262.217774] do_el0_svc+0xe4/0x2d4
[ 262.218758] el0_svc+0x44/0x8c
[ 262.219941] el0t_64_sync_handler+0xf4/0x120
[ 262.221110] el0t_64_sync+0x190/0x194
Note that the current report, which can be triggered by the vgic_irq
kselftest, is a triple chain that includes slots_lock, but after
inverting the slots_lock/config_lock dependency, the actual problem
reported above remains.
In several places, the vgic code calls kvm_io_bus_register_dev(), which
synchronizes the srcu, while holding config_lock (#1). And the MMIO
handler takes the config_lock while holding the srcu read lock (#0).
Break dependency #1, by registering the distributor and redistributors
without holding config_lock. The ITS also uses kvm_io_bus_register_dev()
but already relies on slots_lock to serialize calls.
The distributor iodev is created on the first KVM_RUN call. Multiple
threads will race for vgic initialization, and only the first one will
see !vgic_ready() under the lock. To serialize those threads, rely on
slots_lock rather than config_lock.
Redistributors are created earlier, through KVM_DEV_ARM_VGIC_GRP_ADDR
ioctls and vCPU creation. Similarly, serialize the iodev creation with
slots_lock, and the rest with config_lock.
Fixes: f00327731131 ("KVM: arm64: Use config_lock to protect vgic state")
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230518100914.2837292-2-jean-philippe@linaro.org
2023-05-18 11:09:15 +01:00
|
|
|
mutex_lock(&kvm->slots_lock);
|
2023-03-27 16:47:47 +00:00
|
|
|
mutex_lock(&kvm->arch.config_lock);
|
2020-12-01 15:01:55 +00:00
|
|
|
if (vgic_ready(kvm))
|
|
|
|
goto out;
|
|
|
|
|
2015-12-21 15:04:42 +01:00
|
|
|
if (!irqchip_in_kernel(kvm))
|
|
|
|
goto out;
|
|
|
|
|
2023-06-07 15:38:44 +01:00
|
|
|
if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) {
|
2015-12-21 15:04:42 +01:00
|
|
|
ret = vgic_v2_map_resources(kvm);
|
2023-06-07 15:38:44 +01:00
|
|
|
type = VGIC_V2;
|
|
|
|
} else {
|
2015-12-21 15:04:42 +01:00
|
|
|
ret = vgic_v3_map_resources(kvm);
|
2023-06-07 15:38:44 +01:00
|
|
|
type = VGIC_V3;
|
|
|
|
}
|
2017-01-12 09:21:56 +00:00
|
|
|
|
2023-12-07 15:11:57 +00:00
|
|
|
if (ret)
|
KVM: arm64: vgic: Fix a circular locking issue
Lockdep reports a circular lock dependency between the srcu and the
config_lock:
[ 262.179917] -> #1 (&kvm->srcu){.+.+}-{0:0}:
[ 262.182010] __synchronize_srcu+0xb0/0x224
[ 262.183422] synchronize_srcu_expedited+0x24/0x34
[ 262.184554] kvm_io_bus_register_dev+0x324/0x50c
[ 262.185650] vgic_register_redist_iodev+0x254/0x398
[ 262.186740] vgic_v3_set_redist_base+0x3b0/0x724
[ 262.188087] kvm_vgic_addr+0x364/0x600
[ 262.189189] vgic_set_common_attr+0x90/0x544
[ 262.190278] vgic_v3_set_attr+0x74/0x9c
[ 262.191432] kvm_device_ioctl+0x2a0/0x4e4
[ 262.192515] __arm64_sys_ioctl+0x7ac/0x1ba8
[ 262.193612] invoke_syscall.constprop.0+0x70/0x1e0
[ 262.195006] do_el0_svc+0xe4/0x2d4
[ 262.195929] el0_svc+0x44/0x8c
[ 262.196917] el0t_64_sync_handler+0xf4/0x120
[ 262.198238] el0t_64_sync+0x190/0x194
[ 262.199224]
[ 262.199224] -> #0 (&kvm->arch.config_lock){+.+.}-{3:3}:
[ 262.201094] __lock_acquire+0x2b70/0x626c
[ 262.202245] lock_acquire+0x454/0x778
[ 262.203132] __mutex_lock+0x190/0x8b4
[ 262.204023] mutex_lock_nested+0x24/0x30
[ 262.205100] vgic_mmio_write_v3_misc+0x5c/0x2a0
[ 262.206178] dispatch_mmio_write+0xd8/0x258
[ 262.207498] __kvm_io_bus_write+0x1e0/0x350
[ 262.208582] kvm_io_bus_write+0xe0/0x1cc
[ 262.209653] io_mem_abort+0x2ac/0x6d8
[ 262.210569] kvm_handle_guest_abort+0x9b8/0x1f88
[ 262.211937] handle_exit+0xc4/0x39c
[ 262.212971] kvm_arch_vcpu_ioctl_run+0x90c/0x1c04
[ 262.214154] kvm_vcpu_ioctl+0x450/0x12f8
[ 262.215233] __arm64_sys_ioctl+0x7ac/0x1ba8
[ 262.216402] invoke_syscall.constprop.0+0x70/0x1e0
[ 262.217774] do_el0_svc+0xe4/0x2d4
[ 262.218758] el0_svc+0x44/0x8c
[ 262.219941] el0t_64_sync_handler+0xf4/0x120
[ 262.221110] el0t_64_sync+0x190/0x194
Note that the current report, which can be triggered by the vgic_irq
kselftest, is a triple chain that includes slots_lock, but after
inverting the slots_lock/config_lock dependency, the actual problem
reported above remains.
In several places, the vgic code calls kvm_io_bus_register_dev(), which
synchronizes the srcu, while holding config_lock (#1). And the MMIO
handler takes the config_lock while holding the srcu read lock (#0).
Break dependency #1, by registering the distributor and redistributors
without holding config_lock. The ITS also uses kvm_io_bus_register_dev()
but already relies on slots_lock to serialize calls.
The distributor iodev is created on the first KVM_RUN call. Multiple
threads will race for vgic initialization, and only the first one will
see !vgic_ready() under the lock. To serialize those threads, rely on
slots_lock rather than config_lock.
Redistributors are created earlier, through KVM_DEV_ARM_VGIC_GRP_ADDR
ioctls and vCPU creation. Similarly, serialize the iodev creation with
slots_lock, and the rest with config_lock.
Fixes: f00327731131 ("KVM: arm64: Use config_lock to protect vgic state")
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230518100914.2837292-2-jean-philippe@linaro.org
2023-05-18 11:09:15 +01:00
|
|
|
goto out;
|
2023-12-07 15:11:57 +00:00
|
|
|
|
KVM: arm64: vgic: Fix a circular locking issue
Lockdep reports a circular lock dependency between the srcu and the
config_lock:
[ 262.179917] -> #1 (&kvm->srcu){.+.+}-{0:0}:
[ 262.182010] __synchronize_srcu+0xb0/0x224
[ 262.183422] synchronize_srcu_expedited+0x24/0x34
[ 262.184554] kvm_io_bus_register_dev+0x324/0x50c
[ 262.185650] vgic_register_redist_iodev+0x254/0x398
[ 262.186740] vgic_v3_set_redist_base+0x3b0/0x724
[ 262.188087] kvm_vgic_addr+0x364/0x600
[ 262.189189] vgic_set_common_attr+0x90/0x544
[ 262.190278] vgic_v3_set_attr+0x74/0x9c
[ 262.191432] kvm_device_ioctl+0x2a0/0x4e4
[ 262.192515] __arm64_sys_ioctl+0x7ac/0x1ba8
[ 262.193612] invoke_syscall.constprop.0+0x70/0x1e0
[ 262.195006] do_el0_svc+0xe4/0x2d4
[ 262.195929] el0_svc+0x44/0x8c
[ 262.196917] el0t_64_sync_handler+0xf4/0x120
[ 262.198238] el0t_64_sync+0x190/0x194
[ 262.199224]
[ 262.199224] -> #0 (&kvm->arch.config_lock){+.+.}-{3:3}:
[ 262.201094] __lock_acquire+0x2b70/0x626c
[ 262.202245] lock_acquire+0x454/0x778
[ 262.203132] __mutex_lock+0x190/0x8b4
[ 262.204023] mutex_lock_nested+0x24/0x30
[ 262.205100] vgic_mmio_write_v3_misc+0x5c/0x2a0
[ 262.206178] dispatch_mmio_write+0xd8/0x258
[ 262.207498] __kvm_io_bus_write+0x1e0/0x350
[ 262.208582] kvm_io_bus_write+0xe0/0x1cc
[ 262.209653] io_mem_abort+0x2ac/0x6d8
[ 262.210569] kvm_handle_guest_abort+0x9b8/0x1f88
[ 262.211937] handle_exit+0xc4/0x39c
[ 262.212971] kvm_arch_vcpu_ioctl_run+0x90c/0x1c04
[ 262.214154] kvm_vcpu_ioctl+0x450/0x12f8
[ 262.215233] __arm64_sys_ioctl+0x7ac/0x1ba8
[ 262.216402] invoke_syscall.constprop.0+0x70/0x1e0
[ 262.217774] do_el0_svc+0xe4/0x2d4
[ 262.218758] el0_svc+0x44/0x8c
[ 262.219941] el0t_64_sync_handler+0xf4/0x120
[ 262.221110] el0t_64_sync+0x190/0x194
Note that the current report, which can be triggered by the vgic_irq
kselftest, is a triple chain that includes slots_lock, but after
inverting the slots_lock/config_lock dependency, the actual problem
reported above remains.
In several places, the vgic code calls kvm_io_bus_register_dev(), which
synchronizes the srcu, while holding config_lock (#1). And the MMIO
handler takes the config_lock while holding the srcu read lock (#0).
Break dependency #1, by registering the distributor and redistributors
without holding config_lock. The ITS also uses kvm_io_bus_register_dev()
but already relies on slots_lock to serialize calls.
The distributor iodev is created on the first KVM_RUN call. Multiple
threads will race for vgic initialization, and only the first one will
see !vgic_ready() under the lock. To serialize those threads, rely on
slots_lock rather than config_lock.
Redistributors are created earlier, through KVM_DEV_ARM_VGIC_GRP_ADDR
ioctls and vCPU creation. Similarly, serialize the iodev creation with
slots_lock, and the rest with config_lock.
Fixes: f00327731131 ("KVM: arm64: Use config_lock to protect vgic state")
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230518100914.2837292-2-jean-philippe@linaro.org
2023-05-18 11:09:15 +01:00
|
|
|
dist_base = dist->vgic_dist_base;
|
|
|
|
mutex_unlock(&kvm->arch.config_lock);
|
|
|
|
|
2023-06-07 15:38:44 +01:00
|
|
|
ret = vgic_register_dist_iodev(kvm, dist_base, type);
|
2024-10-17 00:19:47 +00:00
|
|
|
if (ret) {
|
KVM: arm64: vgic: Fix a circular locking issue
Lockdep reports a circular lock dependency between the srcu and the
config_lock:
[ 262.179917] -> #1 (&kvm->srcu){.+.+}-{0:0}:
[ 262.182010] __synchronize_srcu+0xb0/0x224
[ 262.183422] synchronize_srcu_expedited+0x24/0x34
[ 262.184554] kvm_io_bus_register_dev+0x324/0x50c
[ 262.185650] vgic_register_redist_iodev+0x254/0x398
[ 262.186740] vgic_v3_set_redist_base+0x3b0/0x724
[ 262.188087] kvm_vgic_addr+0x364/0x600
[ 262.189189] vgic_set_common_attr+0x90/0x544
[ 262.190278] vgic_v3_set_attr+0x74/0x9c
[ 262.191432] kvm_device_ioctl+0x2a0/0x4e4
[ 262.192515] __arm64_sys_ioctl+0x7ac/0x1ba8
[ 262.193612] invoke_syscall.constprop.0+0x70/0x1e0
[ 262.195006] do_el0_svc+0xe4/0x2d4
[ 262.195929] el0_svc+0x44/0x8c
[ 262.196917] el0t_64_sync_handler+0xf4/0x120
[ 262.198238] el0t_64_sync+0x190/0x194
[ 262.199224]
[ 262.199224] -> #0 (&kvm->arch.config_lock){+.+.}-{3:3}:
[ 262.201094] __lock_acquire+0x2b70/0x626c
[ 262.202245] lock_acquire+0x454/0x778
[ 262.203132] __mutex_lock+0x190/0x8b4
[ 262.204023] mutex_lock_nested+0x24/0x30
[ 262.205100] vgic_mmio_write_v3_misc+0x5c/0x2a0
[ 262.206178] dispatch_mmio_write+0xd8/0x258
[ 262.207498] __kvm_io_bus_write+0x1e0/0x350
[ 262.208582] kvm_io_bus_write+0xe0/0x1cc
[ 262.209653] io_mem_abort+0x2ac/0x6d8
[ 262.210569] kvm_handle_guest_abort+0x9b8/0x1f88
[ 262.211937] handle_exit+0xc4/0x39c
[ 262.212971] kvm_arch_vcpu_ioctl_run+0x90c/0x1c04
[ 262.214154] kvm_vcpu_ioctl+0x450/0x12f8
[ 262.215233] __arm64_sys_ioctl+0x7ac/0x1ba8
[ 262.216402] invoke_syscall.constprop.0+0x70/0x1e0
[ 262.217774] do_el0_svc+0xe4/0x2d4
[ 262.218758] el0_svc+0x44/0x8c
[ 262.219941] el0t_64_sync_handler+0xf4/0x120
[ 262.221110] el0t_64_sync+0x190/0x194
Note that the current report, which can be triggered by the vgic_irq
kselftest, is a triple chain that includes slots_lock, but after
inverting the slots_lock/config_lock dependency, the actual problem
reported above remains.
In several places, the vgic code calls kvm_io_bus_register_dev(), which
synchronizes the srcu, while holding config_lock (#1). And the MMIO
handler takes the config_lock while holding the srcu read lock (#0).
Break dependency #1, by registering the distributor and redistributors
without holding config_lock. The ITS also uses kvm_io_bus_register_dev()
but already relies on slots_lock to serialize calls.
The distributor iodev is created on the first KVM_RUN call. Multiple
threads will race for vgic initialization, and only the first one will
see !vgic_ready() under the lock. To serialize those threads, rely on
slots_lock rather than config_lock.
Redistributors are created earlier, through KVM_DEV_ARM_VGIC_GRP_ADDR
ioctls and vCPU creation. Similarly, serialize the iodev creation with
slots_lock, and the rest with config_lock.
Fixes: f00327731131 ("KVM: arm64: Use config_lock to protect vgic state")
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230518100914.2837292-2-jean-philippe@linaro.org
2023-05-18 11:09:15 +01:00
|
|
|
kvm_err("Unable to register VGIC dist MMIO regions\n");
|
2024-10-17 00:19:47 +00:00
|
|
|
goto out_slots;
|
|
|
|
}
|
2017-01-12 09:21:56 +00:00
|
|
|
|
2024-10-17 00:19:47 +00:00
|
|
|
/*
|
|
|
|
* kvm_io_bus_register_dev() guarantees all readers see the new MMIO
|
|
|
|
* registration before returning through synchronize_srcu(), which also
|
|
|
|
* implies a full memory barrier. As such, marking the distributor as
|
|
|
|
* 'ready' here is guaranteed to be ordered after all vCPUs having seen
|
|
|
|
* a completely configured distributor.
|
|
|
|
*/
|
|
|
|
dist->ready = true;
|
2023-12-07 15:11:57 +00:00
|
|
|
goto out_slots;
|
2015-12-21 15:04:42 +01:00
|
|
|
out:
|
2023-03-27 16:47:47 +00:00
|
|
|
mutex_unlock(&kvm->arch.config_lock);
|
2023-12-07 15:11:57 +00:00
|
|
|
out_slots:
|
|
|
|
if (ret)
|
KVM: arm64: Don't eagerly teardown the vgic on init error
As there is very little ordering in the KVM API, userspace can
instantiate a half-baked GIC (missing its memory map, for example)
at almost any time.
This means that, with the right timing, a thread running vcpu-0
can enter the kernel without a GIC configured and get a GIC created
behind its back by another thread. Amusingly, it will pick up
that GIC and start messing with the data structures without the
GIC having been fully initialised.
Similarly, a thread running vcpu-1 can enter the kernel, and try
to init the GIC that was previously created. Since this GIC isn't
properly configured (no memory map), it fails to correctly initialise.
And that's the point where we decide to teardown the GIC, freeing all
its resources. Behind vcpu-0's back. Things stop pretty abruptly,
with a variety of symptoms. Clearly, this isn't good, we should be
a bit more careful about this.
It is obvious that this guest is not viable, as it is missing some
important part of its configuration. So instead of trying to tear
bits of it down, let's just mark it as *dead*. It means that any
further interaction from userspace will result in -EIO. The memory
will be released on the "normal" path, when userspace gives up.
Cc: stable@vger.kernel.org
Reported-by: Alexander Potapenko <glider@google.com>
Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Link: https://lore.kernel.org/r/20241009183603.3221824-1-maz@kernel.org
Signed-off-by: Marc Zyngier <maz@kernel.org>
2024-10-09 19:36:03 +01:00
|
|
|
kvm_vm_dead(kvm);
|
|
|
|
|
|
|
|
mutex_unlock(&kvm->slots_lock);
|
2023-12-07 15:11:57 +00:00
|
|
|
|
2015-12-21 15:04:42 +01:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2015-12-01 15:02:35 +01:00
|
|
|
/* GENERIC PROBE */
|
|
|
|
|
2022-11-30 23:09:00 +00:00
|
|
|
void kvm_vgic_cpu_up(void)
|
2015-12-01 15:02:35 +01:00
|
|
|
{
|
|
|
|
enable_percpu_irq(kvm_vgic_global_state.maint_irq, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2022-11-30 23:09:00 +00:00
|
|
|
void kvm_vgic_cpu_down(void)
|
2016-07-13 17:17:02 +00:00
|
|
|
{
|
|
|
|
disable_percpu_irq(kvm_vgic_global_state.maint_irq);
|
2015-12-01 15:02:35 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static irqreturn_t vgic_maintenance_handler(int irq, void *data)
|
|
|
|
{
|
2025-02-25 17:29:24 +00:00
|
|
|
struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)data;
|
|
|
|
|
2015-12-01 15:02:35 +01:00
|
|
|
/*
|
|
|
|
* We cannot rely on the vgic maintenance interrupt to be
|
|
|
|
* delivered synchronously. This means we can only use it to
|
|
|
|
* exit the VM, and we perform the handling of EOIed
|
2018-05-02 11:53:03 +01:00
|
|
|
* interrupts on the exit path (see vgic_fold_lr_state).
|
2025-02-25 17:29:24 +00:00
|
|
|
*
|
|
|
|
* Of course, NV throws a wrench in this plan, and needs
|
|
|
|
* something special.
|
2015-12-01 15:02:35 +01:00
|
|
|
*/
|
2025-02-25 17:29:24 +00:00
|
|
|
if (vcpu && vgic_state_is_nested(vcpu))
|
|
|
|
vgic_v3_handle_nested_maint_irq(vcpu);
|
|
|
|
|
2015-12-01 15:02:35 +01:00
|
|
|
return IRQ_HANDLED;
|
|
|
|
}
|
|
|
|
|
2021-02-27 10:23:45 +00:00
|
|
|
static struct gic_kvm_info *gic_kvm_info;
|
|
|
|
|
|
|
|
/*
 * vgic_set_kvm_info - stash the GIC description handed over by the irqchip
 * driver, for later consumption by kvm_vgic_hyp_init().
 *
 * Must be called at most once. Allocation failure is tolerated:
 * kvm_vgic_hyp_init() returns -ENODEV when gic_kvm_info is left NULL.
 */
void __init vgic_set_kvm_info(const struct gic_kvm_info *info)
{
	BUG_ON(gic_kvm_info != NULL);

	/* kmemdup() replaces the open-coded kmalloc()+copy pair. */
	gic_kvm_info = kmemdup(info, sizeof(*info), GFP_KERNEL);
}
|
|
|
|
|
2017-03-18 13:56:56 +01:00
|
|
|
/**
|
|
|
|
* kvm_vgic_init_cpu_hardware - initialize the GIC VE hardware
|
|
|
|
*
|
|
|
|
* For a specific CPU, initialize the GIC VE hardware.
|
|
|
|
*/
|
|
|
|
void kvm_vgic_init_cpu_hardware(void)
|
|
|
|
{
|
|
|
|
BUG_ON(preemptible());
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We want to make sure the list registers start out clear so that we
|
|
|
|
* only have the program the used registers.
|
|
|
|
*/
|
2025-06-27 10:09:02 +00:00
|
|
|
if (kvm_vgic_global_state.type == VGIC_V2) {
|
2017-03-18 13:56:56 +01:00
|
|
|
vgic_v2_init_lrs();
|
2025-06-27 10:09:02 +00:00
|
|
|
} else if (kvm_vgic_global_state.type == VGIC_V3 ||
|
|
|
|
kvm_vgic_global_state.has_gcie_v3_compat) {
|
2017-03-18 13:56:56 +01:00
|
|
|
kvm_call_hyp(__vgic_v3_init_lrs);
|
2025-06-27 10:09:02 +00:00
|
|
|
}
|
2017-03-18 13:56:56 +01:00
|
|
|
}
|
|
|
|
|
2015-12-01 15:02:35 +01:00
|
|
|
/**
|
|
|
|
* kvm_vgic_hyp_init: populates the kvm_vgic_global_state variable
|
|
|
|
* according to the host GIC model. Accordingly calls either
|
|
|
|
* vgic_v2/v3_probe which registers the KVM_DEVICE that can be
|
|
|
|
* instantiated by a guest later on .
|
|
|
|
*/
|
|
|
|
int kvm_vgic_hyp_init(void)
|
|
|
|
{
|
2021-02-28 11:09:59 +00:00
|
|
|
bool has_mask;
|
2015-12-01 15:02:35 +01:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (!gic_kvm_info)
|
|
|
|
return -ENODEV;
|
|
|
|
|
2021-02-28 11:09:59 +00:00
|
|
|
has_mask = !gic_kvm_info->no_maint_irq_mask;
|
|
|
|
|
|
|
|
if (has_mask && !gic_kvm_info->maint_irq) {
|
2015-12-01 15:02:35 +01:00
|
|
|
kvm_err("No vgic maintenance irq\n");
|
|
|
|
return -ENXIO;
|
|
|
|
}
|
|
|
|
|
2021-03-15 21:56:47 +00:00
|
|
|
/*
|
|
|
|
* If we get one of these oddball non-GICs, taint the kernel,
|
|
|
|
* as we have no idea of how they *really* behave.
|
|
|
|
*/
|
|
|
|
if (gic_kvm_info->no_hw_deactivation) {
|
|
|
|
kvm_info("Non-architectural vgic, tainting kernel\n");
|
|
|
|
add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
|
|
|
|
kvm_vgic_global_state.no_hw_deactivation = true;
|
|
|
|
}
|
|
|
|
|
2015-12-01 15:02:35 +01:00
|
|
|
switch (gic_kvm_info->type) {
|
|
|
|
case GIC_V2:
|
|
|
|
ret = vgic_v2_probe(gic_kvm_info);
|
|
|
|
break;
|
|
|
|
case GIC_V3:
|
|
|
|
ret = vgic_v3_probe(gic_kvm_info);
|
2016-09-12 15:49:15 +01:00
|
|
|
if (!ret) {
|
|
|
|
static_branch_enable(&kvm_vgic_global_state.gicv3_cpuif);
|
|
|
|
kvm_info("GIC system register CPU interface enabled\n");
|
|
|
|
}
|
2015-12-01 15:02:35 +01:00
|
|
|
break;
|
2025-06-27 10:09:02 +00:00
|
|
|
case GIC_V5:
|
|
|
|
ret = vgic_v5_probe(gic_kvm_info);
|
|
|
|
break;
|
2015-12-01 15:02:35 +01:00
|
|
|
default:
|
|
|
|
ret = -ENODEV;
|
2019-08-25 10:44:17 +01:00
|
|
|
}
|
2015-12-01 15:02:35 +01:00
|
|
|
|
2021-02-27 10:23:45 +00:00
|
|
|
kvm_vgic_global_state.maint_irq = gic_kvm_info->maint_irq;
|
|
|
|
|
|
|
|
kfree(gic_kvm_info);
|
|
|
|
gic_kvm_info = NULL;
|
|
|
|
|
2015-12-01 15:02:35 +01:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
2023-01-03 09:50:21 +00:00
|
|
|
if (!has_mask && !kvm_vgic_global_state.maint_irq)
|
2021-02-28 11:09:59 +00:00
|
|
|
return 0;
|
|
|
|
|
2015-12-01 15:02:35 +01:00
|
|
|
ret = request_percpu_irq(kvm_vgic_global_state.maint_irq,
|
|
|
|
vgic_maintenance_handler,
|
|
|
|
"vgic", kvm_get_running_vcpus());
|
|
|
|
if (ret) {
|
|
|
|
kvm_err("Cannot register interrupt %d\n",
|
|
|
|
kvm_vgic_global_state.maint_irq);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
kvm_info("vgic interrupt IRQ%d\n", kvm_vgic_global_state.maint_irq);
|
|
|
|
return 0;
|
|
|
|
}
|