KVM: arm64: Allow userspace to change the implementation ID registers

KVM's treatment of the ID registers that describe the implementation
(MIDR, REVIDR, and AIDR) is interesting, to say the least. On the
userspace-facing end of it, KVM presents the values of the boot CPU on
all vCPUs and treats them as invariant. On the guest side of things KVM
presents the hardware values of the local CPU, which can change during
CPU migration in a big-little system.

While one may call this fragile, there is at least some degree of
predictability around it. For example, if a VMM wanted to present
big-little to a guest, it could affine vCPUs accordingly to the correct
clusters.

All of this makes a giant mess out of adding support for making these
implementation ID registers writable. Avoid breaking the rather subtle
ABI around the old way of doing things by requiring opt-in from
userspace to make the registers writable.

When the cap is enabled, allow userspace to set MIDR, REVIDR, and AIDR
to any non-reserved value and present those values consistently across
all vCPUs.

Signed-off-by: Sebastian Ott <sebott@redhat.com>
[oliver: changelog, capability]
Link: https://lore.kernel.org/r/20250225005401.679536-5-oliver.upton@linux.dev
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
This commit is contained in:
Sebastian Ott 2025-02-24 16:54:00 -08:00 committed by Oliver Upton
parent d0d81e03e6
commit 3adaee7830
6 changed files with 78 additions and 7 deletions

View file

@ -8258,6 +8258,24 @@ KVM exits with the register state of either the L1 or L2 guest
depending on which executed at the time of an exit. Userspace must
take care to differentiate between these cases.
7.37 KVM_CAP_ARM_WRITABLE_IMP_ID_REGS
-------------------------------------
:Architectures: arm64
:Target: VM
:Parameters: None
:Returns: 0 on success, -EINVAL if vCPUs have been created before enabling this
capability.
This capability changes the behavior of the registers that identify a PE
implementation of the Arm architecture: MIDR_EL1, REVIDR_EL1, and AIDR_EL1.
By default, these registers are visible to userspace but treated as invariant.
When this capability is enabled, KVM allows userspace to change the
aforementioned registers before the first KVM_RUN. These registers are VM
scoped, meaning that the same set of values is presented on all vCPUs in a
given VM.
8. Other capabilities.
======================

View file

@ -334,6 +334,8 @@ struct kvm_arch {
#define KVM_ARCH_FLAG_FGU_INITIALIZED 8
/* SVE exposed to guest */
#define KVM_ARCH_FLAG_GUEST_HAS_SVE 9
/* MIDR_EL1, REVIDR_EL1, and AIDR_EL1 are writable from userspace */
#define KVM_ARCH_FLAG_WRITABLE_IMP_ID_REGS 10
unsigned long flags;
/* VM-wide vCPU feature set */

View file

@ -125,6 +125,14 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
}
mutex_unlock(&kvm->slots_lock);
break;
case KVM_CAP_ARM_WRITABLE_IMP_ID_REGS:
mutex_lock(&kvm->lock);
if (!kvm->created_vcpus) {
r = 0;
set_bit(KVM_ARCH_FLAG_WRITABLE_IMP_ID_REGS, &kvm->arch.flags);
}
mutex_unlock(&kvm->lock);
break;
default:
break;
}
@ -313,6 +321,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ARM_SYSTEM_SUSPEND:
case KVM_CAP_IRQFD_RESAMPLE:
case KVM_CAP_COUNTER_OFFSET:
case KVM_CAP_ARM_WRITABLE_IMP_ID_REGS:
r = 1;
break;
case KVM_CAP_SET_GUEST_DEBUG2:

View file

@ -45,7 +45,13 @@ static inline u64 *ctxt_mdscr_el1(struct kvm_cpu_context *ctxt)
/*
 * Select the MIDR_EL1 value that belongs to @ctxt.
 *
 * The VM-scoped value is used only when @ctxt is a guest context AND the VM
 * has KVM_ARCH_FLAG_WRITABLE_IMP_ID_REGS set. In every other case the value
 * returned by read_cpuid_id() is used instead.
 */
static inline u64 ctxt_midr_el1(struct kvm_cpu_context *ctxt)
{
	struct kvm *kvm = kern_hyp_va(ctxt_to_vcpu(ctxt)->kvm);

	/* Non-guest contexts never see a VM-scoped override. */
	if (!ctxt_is_guest(ctxt))
		return read_cpuid_id();

	/* The VM did not opt in to writable implementation ID registers. */
	if (!test_bit(KVM_ARCH_FLAG_WRITABLE_IMP_ID_REGS, &kvm->arch.flags))
		return read_cpuid_id();

	return kvm_read_vm_id_reg(kvm, SYS_MIDR_EL1);
}
static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt)

View file

@ -2524,6 +2524,17 @@ static bool access_imp_id_reg(struct kvm_vcpu *vcpu,
if (p->is_write)
return write_to_read_only(vcpu, p, r);
/*
* Return the VM-scoped implementation ID register values if userspace
* has made them writable.
*/
if (test_bit(KVM_ARCH_FLAG_WRITABLE_IMP_ID_REGS, &vcpu->kvm->arch.flags))
return access_id_reg(vcpu, p, r);
/*
* Otherwise, fall back to the old behavior of returning the value of
* the current CPU.
*/
switch (reg_to_encoding(r)) {
case SYS_REVIDR_EL1:
p->regval = read_sysreg(revidr_el1);
@ -2567,19 +2578,43 @@ static u64 reset_imp_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
/*
 * Userspace write handler for the implementation ID registers.
 *
 * @vcpu: vCPU the write was issued on; the value itself is stored per-VM.
 * @r:    descriptor of the register being written; r->val is the mask of
 *        bits userspace is allowed to set.
 * @val:  value userspace wants to install.
 *
 * Returns 0 if @val matches the current value or was accepted, -EINVAL if
 * the VM has not set KVM_ARCH_FLAG_WRITABLE_IMP_ID_REGS or @val has bits
 * outside r->val, and -EBUSY if the VM has already run.
 */
static int set_imp_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
			  u64 val)
{
	struct kvm *kvm = vcpu->kvm;
	u64 expected;

	/*
	 * Take the lock before reading the current value so that the
	 * compare-and-update below is done under config_lock as a whole.
	 */
	guard(mutex)(&kvm->arch.config_lock);

	/* Writing back the current value is always accepted. */
	expected = read_id_reg(vcpu, r);
	if (expected == val)
		return 0;

	/* Without the opt-in flag the registers stay invariant. */
	if (!test_bit(KVM_ARCH_FLAG_WRITABLE_IMP_ID_REGS, &kvm->arch.flags))
		return -EINVAL;

	/*
	 * Once the VM has started the ID registers are immutable. Reject the
	 * write if userspace tries to change it.
	 */
	if (kvm_vm_has_ran_once(kvm))
		return -EBUSY;

	/*
	 * Any value is allowed for the implementation ID registers so long as
	 * it is within the writable mask.
	 */
	if ((val & r->val) != val)
		return -EINVAL;

	kvm_set_vm_id_reg(kvm, reg_to_encoding(r), val);
	return 0;
}
/*
 * Build the sys_reg_desc initialiser for an implementation ID register.
 *
 * @reg:  register name without the SYS_ prefix (e.g. MIDR_EL1).
 * @mask: stored in .val; set_imp_id_reg() rejects userspace values that
 *        have bits set outside of it.
 */
#define IMPLEMENTATION_ID(reg, mask) {			\
	SYS_DESC(SYS_##reg),				\
	.access = access_imp_id_reg,			\
	.get_user = get_id_reg,				\
	.set_user = set_imp_id_reg,			\
	.reset = reset_imp_id_reg,			\
	.val = (mask),					\
}
/*
@ -2630,9 +2665,9 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_DBGVCR32_EL2), undef_access, reset_val, DBGVCR32_EL2, 0 },
IMPLEMENTATION_ID(MIDR_EL1),
IMPLEMENTATION_ID(MIDR_EL1, GENMASK_ULL(31, 0)),
{ SYS_DESC(SYS_MPIDR_EL1), NULL, reset_mpidr, MPIDR_EL1 },
IMPLEMENTATION_ID(REVIDR_EL1),
IMPLEMENTATION_ID(REVIDR_EL1, GENMASK_ULL(63, 0)),
/*
* ID regs: all ID_SANITISED() entries here must have corresponding
@ -2904,7 +2939,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
.set_user = set_clidr, .val = ~CLIDR_EL1_RES0 },
{ SYS_DESC(SYS_CCSIDR2_EL1), undef_access },
{ SYS_DESC(SYS_SMIDR_EL1), undef_access },
IMPLEMENTATION_ID(AIDR_EL1),
IMPLEMENTATION_ID(AIDR_EL1, GENMASK_ULL(63, 0)),
{ SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 },
ID_FILTERED(CTR_EL0, ctr_el0,
CTR_EL0_DIC_MASK |

View file

@ -929,6 +929,7 @@ struct kvm_enable_cap {
#define KVM_CAP_PRE_FAULT_MEMORY 236
#define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237
#define KVM_CAP_X86_GUEST_MODE 238
#define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS 239
struct kvm_irq_routing_irqchip {
__u32 irqchip;