linux/arch/x86/include/asm/msr.h
Linus Torvalds 7f9039c524

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull more kvm updates from Paolo Bonzini:
  "Generic:

   - Clean up locking of all vCPUs for a VM by using the *_nest_lock()
     family of functions, and move duplicated code to virt/kvm/. kernel/
     patches acked by Peter Zijlstra

   - Add MGLRU support to the access tracking perf test

  ARM fixes:

   - Make the irqbypass hooks resilient to changes in the GSI<->MSI
     routing, avoiding stale vLPI mappings being left behind. The fix is
     to resolve the VGIC IRQ using the host IRQ (which is stable) and
     nuke the vLPI mapping upon a routing change

   - Close another VGIC race where vCPU creation races with VGIC
     creation, leading to in-flight vCPUs entering the kernel w/o
     private IRQs allocated

   - Fix a build issue triggered by the recently added workaround for
     Ampere's AC04_CPU_23 erratum

   - Correctly sign-extend the VA when emulating a TLBI instruction
     potentially targeting a VNCR mapping

   - Avoid dereferencing a NULL pointer in the VGIC debug code, which
     can happen if the device doesn't have any mapping yet

  s390:

   - Fix interaction between some filesystems and Secure Execution

   - Some cleanups and refactorings, preparing for an upcoming big
     series

  x86:

   - Wait for target vCPU to ack KVM_REQ_UPDATE_PROTECTED_GUEST_STATE
     to fix a race between AP destroy and VMRUN

   - Decrypt and dump the VMSA in dump_vmcb() if debugging is enabled
     for the VM

   - Refine and harden handling of spurious faults

   - Add support for ALLOWED_SEV_FEATURES

   - Add #VMGEXIT to the set of handlers special cased for
     CONFIG_RETPOLINE=y

   - Treat DEBUGCTL[5:2] as reserved to pave the way for virtualizing
     features that utilize those bits

   - Don't account temporary allocations in sev_send_update_data()

   - Add support for KVM_CAP_X86_BUS_LOCK_EXIT on SVM, via Bus Lock
     Threshold

   - Unify virtualization of IBRS on nested VM-Exit, and cross-vCPU
     IBPB, between SVM and VMX

   - Advertise support to userspace for WRMSRNS and PREFETCHI

   - Rescan I/O APIC routes after handling an EOI that needed to be
     intercepted due to the old/previous routing, but not the
     new/current routing

   - Add a module param to control and enumerate support for device
     posted interrupts

   - Fix a potential overflow with nested virt on Intel systems running
     32-bit kernels

   - Flush shadow VMCSes on emergency reboot

   - Add support for SNP to the various SEV selftests

   - Add a selftest to verify fastops instructions via forced emulation

   - Refine and optimize KVM's software processing of the posted
     interrupt bitmap, and share the harvesting code between KVM and the
     kernel's Posted MSI handler"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (93 commits)
  rtmutex_api: provide correct extern functions
  KVM: arm64: vgic-debug: Avoid dereferencing NULL ITE pointer
  KVM: arm64: vgic-init: Plug vCPU vs. VGIC creation race
  KVM: arm64: Unmap vLPIs affected by changes to GSI routing information
  KVM: arm64: Resolve vLPI by host IRQ in vgic_v4_unset_forwarding()
  KVM: arm64: Protect vLPI translation with vgic_irq::irq_lock
  KVM: arm64: Use lock guard in vgic_v4_set_forwarding()
  KVM: arm64: Mask out non-VA bits from TLBI VA* on VNCR invalidation
  arm64: sysreg: Drag linux/kconfig.h to work around vdso build issue
  KVM: s390: Simplify and move pv code
  KVM: s390: Refactor and split some gmap helpers
  KVM: s390: Remove unneeded srcu lock
  s390: Remove unneeded includes
  s390/uv: Improve splitting of large folios that cannot be split while dirty
  s390/uv: Always return 0 from s390_wiggle_split_folio() if successful
  s390/uv: Don't return 0 from make_hva_secure() if the operation was not successful
  rust: add helper for mutex_trylock
  RISC-V: KVM: use kvm_trylock_all_vcpus when locking all vCPUs
  KVM: arm64: use kvm_trylock_all_vcpus when locking all vCPUs
  x86: KVM: SVM: use kvm_lock_all_vcpus instead of a custom implementation
  ...
2025-06-02 12:24:58 -07:00


/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_MSR_H
#define _ASM_X86_MSR_H
#include "msr-index.h"
#ifndef __ASSEMBLER__
#include <asm/asm.h>
#include <asm/errno.h>
#include <asm/cpumask.h>
#include <uapi/asm/msr.h>
#include <asm/shared/msr.h>
#include <linux/types.h>
#include <linux/percpu.h>
struct msr_info {
        u32 msr_no;
        struct msr reg;
        struct msr __percpu *msrs;
        int err;
};

struct msr_regs_info {
        u32 *regs;
        int err;
};

struct saved_msr {
        bool valid;
        struct msr_info info;
};

struct saved_msrs {
        unsigned int num;
        struct saved_msr *array;
};
/*
 * Be very careful with includes. This header is prone to include loops.
 */
#include <asm/atomic.h>
#include <linux/tracepoint-defs.h>
#ifdef CONFIG_TRACEPOINTS
DECLARE_TRACEPOINT(read_msr);
DECLARE_TRACEPOINT(write_msr);
DECLARE_TRACEPOINT(rdpmc);
extern void do_trace_write_msr(u32 msr, u64 val, int failed);
extern void do_trace_read_msr(u32 msr, u64 val, int failed);
extern void do_trace_rdpmc(u32 msr, u64 val, int failed);
#else
static inline void do_trace_write_msr(u32 msr, u64 val, int failed) {}
static inline void do_trace_read_msr(u32 msr, u64 val, int failed) {}
static inline void do_trace_rdpmc(u32 msr, u64 val, int failed) {}
#endif
/*
 * __rdmsr() and __wrmsr() are the two primitives which are the bare minimum MSR
 * accessors and should not have any tracing or other functionality piggybacking
 * on them - those are *purely* for accessing MSRs and nothing more. So don't even
 * think of extending them - you will be slapped with a stinking trout or a frozen
 * shark will reach you, wherever you are! You've been warned.
 */
static __always_inline u64 __rdmsr(u32 msr)
{
        EAX_EDX_DECLARE_ARGS(val, low, high);

        asm volatile("1: rdmsr\n"
                     "2:\n"
                     _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_RDMSR)
                     : EAX_EDX_RET(val, low, high) : "c" (msr));

        return EAX_EDX_VAL(val, low, high);
}

static __always_inline void __wrmsrq(u32 msr, u64 val)
{
        asm volatile("1: wrmsr\n"
                     "2:\n"
                     _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR)
                     : : "c" (msr), "a" ((u32)val), "d" ((u32)(val >> 32)) : "memory");
}

#define native_rdmsr(msr, val1, val2)                           \
do {                                                            \
        u64 __val = __rdmsr((msr));                             \
        (void)((val1) = (u32)__val);                            \
        (void)((val2) = (u32)(__val >> 32));                    \
} while (0)

static __always_inline u64 native_rdmsrq(u32 msr)
{
        return __rdmsr(msr);
}

#define native_wrmsr(msr, low, high)                            \
        __wrmsrq((msr), (u64)(high) << 32 | (low))

#define native_wrmsrq(msr, val)                                 \
        __wrmsrq((msr), (val))

static inline u64 native_read_msr(u32 msr)
{
        u64 val;

        val = __rdmsr(msr);

        if (tracepoint_enabled(read_msr))
                do_trace_read_msr(msr, val, 0);

        return val;
}

static inline int native_read_msr_safe(u32 msr, u64 *p)
{
        int err;
        EAX_EDX_DECLARE_ARGS(val, low, high);

        asm volatile("1: rdmsr ; xor %[err],%[err]\n"
                     "2:\n\t"
                     _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_RDMSR_SAFE, %[err])
                     : [err] "=r" (err), EAX_EDX_RET(val, low, high)
                     : "c" (msr));
        if (tracepoint_enabled(read_msr))
                do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), err);

        *p = EAX_EDX_VAL(val, low, high);

        return err;
}
/* Can be uninlined because referenced by paravirt */
static inline void notrace native_write_msr(u32 msr, u64 val)
{
        native_wrmsrq(msr, val);

        if (tracepoint_enabled(write_msr))
                do_trace_write_msr(msr, val, 0);
}

/* Can be uninlined because referenced by paravirt */
static inline int notrace native_write_msr_safe(u32 msr, u64 val)
{
        int err;

        asm volatile("1: wrmsr ; xor %[err],%[err]\n"
                     "2:\n\t"
                     _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_WRMSR_SAFE, %[err])
                     : [err] "=a" (err)
                     : "c" (msr), "0" ((u32)val), "d" ((u32)(val >> 32))
                     : "memory");
        if (tracepoint_enabled(write_msr))
                do_trace_write_msr(msr, val, err);

        return err;
}

extern int rdmsr_safe_regs(u32 regs[8]);
extern int wrmsr_safe_regs(u32 regs[8]);

static inline u64 native_read_pmc(int counter)
{
        EAX_EDX_DECLARE_ARGS(val, low, high);

        asm volatile("rdpmc" : EAX_EDX_RET(val, low, high) : "c" (counter));
        if (tracepoint_enabled(rdpmc))
                do_trace_rdpmc(counter, EAX_EDX_VAL(val, low, high), 0);

        return EAX_EDX_VAL(val, low, high);
}
#ifdef CONFIG_PARAVIRT_XXL
#include <asm/paravirt.h>
#else
#include <linux/errno.h>
/*
 * Access to machine-specific registers (available on 586 and better only).
 * Note: the rd* operations modify the parameters directly (without using
 * pointer indirection); this allows gcc to optimize better.
 */
#define rdmsr(msr, low, high)                                   \
do {                                                            \
        u64 __val = native_read_msr((msr));                     \
        (void)((low) = (u32)__val);                             \
        (void)((high) = (u32)(__val >> 32));                    \
} while (0)

static inline void wrmsr(u32 msr, u32 low, u32 high)
{
        native_write_msr(msr, (u64)high << 32 | low);
}

#define rdmsrq(msr, val)                                        \
        ((val) = native_read_msr((msr)))

static inline void wrmsrq(u32 msr, u64 val)
{
        native_write_msr(msr, val);
}

/* wrmsr with exception handling */
static inline int wrmsrq_safe(u32 msr, u64 val)
{
        return native_write_msr_safe(msr, val);
}

/* rdmsr with exception handling */
#define rdmsr_safe(msr, low, high)                              \
({                                                              \
        u64 __val;                                              \
        int __err = native_read_msr_safe((msr), &__val);        \
        (*low) = (u32)__val;                                    \
        (*high) = (u32)(__val >> 32);                           \
        __err;                                                  \
})

static inline int rdmsrq_safe(u32 msr, u64 *p)
{
        return native_read_msr_safe(msr, p);
}

static __always_inline u64 rdpmc(int counter)
{
        return native_read_pmc(counter);
}
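
/*
 * Illustrative usage sketch of the accessors above, assuming MSR_IA32_TSC_AUX
 * (from msr-index.h) as the target MSR. The plain accessors expect the MSR to
 * exist; the _safe() variants return a non-zero error code instead of relying
 * on the unchecked-access fixup:
 *
 *        u32 lo, hi;
 *        u64 aux, scratch;
 *
 *        rdmsr(MSR_IA32_TSC_AUX, lo, hi);
 *        rdmsrq(MSR_IA32_TSC_AUX, aux);
 *        wrmsrq(MSR_IA32_TSC_AUX, aux);
 *
 *        if (rdmsrq_safe(MSR_IA32_TSC_AUX, &scratch))
 *                pr_warn("MSR_IA32_TSC_AUX is not readable\n");
 *
 * The dual-u32 rdmsr() form fills lo/hi directly, and rdpmc(0) returns the
 * current value of general-purpose performance counter 0.
 */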
#endif /* !CONFIG_PARAVIRT_XXL */
/* Instruction opcode for WRMSRNS supported in binutils >= 2.40 */
#define ASM_WRMSRNS _ASM_BYTES(0x0f,0x01,0xc6)
/* Non-serializing WRMSR, when available. Falls back to a serializing WRMSR. */
static __always_inline void wrmsrns(u32 msr, u64 val)
{
        /*
         * WRMSR is 2 bytes. WRMSRNS is 3 bytes. Pad WRMSR with a redundant
         * DS prefix to avoid a trailing NOP.
         */
        asm volatile("1: " ALTERNATIVE("ds wrmsr", ASM_WRMSRNS, X86_FEATURE_WRMSRNS)
                     "2: " _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR)
                     : : "c" (msr), "a" ((u32)val), "d" ((u32)(val >> 32)));
}
/*
 * Dual u32 version of wrmsrq_safe():
 */
static inline int wrmsr_safe(u32 msr, u32 low, u32 high)
{
        return wrmsrq_safe(msr, (u64)high << 32 | low);
}
struct msr __percpu *msrs_alloc(void);
void msrs_free(struct msr __percpu *msrs);
int msr_set_bit(u32 msr, u8 bit);
int msr_clear_bit(u32 msr, u8 bit);
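
/*
 * Minimal sketch of the bit helpers above, assuming the usual convention of a
 * negative return on error, 0 when the bit already had the requested value,
 * and a positive value when it was actually changed:
 *
 *        int ret = msr_set_bit(MSR_IA32_MISC_ENABLE,
 *                              MSR_IA32_MISC_ENABLE_FAST_STRING_BIT);
 *        if (ret > 0)
 *                pr_info("fast strings were off and are now enabled\n");
 *        else if (ret < 0)
 *                pr_warn("MSR access failed\n");
 */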
#ifdef CONFIG_SMP
int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
int rdmsrq_on_cpu(unsigned int cpu, u32 msr_no, u64 *q);
int wrmsrq_on_cpu(unsigned int cpu, u32 msr_no, u64 q);
void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr __percpu *msrs);
void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr __percpu *msrs);
int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
int rdmsrq_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q);
int wrmsrq_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 q);
int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]);
int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]);
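
/*
 * Illustrative cross-CPU read, assuming MSR_IA32_MPERF as the target; the
 * *_on_cpu() helpers run the access on the given CPU (typically via an IPI)
 * and return 0 on success:
 *
 *        u64 mperf;
 *
 *        if (rdmsrq_on_cpu(3, MSR_IA32_MPERF, &mperf))
 *                pr_warn("could not read MPERF on CPU 3\n");
 */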
#else /* CONFIG_SMP */
static inline int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
{
        rdmsr(msr_no, *l, *h);
        return 0;
}

static inline int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
{
        wrmsr(msr_no, l, h);
        return 0;
}

static inline int rdmsrq_on_cpu(unsigned int cpu, u32 msr_no, u64 *q)
{
        rdmsrq(msr_no, *q);
        return 0;
}

static inline int wrmsrq_on_cpu(unsigned int cpu, u32 msr_no, u64 q)
{
        wrmsrq(msr_no, q);
        return 0;
}

static inline void rdmsr_on_cpus(const struct cpumask *m, u32 msr_no,
                                 struct msr __percpu *msrs)
{
        rdmsr_on_cpu(0, msr_no, raw_cpu_ptr(&msrs->l), raw_cpu_ptr(&msrs->h));
}

static inline void wrmsr_on_cpus(const struct cpumask *m, u32 msr_no,
                                 struct msr __percpu *msrs)
{
        wrmsr_on_cpu(0, msr_no, raw_cpu_read(msrs->l), raw_cpu_read(msrs->h));
}

static inline int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no,
                                    u32 *l, u32 *h)
{
        return rdmsr_safe(msr_no, l, h);
}

static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
{
        return wrmsr_safe(msr_no, l, h);
}

static inline int rdmsrq_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q)
{
        return rdmsrq_safe(msr_no, q);
}

static inline int wrmsrq_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 q)
{
        return wrmsrq_safe(msr_no, q);
}

static inline int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8])
{
        return rdmsr_safe_regs(regs);
}

static inline int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8])
{
        return wrmsr_safe_regs(regs);
}
#endif /* CONFIG_SMP */
/* Compatibility wrappers: */
#define rdmsrl(msr, val) rdmsrq(msr, val)
#define wrmsrl(msr, val) wrmsrq(msr, val)
#define rdmsrl_on_cpu(cpu, msr, q) rdmsrq_on_cpu(cpu, msr, q)
#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_MSR_H */