Mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git (synced 2025-09-18 22:14:16 +00:00)
Merge tag 'kvm-x86-misc-6.17' of https://github.com/kvm-x86/linux into HEAD
KVM x86 misc changes for 6.17:

- Preserve the host's DEBUGCTL.FREEZE_IN_SMM (Intel only) when running the
  guest. Failure to honor FREEZE_IN_SMM can bleed host state into the guest.

- Explicitly check vmcs12.GUEST_DEBUGCTL on nested VM-Enter (Intel only) to
  prevent L1 from running L2 with features that KVM doesn't support, e.g. BTF.

- Intercept SPEC_CTRL on AMD if the MSR shouldn't exist according to the
  vCPU's CPUID model.

- Rework the MSR interception code so that the SVM and VMX APIs are more or
  less identical.

- Recalculate all MSR intercepts from the "source" on MSR filter changes, and
  drop the dedicated "shadow" bitmaps (and their awful "max" size defines).

- WARN and reject loading kvm-amd.ko instead of panicking the kernel if the
  nested SVM MSRPM offsets tracker can't handle an MSR.

- Advertise support for LKGS (Load Kernel GS base), a new instruction that's
  loosely related to FRED, but is supported and enumerated independently.

- Fix a user-triggerable WARN that syzkaller found by stuffing INIT_RECEIVED,
  a.k.a. WFS, and then putting the vCPU into VMX Root Mode (post-VMXON). Use
  the same approach KVM uses for dealing with "impossible" emulation when
  running a !URG guest, and simply wait until KVM_RUN to detect that the vCPU
  has architecturally impossible state.

- Add KVM_X86_DISABLE_EXITS_APERFMPERF to allow disabling interception of
  APERF/MPERF reads, so that a "properly" configured VM can "virtualize"
  APERF/MPERF (with many caveats).

- Reject KVM_SET_TSC_KHZ if vCPUs have been created, as changing the "default"
  frequency is unsupported for VMs with a "secure" TSC, and there's no known
  use case for changing the default frequency for other VM types.
Commit 1a14928e2e

30 changed files with 931 additions and 751 deletions
@ -2006,7 +2006,7 @@ frequency is KHz.

If the KVM_CAP_VM_TSC_CONTROL capability is advertised, this can also
be used as a vm ioctl to set the initial tsc frequency of subsequently
created vCPUs.
created vCPUs. Note, the vm ioctl is only allowed prior to creating vCPUs.

For TSC protected Confidential Computing (CoCo) VMs where TSC frequency
is configured once at VM scope and remains unchanged during VM's
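A minimal userspace sketch of the vm-scoped usage described above (the fd handling and error checks are illustrative assumptions, not part of the patch):

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* vm_fd was obtained via KVM_CREATE_VM; must run before any vCPU exists. */
static int set_default_tsc_khz(int vm_fd, unsigned long khz)
{
	if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_VM_TSC_CONTROL) <= 0)
		return -1;	/* vm-scoped TSC control not advertised */

	return ioctl(vm_fd, KVM_SET_TSC_KHZ, khz);
}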
@ -7851,6 +7851,7 @@ Valid bits in args[0] are::

  #define KVM_X86_DISABLE_EXITS_HLT        (1 << 1)
  #define KVM_X86_DISABLE_EXITS_PAUSE      (1 << 2)
  #define KVM_X86_DISABLE_EXITS_CSTATE     (1 << 3)
  #define KVM_X86_DISABLE_EXITS_APERFMPERF (1 << 4)

Enabling this capability on a VM provides userspace with a way to no
longer intercept some instructions for improved latency in some
@ -7861,6 +7862,28 @@ all such vmexits.

Do not enable KVM_FEATURE_PV_UNHALT if you disable HLT exits.

Virtualizing the ``IA32_APERF`` and ``IA32_MPERF`` MSRs requires more
than just disabling APERF/MPERF exits. While both Intel and AMD
document strict usage conditions for these MSRs--emphasizing that only
the ratio of their deltas over a time interval (T0 to T1) is
architecturally defined--simply passing through the MSRs can still
produce an incorrect ratio.

This erroneous ratio can occur if, between T0 and T1:

1. The vCPU thread migrates between logical processors.
2. Live migration or suspend/resume operations take place.
3. Another task shares the vCPU's logical processor.
4. C-states lower than C0 are emulated (e.g., via HLT interception).
5. The guest TSC frequency doesn't match the host TSC frequency.

Due to these complexities, KVM does not automatically associate this
passthrough capability with the guest CPUID bit,
``CPUID.6:ECX.APERFMPERF[bit 0]``. Userspace VMMs that deem this
mechanism adequate for virtualizing the ``IA32_APERF`` and
``IA32_MPERF`` MSRs must set the guest CPUID bit explicitly.


7.14 KVM_CAP_S390_HPAGE_1M
--------------------------

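For illustration, a hedged userspace sketch of enabling the new bit via the existing KVM_CAP_X86_DISABLE_EXITS machinery; the helper name and error handling are assumptions, only the capability, ioctl and bit value come from the documentation above:

#include <linux/kvm.h>
#include <sys/ioctl.h>

#ifndef KVM_X86_DISABLE_EXITS_APERFMPERF
#define KVM_X86_DISABLE_EXITS_APERFMPERF (1 << 4)
#endif

/* vm_fd comes from KVM_CREATE_VM; enable this before creating vCPUs. */
static int disable_aperfmperf_exits(int vm_fd)
{
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_X86_DISABLE_EXITS,
		.args[0] = KVM_X86_DISABLE_EXITS_APERFMPERF,
	};

	/*
	 * Disabling the exits only stops interception; per the text above,
	 * the VMM still has to set CPUID.6:ECX[0] itself (e.g. via
	 * KVM_SET_CPUID2) if it considers the passthrough adequate.
	 */
	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}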
@ -49,7 +49,6 @@ KVM_X86_OP(set_idt)
KVM_X86_OP(get_gdt)
KVM_X86_OP(set_gdt)
KVM_X86_OP(sync_dirty_debug_regs)
KVM_X86_OP(set_dr6)
KVM_X86_OP(set_dr7)
KVM_X86_OP(cache_reg)
KVM_X86_OP(get_rflags)

@ -139,7 +138,7 @@ KVM_X86_OP(check_emulate_instruction)
KVM_X86_OP(apic_init_signal_blocked)
KVM_X86_OP_OPTIONAL(enable_l2_tlb_flush)
KVM_X86_OP_OPTIONAL(migrate_timers)
KVM_X86_OP(msr_filter_changed)
KVM_X86_OP(recalc_msr_intercepts)
KVM_X86_OP(complete_emulated_msr)
KVM_X86_OP(vcpu_deliver_sipi_vector)
KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);

@ -1408,10 +1408,7 @@ struct kvm_arch {

	gpa_t wall_clock;

	bool mwait_in_guest;
	bool hlt_in_guest;
	bool pause_in_guest;
	bool cstate_in_guest;
	u64 disabled_exits;

	s64 kvmclock_offset;

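The per-exit bools above are folded into a single u64 disabled_exits bitmask. A sketch of the accessors this implies (the diff later uses kvm_disable_exits() and kvm_aperfmperf_in_guest(); the exact bodies shown here are assumptions):

static inline void kvm_disable_exits(struct kvm *kvm, u64 mask)
{
	kvm->arch.disabled_exits |= mask;
}

static inline bool kvm_hlt_in_guest(struct kvm *kvm)
{
	return kvm->arch.disabled_exits & KVM_X86_DISABLE_EXITS_HLT;
}

static inline bool kvm_aperfmperf_in_guest(struct kvm *kvm)
{
	return kvm->arch.disabled_exits & KVM_X86_DISABLE_EXITS_APERFMPERF;
}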
@ -1687,6 +1684,12 @@ static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical)
	return dest_mode_logical ? APIC_DEST_LOGICAL : APIC_DEST_PHYSICAL;
}

enum kvm_x86_run_flags {
	KVM_RUN_FORCE_IMMEDIATE_EXIT	= BIT(0),
	KVM_RUN_LOAD_GUEST_DR6		= BIT(1),
	KVM_RUN_LOAD_DEBUGCTL		= BIT(2),
};

struct kvm_x86_ops {
	const char *name;

@ -1715,6 +1718,12 @@ struct kvm_x86_ops {
	void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
	void (*vcpu_put)(struct kvm_vcpu *vcpu);

	/*
	 * Mask of DEBUGCTL bits that are owned by the host, i.e. that need to
	 * match the host's value even while the guest is active.
	 */
	const u64 HOST_OWNED_DEBUGCTL;

	void (*update_exception_bitmap)(struct kvm_vcpu *vcpu);
	int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
	int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
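A rough sketch of how a host-owned DEBUGCTL mask can be honored while the guest runs; the helper is hypothetical and only illustrates the masking, the real propagation is driven by the common x86 code via KVM_RUN_LOAD_DEBUGCTL:

/* Keep host-owned DEBUGCTL bits (e.g. FREEZE_IN_SMM on Intel) in sync
 * with the host while the guest controls only the remaining bits. */
static u64 compose_guest_debugctl(u64 guest_val, u64 host_val, u64 host_owned)
{
	return (guest_val & ~host_owned) | (host_val & host_owned);
}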
@ -1737,7 +1746,6 @@ struct kvm_x86_ops {
	void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
	void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
	void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu);
	void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value);
	void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value);
	void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
	unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);

@ -1768,7 +1776,7 @@ struct kvm_x86_ops {

	int (*vcpu_pre_run)(struct kvm_vcpu *vcpu);
	enum exit_fastpath_completion (*vcpu_run)(struct kvm_vcpu *vcpu,
						  bool force_immediate_exit);
						  u64 run_flags);
	int (*handle_exit)(struct kvm_vcpu *vcpu,
			   enum exit_fastpath_completion exit_fastpath);
	int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);

@ -1900,7 +1908,7 @@ struct kvm_x86_ops {
	int (*enable_l2_tlb_flush)(struct kvm_vcpu *vcpu);

	void (*migrate_timers)(struct kvm_vcpu *vcpu);
	void (*msr_filter_changed)(struct kvm_vcpu *vcpu);
	void (*recalc_msr_intercepts)(struct kvm_vcpu *vcpu);
	int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err);

	void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector);

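With vcpu_run() now taking a u64 of kvm_x86_run_flags instead of a lone bool, a caller-side sketch might look as follows (the wrapper and its parameters are hypothetical; only the flag names come from the patch):

/* Hypothetical caller-side wrapper; kvm_x86_call() dispatches to the
 * vendor's vcpu_run() implementation. */
static fastpath_t sketch_run_vcpu(struct kvm_vcpu *vcpu, bool force_immediate_exit,
				  bool load_guest_dr6, bool load_debugctl)
{
	u64 run_flags = 0;

	if (force_immediate_exit)
		run_flags |= KVM_RUN_FORCE_IMMEDIATE_EXIT;
	if (load_guest_dr6)
		run_flags |= KVM_RUN_LOAD_GUEST_DR6;
	if (load_debugctl)
		run_flags |= KVM_RUN_LOAD_DEBUGCTL;

	return kvm_x86_call(vcpu_run)(vcpu, run_flags);
}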
@ -419,6 +419,7 @@
#define DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI	(1UL << 12)
#define DEBUGCTLMSR_FREEZE_IN_SMM_BIT		14
#define DEBUGCTLMSR_FREEZE_IN_SMM		(1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT)
#define DEBUGCTLMSR_RTM_DEBUG			BIT(15)

#define MSR_PEBS_FRONTEND	0x000003f7

@ -979,6 +979,7 @@ void kvm_set_cpu_caps(void)
		F(FSRS),
		F(FSRC),
		F(WRMSRNS),
		X86_64_F(LKGS),
		F(AMX_FP16),
		F(AVX_IFMA),
		F(LAM),

@ -21,6 +21,8 @@
#define APIC_BROADCAST			0xFF
#define X2APIC_BROADCAST		0xFFFFFFFFul

#define X2APIC_MSR(r)			(APIC_BASE_MSR + ((r) >> 4))

enum lapic_mode {
	LAPIC_MODE_DISABLED = 0,
	LAPIC_MODE_INVALID = X2APIC_ENABLE,

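A quick sanity check of the new X2APIC_MSR() macro, using well-known x2APIC register numbers (illustrative only, not part of the patch):

/* APIC_BASE_MSR is 0x800 and xAPIC MMIO registers are 16-byte spaced,
 * so shifting the register offset right by 4 yields the x2APIC MSR. */
static_assert(X2APIC_MSR(APIC_ID) == 0x802);	  /* APIC_ID offset 0x20 */
static_assert(X2APIC_MSR(APIC_TASKPRI) == 0x808); /* TPR offset 0x80 */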
@ -184,13 +184,88 @@ void recalc_intercepts(struct vcpu_svm *svm)
	}
}

/*
 * This array (and its actual size) holds the set of offsets (indexing by chunk
 * size) to process when merging vmcb12's MSRPM with vmcb01's MSRPM.  Note, the
 * set of MSRs for which interception is disabled in vmcb01 is per-vCPU, e.g.
 * based on CPUID features.  This array only tracks MSRs that *might* be passed
 * through to the guest.
 *
 * Hardcode the capacity of the array based on the maximum number of _offsets_.
 * MSRs are batched together, so there are fewer offsets than MSRs.
 */
static int nested_svm_msrpm_merge_offsets[7] __ro_after_init;
static int nested_svm_nr_msrpm_merge_offsets __ro_after_init;
typedef unsigned long nsvm_msrpm_merge_t;

int __init nested_svm_init_msrpm_merge_offsets(void)
{
	static const u32 merge_msrs[] __initconst = {
		MSR_STAR,
		MSR_IA32_SYSENTER_CS,
		MSR_IA32_SYSENTER_EIP,
		MSR_IA32_SYSENTER_ESP,
#ifdef CONFIG_X86_64
		MSR_GS_BASE,
		MSR_FS_BASE,
		MSR_KERNEL_GS_BASE,
		MSR_LSTAR,
		MSR_CSTAR,
		MSR_SYSCALL_MASK,
#endif
		MSR_IA32_SPEC_CTRL,
		MSR_IA32_PRED_CMD,
		MSR_IA32_FLUSH_CMD,
		MSR_IA32_APERF,
		MSR_IA32_MPERF,
		MSR_IA32_LASTBRANCHFROMIP,
		MSR_IA32_LASTBRANCHTOIP,
		MSR_IA32_LASTINTFROMIP,
		MSR_IA32_LASTINTTOIP,
	};
	int i, j;

	for (i = 0; i < ARRAY_SIZE(merge_msrs); i++) {
		int bit_nr = svm_msrpm_bit_nr(merge_msrs[i]);
		u32 offset;

		if (WARN_ON(bit_nr < 0))
			return -EIO;

		/*
		 * Merging is done in chunks to reduce the number of accesses
		 * to L1's bitmap.
		 */
		offset = bit_nr / BITS_PER_BYTE / sizeof(nsvm_msrpm_merge_t);

		for (j = 0; j < nested_svm_nr_msrpm_merge_offsets; j++) {
			if (nested_svm_msrpm_merge_offsets[j] == offset)
				break;
		}

		if (j < nested_svm_nr_msrpm_merge_offsets)
			continue;

		if (WARN_ON(j >= ARRAY_SIZE(nested_svm_msrpm_merge_offsets)))
			return -EIO;

		nested_svm_msrpm_merge_offsets[j] = offset;
		nested_svm_nr_msrpm_merge_offsets++;
	}

	return 0;
}

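To make the chunk-offset math above concrete, an illustrative helper and one worked value (assuming 8-byte chunks, i.e. unsigned long on x86-64):

/* Illustrative only: mirrors the offset computation in the loop above. */
static inline unsigned int example_merge_offset(unsigned int bit_nr)
{
	return bit_nr / BITS_PER_BYTE / sizeof(unsigned long);
}
/*
 * MSR_IA32_SPEC_CTRL (0x48) lives in range 0, so bit_nr = 0x48 * 2 = 144
 * and the merge offset is 144 / 8 / 8 = 2: a single 8-byte read of L1's
 * bitmap covers SPEC_CTRL and the 31 other MSRs sharing that chunk.
 */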
/*
 * Merge L0's (KVM) and L1's (Nested VMCB) MSR permission bitmaps. The function
 * is optimized in that it only merges the parts where KVM MSR permission bitmap
 * may contain zero bits.
 */
static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
static bool nested_svm_merge_msrpm(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	nsvm_msrpm_merge_t *msrpm02 = svm->nested.msrpm;
	nsvm_msrpm_merge_t *msrpm01 = svm->msrpm;
	int i;

	/*

@ -205,7 +280,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
	if (!svm->nested.force_msr_bitmap_recalc) {
		struct hv_vmcb_enlightenments *hve = &svm->nested.ctl.hv_enlightenments;

		if (kvm_hv_hypercall_enabled(&svm->vcpu) &&
		if (kvm_hv_hypercall_enabled(vcpu) &&
		    hve->hv_enlightenments_control.msr_bitmap &&
		    (svm->nested.ctl.clean & BIT(HV_VMCB_NESTED_ENLIGHTENMENTS)))
			goto set_msrpm_base_pa;

@ -215,25 +290,17 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
	if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
		return true;

	for (i = 0; i < MSRPM_OFFSETS; i++) {
		u32 value, p;
		u64 offset;
	for (i = 0; i < nested_svm_nr_msrpm_merge_offsets; i++) {
		const int p = nested_svm_msrpm_merge_offsets[i];
		nsvm_msrpm_merge_t l1_val;
		gpa_t gpa;

		if (msrpm_offsets[i] == 0xffffffff)
			break;
		gpa = svm->nested.ctl.msrpm_base_pa + (p * sizeof(l1_val));

		p = msrpm_offsets[i];

		/* x2apic msrs are intercepted always for the nested guest */
		if (is_x2apic_msrpm_offset(p))
			continue;

		offset = svm->nested.ctl.msrpm_base_pa + (p * 4);

		if (kvm_vcpu_read_guest(&svm->vcpu, offset, &value, 4))
		if (kvm_vcpu_read_guest(vcpu, gpa, &l1_val, sizeof(l1_val)))
			return false;

		svm->nested.msrpm[p] = svm->msrpm[p] | value;
		msrpm02[p] = msrpm01[p] | l1_val;
	}

	svm->nested.force_msr_bitmap_recalc = false;

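The merge itself boils down to OR-ing the two bitmaps: a set bit means "intercept", so vmcb02 intercepts an MSR whenever either KVM (vmcb01) or L1 (vmcb12) wants it intercepted. A minimal sketch of that invariant:

/* Merged interception for one bitmap chunk: a set bit means "intercept",
 * so the result intercepts whenever either input does. */
static inline unsigned long nsvm_merge_chunk(unsigned long kvm_bits,
					     unsigned long l1_bits)
{
	return kvm_bits | l1_bits;
}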
@ -937,7 +1004,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
	if (enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, true))
		goto out_exit_err;

	if (nested_svm_vmrun_msrpm(svm))
	if (nested_svm_merge_msrpm(vcpu))
		goto out;

out_exit_err:

@ -1230,7 +1297,6 @@ int svm_allocate_nested(struct vcpu_svm *svm)
	svm->nested.msrpm = svm_vcpu_alloc_msrpm();
	if (!svm->nested.msrpm)
		goto err_free_vmcb02;
	svm_vcpu_init_msrpm(&svm->vcpu, svm->nested.msrpm);

	svm->nested.initialized = true;
	return 0;

@ -1290,26 +1356,26 @@ void svm_leave_nested(struct kvm_vcpu *vcpu)

static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
{
	u32 offset, msr, value;
	int write, mask;
	gpa_t base = svm->nested.ctl.msrpm_base_pa;
	int write, bit_nr;
	u8 value, mask;
	u32 msr;

	if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
		return NESTED_EXIT_HOST;

	msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
	offset = svm_msrpm_offset(msr);
	bit_nr = svm_msrpm_bit_nr(msr);
	write = svm->vmcb->control.exit_info_1 & 1;
	mask = 1 << ((2 * (msr & 0xf)) + write);

	if (offset == MSR_INVALID)
	if (bit_nr < 0)
		return NESTED_EXIT_DONE;

	/* Offset is in 32 bit units but need in 8 bit units */
	offset *= 4;

	if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.ctl.msrpm_base_pa + offset, &value, 4))
	if (kvm_vcpu_read_guest(&svm->vcpu, base + bit_nr / BITS_PER_BYTE,
				&value, sizeof(value)))
		return NESTED_EXIT_DONE;

	mask = BIT(write) << (bit_nr & (BITS_PER_BYTE - 1));
	return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
}

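A worked example of the new byte/bit arithmetic, using MSR_LSTAR (0xC0000082) purely for illustration:

/*
 * MSR_LSTAR = 0xC0000082 -> range 1, so:
 *   bit_nr                    = 1 * 2048 * 8 + 0x82 * 2 = 16644
 *   byte index into the MSRPM = 16644 / 8               = 2080
 *   bit within that byte      = 16644 & 7               = 4
 *   read-intercept mask       = BIT(0) << 4             = 0x10
 *   write-intercept mask      = BIT(1) << 4             = 0x20
 * i.e. a single one-byte read of vmcb12's bitmap decides whether the
 * exit is forwarded to L1.
 */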
@ -1819,13 +1885,11 @@ out_free:

static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if (WARN_ON(!is_guest_mode(vcpu)))
		return true;

	if (!vcpu->arch.pdptrs_from_userspace &&
	    !nested_npt_enabled(svm) && is_pae_paging(vcpu))
	    !nested_npt_enabled(to_svm(vcpu)) && is_pae_paging(vcpu))
		/*
		 * Reload the guest's PDPTRs since after a migration
		 * the guest CR3 might be restored prior to setting the nested

@ -1834,7 +1898,7 @@ static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
		if (CC(!load_pdptrs(vcpu, vcpu->arch.cr3)))
			return false;

	if (!nested_svm_vmrun_msrpm(svm)) {
	if (!nested_svm_merge_msrpm(vcpu)) {
		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
		vcpu->run->internal.suberror =
			KVM_INTERNAL_ERROR_EMULATION;

@ -4390,16 +4390,17 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
|
|||
count, in);
|
||||
}
|
||||
|
||||
static void sev_es_vcpu_after_set_cpuid(struct vcpu_svm *svm)
|
||||
void sev_es_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = &svm->vcpu;
|
||||
/* Clear intercepts on MSRs that are context switched by hardware. */
|
||||
svm_disable_intercept_for_msr(vcpu, MSR_AMD64_SEV_ES_GHCB, MSR_TYPE_RW);
|
||||
svm_disable_intercept_for_msr(vcpu, MSR_EFER, MSR_TYPE_RW);
|
||||
svm_disable_intercept_for_msr(vcpu, MSR_IA32_CR_PAT, MSR_TYPE_RW);
|
||||
|
||||
if (boot_cpu_has(X86_FEATURE_V_TSC_AUX)) {
|
||||
bool v_tsc_aux = guest_cpu_cap_has(vcpu, X86_FEATURE_RDTSCP) ||
|
||||
guest_cpu_cap_has(vcpu, X86_FEATURE_RDPID);
|
||||
|
||||
set_msr_interception(vcpu, svm->msrpm, MSR_TSC_AUX, v_tsc_aux, v_tsc_aux);
|
||||
}
|
||||
if (boot_cpu_has(X86_FEATURE_V_TSC_AUX))
|
||||
svm_set_intercept_for_msr(vcpu, MSR_TSC_AUX, MSR_TYPE_RW,
|
||||
!guest_cpu_cap_has(vcpu, X86_FEATURE_RDTSCP) &&
|
||||
!guest_cpu_cap_has(vcpu, X86_FEATURE_RDPID));
|
||||
|
||||
/*
|
||||
* For SEV-ES, accesses to MSR_IA32_XSS should not be intercepted if
|
||||
|
@ -4413,11 +4414,9 @@ static void sev_es_vcpu_after_set_cpuid(struct vcpu_svm *svm)
|
|||
* XSAVES being exposed to the guest so that KVM can at least honor
|
||||
* guest CPUID for RDMSR and WRMSR.
|
||||
*/
|
||||
if (guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVES) &&
|
||||
guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))
|
||||
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_XSS, 1, 1);
|
||||
else
|
||||
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_XSS, 0, 0);
|
||||
svm_set_intercept_for_msr(vcpu, MSR_IA32_XSS, MSR_TYPE_RW,
|
||||
!guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVES) ||
|
||||
!guest_cpuid_has(vcpu, X86_FEATURE_XSAVES));
|
||||
}
|
||||
|
||||
void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm)
|
||||
|
@ -4429,16 +4428,12 @@ void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm)
|
|||
best = kvm_find_cpuid_entry(vcpu, 0x8000001F);
|
||||
if (best)
|
||||
vcpu->arch.reserved_gpa_bits &= ~(1UL << (best->ebx & 0x3f));
|
||||
|
||||
if (sev_es_guest(svm->vcpu.kvm))
|
||||
sev_es_vcpu_after_set_cpuid(svm);
|
||||
}
|
||||
|
||||
static void sev_es_init_vmcb(struct vcpu_svm *svm)
|
||||
{
|
||||
struct kvm_sev_info *sev = to_kvm_sev_info(svm->vcpu.kvm);
|
||||
struct vmcb *vmcb = svm->vmcb01.ptr;
|
||||
struct kvm_vcpu *vcpu = &svm->vcpu;
|
||||
|
||||
svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE;
|
||||
|
||||
|
@ -4496,10 +4491,6 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
|
|||
|
||||
/* Can't intercept XSETBV, HV can't modify XCR0 directly */
|
||||
svm_clr_intercept(svm, INTERCEPT_XSETBV);
|
||||
|
||||
/* Clear intercepts on selected MSRs */
|
||||
set_msr_interception(vcpu, svm->msrpm, MSR_EFER, 1, 1);
|
||||
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_CR_PAT, 1, 1);
|
||||
}
|
||||
|
||||
void sev_init_vmcb(struct vcpu_svm *svm)
|
||||
|
|
|
@ -72,8 +72,6 @@ MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
|
|||
|
||||
static bool erratum_383_found __read_mostly;
|
||||
|
||||
u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
|
||||
|
||||
/*
|
||||
* Set osvw_len to higher value when updated Revision Guides
|
||||
* are published and we know what the new status bits are
|
||||
|
@ -82,72 +80,6 @@ static uint64_t osvw_len = 4, osvw_status;
|
|||
|
||||
static DEFINE_PER_CPU(u64, current_tsc_ratio);
|
||||
|
||||
#define X2APIC_MSR(x) (APIC_BASE_MSR + (x >> 4))
|
||||
|
||||
static const struct svm_direct_access_msrs {
|
||||
u32 index; /* Index of the MSR */
|
||||
bool always; /* True if intercept is initially cleared */
|
||||
} direct_access_msrs[MAX_DIRECT_ACCESS_MSRS] = {
|
||||
{ .index = MSR_STAR, .always = true },
|
||||
{ .index = MSR_IA32_SYSENTER_CS, .always = true },
|
||||
{ .index = MSR_IA32_SYSENTER_EIP, .always = false },
|
||||
{ .index = MSR_IA32_SYSENTER_ESP, .always = false },
|
||||
#ifdef CONFIG_X86_64
|
||||
{ .index = MSR_GS_BASE, .always = true },
|
||||
{ .index = MSR_FS_BASE, .always = true },
|
||||
{ .index = MSR_KERNEL_GS_BASE, .always = true },
|
||||
{ .index = MSR_LSTAR, .always = true },
|
||||
{ .index = MSR_CSTAR, .always = true },
|
||||
{ .index = MSR_SYSCALL_MASK, .always = true },
|
||||
#endif
|
||||
{ .index = MSR_IA32_SPEC_CTRL, .always = false },
|
||||
{ .index = MSR_IA32_PRED_CMD, .always = false },
|
||||
{ .index = MSR_IA32_FLUSH_CMD, .always = false },
|
||||
{ .index = MSR_IA32_DEBUGCTLMSR, .always = false },
|
||||
{ .index = MSR_IA32_LASTBRANCHFROMIP, .always = false },
|
||||
{ .index = MSR_IA32_LASTBRANCHTOIP, .always = false },
|
||||
{ .index = MSR_IA32_LASTINTFROMIP, .always = false },
|
||||
{ .index = MSR_IA32_LASTINTTOIP, .always = false },
|
||||
{ .index = MSR_IA32_XSS, .always = false },
|
||||
{ .index = MSR_EFER, .always = false },
|
||||
{ .index = MSR_IA32_CR_PAT, .always = false },
|
||||
{ .index = MSR_AMD64_SEV_ES_GHCB, .always = true },
|
||||
{ .index = MSR_TSC_AUX, .always = false },
|
||||
{ .index = X2APIC_MSR(APIC_ID), .always = false },
|
||||
{ .index = X2APIC_MSR(APIC_LVR), .always = false },
|
||||
{ .index = X2APIC_MSR(APIC_TASKPRI), .always = false },
|
||||
{ .index = X2APIC_MSR(APIC_ARBPRI), .always = false },
|
||||
{ .index = X2APIC_MSR(APIC_PROCPRI), .always = false },
|
||||
{ .index = X2APIC_MSR(APIC_EOI), .always = false },
|
||||
{ .index = X2APIC_MSR(APIC_RRR), .always = false },
|
||||
{ .index = X2APIC_MSR(APIC_LDR), .always = false },
|
||||
{ .index = X2APIC_MSR(APIC_DFR), .always = false },
|
||||
{ .index = X2APIC_MSR(APIC_SPIV), .always = false },
|
||||
{ .index = X2APIC_MSR(APIC_ISR), .always = false },
|
||||
{ .index = X2APIC_MSR(APIC_TMR), .always = false },
|
||||
{ .index = X2APIC_MSR(APIC_IRR), .always = false },
|
||||
{ .index = X2APIC_MSR(APIC_ESR), .always = false },
|
||||
{ .index = X2APIC_MSR(APIC_ICR), .always = false },
|
||||
{ .index = X2APIC_MSR(APIC_ICR2), .always = false },
|
||||
|
||||
/*
|
||||
* Note:
|
||||
* AMD does not virtualize APIC TSC-deadline timer mode, but it is
|
||||
* emulated by KVM. When setting APIC LVTT (0x832) register bit 18,
|
||||
* the AVIC hardware would generate GP fault. Therefore, always
|
||||
* intercept the MSR 0x832, and do not setup direct_access_msr.
|
||||
*/
|
||||
{ .index = X2APIC_MSR(APIC_LVTTHMR), .always = false },
|
||||
{ .index = X2APIC_MSR(APIC_LVTPC), .always = false },
|
||||
{ .index = X2APIC_MSR(APIC_LVT0), .always = false },
|
||||
{ .index = X2APIC_MSR(APIC_LVT1), .always = false },
|
||||
{ .index = X2APIC_MSR(APIC_LVTERR), .always = false },
|
||||
{ .index = X2APIC_MSR(APIC_TMICT), .always = false },
|
||||
{ .index = X2APIC_MSR(APIC_TMCCT), .always = false },
|
||||
{ .index = X2APIC_MSR(APIC_TDCR), .always = false },
|
||||
{ .index = MSR_INVALID, .always = false },
|
||||
};
|
||||
|
||||
/*
|
||||
* These 2 parameters are used to config the controls for Pause-Loop Exiting:
|
||||
* pause_filter_count: On processors that support Pause filtering(indicated
|
||||
|
@ -265,33 +197,6 @@ static DEFINE_MUTEX(vmcb_dump_mutex);
|
|||
*/
|
||||
static int tsc_aux_uret_slot __read_mostly = -1;
|
||||
|
||||
static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
|
||||
|
||||
#define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges)
|
||||
#define MSRS_RANGE_SIZE 2048
|
||||
#define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2)
|
||||
|
||||
u32 svm_msrpm_offset(u32 msr)
|
||||
{
|
||||
u32 offset;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < NUM_MSR_MAPS; i++) {
|
||||
if (msr < msrpm_ranges[i] ||
|
||||
msr >= msrpm_ranges[i] + MSRS_IN_RANGE)
|
||||
continue;
|
||||
|
||||
offset = (msr - msrpm_ranges[i]) / 4; /* 4 msrs per u8 */
|
||||
offset += (i * MSRS_RANGE_SIZE); /* add range offset */
|
||||
|
||||
/* Now we have the u8 offset - but need the u32 offset */
|
||||
return offset / 4;
|
||||
}
|
||||
|
||||
/* MSR not in any range */
|
||||
return MSR_INVALID;
|
||||
}
|
||||
|
||||
static int get_npt_level(void)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
|
@ -758,50 +663,8 @@ static void clr_dr_intercepts(struct vcpu_svm *svm)
|
|||
recalc_intercepts(svm);
|
||||
}
|
||||
|
||||
static int direct_access_msr_slot(u32 msr)
|
||||
{
|
||||
u32 i;
|
||||
|
||||
for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++)
|
||||
if (direct_access_msrs[i].index == msr)
|
||||
return i;
|
||||
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
static void set_shadow_msr_intercept(struct kvm_vcpu *vcpu, u32 msr, int read,
|
||||
int write)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
int slot = direct_access_msr_slot(msr);
|
||||
|
||||
if (slot == -ENOENT)
|
||||
return;
|
||||
|
||||
/* Set the shadow bitmaps to the desired intercept states */
|
||||
if (read)
|
||||
set_bit(slot, svm->shadow_msr_intercept.read);
|
||||
else
|
||||
clear_bit(slot, svm->shadow_msr_intercept.read);
|
||||
|
||||
if (write)
|
||||
set_bit(slot, svm->shadow_msr_intercept.write);
|
||||
else
|
||||
clear_bit(slot, svm->shadow_msr_intercept.write);
|
||||
}
|
||||
|
||||
static bool valid_msr_intercept(u32 index)
|
||||
{
|
||||
return direct_access_msr_slot(index) != -ENOENT;
|
||||
}
|
||||
|
||||
static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
|
||||
{
|
||||
u8 bit_write;
|
||||
unsigned long tmp;
|
||||
u32 offset;
|
||||
u32 *msrpm;
|
||||
|
||||
/*
|
||||
* For non-nested case:
|
||||
* If the L01 MSR bitmap does not intercept the MSR, then we need to
|
||||
|
@ -811,90 +674,102 @@ static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
|
|||
* If the L02 MSR bitmap does not intercept the MSR, then we need to
|
||||
* save it.
|
||||
*/
|
||||
msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm:
|
||||
to_svm(vcpu)->msrpm;
|
||||
void *msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm :
|
||||
to_svm(vcpu)->msrpm;
|
||||
|
||||
offset = svm_msrpm_offset(msr);
|
||||
bit_write = 2 * (msr & 0x0f) + 1;
|
||||
tmp = msrpm[offset];
|
||||
|
||||
BUG_ON(offset == MSR_INVALID);
|
||||
|
||||
return test_bit(bit_write, &tmp);
|
||||
return svm_test_msr_bitmap_write(msrpm, msr);
|
||||
}
|
||||
|
||||
static void set_msr_interception_bitmap(struct kvm_vcpu *vcpu, u32 *msrpm,
|
||||
u32 msr, int read, int write)
|
||||
void svm_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type, bool set)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
u8 bit_read, bit_write;
|
||||
unsigned long tmp;
|
||||
u32 offset;
|
||||
void *msrpm = svm->msrpm;
|
||||
|
||||
/*
|
||||
* If this warning triggers extend the direct_access_msrs list at the
|
||||
* beginning of the file
|
||||
*/
|
||||
WARN_ON(!valid_msr_intercept(msr));
|
||||
/* Don't disable interception for MSRs userspace wants to handle. */
|
||||
if (type & MSR_TYPE_R) {
|
||||
if (!set && kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_READ))
|
||||
svm_clear_msr_bitmap_read(msrpm, msr);
|
||||
else
|
||||
svm_set_msr_bitmap_read(msrpm, msr);
|
||||
}
|
||||
|
||||
/* Enforce non allowed MSRs to trap */
|
||||
if (read && !kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_READ))
|
||||
read = 0;
|
||||
|
||||
if (write && !kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_WRITE))
|
||||
write = 0;
|
||||
|
||||
offset = svm_msrpm_offset(msr);
|
||||
bit_read = 2 * (msr & 0x0f);
|
||||
bit_write = 2 * (msr & 0x0f) + 1;
|
||||
tmp = msrpm[offset];
|
||||
|
||||
BUG_ON(offset == MSR_INVALID);
|
||||
|
||||
read ? clear_bit(bit_read, &tmp) : set_bit(bit_read, &tmp);
|
||||
write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp);
|
||||
|
||||
msrpm[offset] = tmp;
|
||||
if (type & MSR_TYPE_W) {
|
||||
if (!set && kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_WRITE))
|
||||
svm_clear_msr_bitmap_write(msrpm, msr);
|
||||
else
|
||||
svm_set_msr_bitmap_write(msrpm, msr);
|
||||
}
|
||||
|
||||
svm_hv_vmcb_dirty_nested_enlightenments(vcpu);
|
||||
svm->nested.force_msr_bitmap_recalc = true;
|
||||
}
|
||||
|
||||
void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr,
|
||||
int read, int write)
|
||||
void *svm_alloc_permissions_map(unsigned long size, gfp_t gfp_mask)
|
||||
{
|
||||
set_shadow_msr_intercept(vcpu, msr, read, write);
|
||||
set_msr_interception_bitmap(vcpu, msrpm, msr, read, write);
|
||||
}
|
||||
|
||||
u32 *svm_vcpu_alloc_msrpm(void)
|
||||
{
|
||||
unsigned int order = get_order(MSRPM_SIZE);
|
||||
struct page *pages = alloc_pages(GFP_KERNEL_ACCOUNT, order);
|
||||
u32 *msrpm;
|
||||
unsigned int order = get_order(size);
|
||||
struct page *pages = alloc_pages(gfp_mask, order);
|
||||
void *pm;
|
||||
|
||||
if (!pages)
|
||||
return NULL;
|
||||
|
||||
msrpm = page_address(pages);
|
||||
memset(msrpm, 0xff, PAGE_SIZE * (1 << order));
|
||||
/*
|
||||
* Set all bits in the permissions map so that all MSR and I/O accesses
|
||||
* are intercepted by default.
|
||||
*/
|
||||
pm = page_address(pages);
|
||||
memset(pm, 0xff, PAGE_SIZE * (1 << order));
|
||||
|
||||
return msrpm;
|
||||
return pm;
|
||||
}
|
||||
|
||||
void svm_vcpu_init_msrpm(struct kvm_vcpu *vcpu, u32 *msrpm)
|
||||
static void svm_recalc_lbr_msr_intercepts(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int i;
|
||||
bool intercept = !(to_svm(vcpu)->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK);
|
||||
|
||||
for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
|
||||
if (!direct_access_msrs[i].always)
|
||||
continue;
|
||||
set_msr_interception(vcpu, msrpm, direct_access_msrs[i].index, 1, 1);
|
||||
}
|
||||
svm_set_intercept_for_msr(vcpu, MSR_IA32_LASTBRANCHFROMIP, MSR_TYPE_RW, intercept);
|
||||
svm_set_intercept_for_msr(vcpu, MSR_IA32_LASTBRANCHTOIP, MSR_TYPE_RW, intercept);
|
||||
svm_set_intercept_for_msr(vcpu, MSR_IA32_LASTINTFROMIP, MSR_TYPE_RW, intercept);
|
||||
svm_set_intercept_for_msr(vcpu, MSR_IA32_LASTINTTOIP, MSR_TYPE_RW, intercept);
|
||||
|
||||
if (sev_es_guest(vcpu->kvm))
|
||||
svm_set_intercept_for_msr(vcpu, MSR_IA32_DEBUGCTLMSR, MSR_TYPE_RW, intercept);
|
||||
}
|
||||
|
||||
void svm_set_x2apic_msr_interception(struct vcpu_svm *svm, bool intercept)
|
||||
{
|
||||
static const u32 x2avic_passthrough_msrs[] = {
|
||||
X2APIC_MSR(APIC_ID),
|
||||
X2APIC_MSR(APIC_LVR),
|
||||
X2APIC_MSR(APIC_TASKPRI),
|
||||
X2APIC_MSR(APIC_ARBPRI),
|
||||
X2APIC_MSR(APIC_PROCPRI),
|
||||
X2APIC_MSR(APIC_EOI),
|
||||
X2APIC_MSR(APIC_RRR),
|
||||
X2APIC_MSR(APIC_LDR),
|
||||
X2APIC_MSR(APIC_DFR),
|
||||
X2APIC_MSR(APIC_SPIV),
|
||||
X2APIC_MSR(APIC_ISR),
|
||||
X2APIC_MSR(APIC_TMR),
|
||||
X2APIC_MSR(APIC_IRR),
|
||||
X2APIC_MSR(APIC_ESR),
|
||||
X2APIC_MSR(APIC_ICR),
|
||||
X2APIC_MSR(APIC_ICR2),
|
||||
|
||||
/*
|
||||
* Note! Always intercept LVTT, as TSC-deadline timer mode
|
||||
* isn't virtualized by hardware, and the CPU will generate a
|
||||
* #GP instead of a #VMEXIT.
|
||||
*/
|
||||
X2APIC_MSR(APIC_LVTTHMR),
|
||||
X2APIC_MSR(APIC_LVTPC),
|
||||
X2APIC_MSR(APIC_LVT0),
|
||||
X2APIC_MSR(APIC_LVT1),
|
||||
X2APIC_MSR(APIC_LVTERR),
|
||||
X2APIC_MSR(APIC_TMICT),
|
||||
X2APIC_MSR(APIC_TMCCT),
|
||||
X2APIC_MSR(APIC_TDCR),
|
||||
};
|
||||
int i;
|
||||
|
||||
if (intercept == svm->x2avic_msrs_intercepted)
|
||||
|
@ -903,84 +778,79 @@ void svm_set_x2apic_msr_interception(struct vcpu_svm *svm, bool intercept)
|
|||
if (!x2avic_enabled)
|
||||
return;
|
||||
|
||||
for (i = 0; i < MAX_DIRECT_ACCESS_MSRS; i++) {
|
||||
int index = direct_access_msrs[i].index;
|
||||
|
||||
if ((index < APIC_BASE_MSR) ||
|
||||
(index > APIC_BASE_MSR + 0xff))
|
||||
continue;
|
||||
set_msr_interception(&svm->vcpu, svm->msrpm, index,
|
||||
!intercept, !intercept);
|
||||
}
|
||||
for (i = 0; i < ARRAY_SIZE(x2avic_passthrough_msrs); i++)
|
||||
svm_set_intercept_for_msr(&svm->vcpu, x2avic_passthrough_msrs[i],
|
||||
MSR_TYPE_RW, intercept);
|
||||
|
||||
svm->x2avic_msrs_intercepted = intercept;
|
||||
}
|
||||
|
||||
void svm_vcpu_free_msrpm(u32 *msrpm)
|
||||
void svm_vcpu_free_msrpm(void *msrpm)
|
||||
{
|
||||
__free_pages(virt_to_page(msrpm), get_order(MSRPM_SIZE));
|
||||
}
|
||||
|
||||
static void svm_msr_filter_changed(struct kvm_vcpu *vcpu)
|
||||
static void svm_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
u32 i;
|
||||
|
||||
svm_disable_intercept_for_msr(vcpu, MSR_STAR, MSR_TYPE_RW);
|
||||
svm_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
svm_disable_intercept_for_msr(vcpu, MSR_GS_BASE, MSR_TYPE_RW);
|
||||
svm_disable_intercept_for_msr(vcpu, MSR_FS_BASE, MSR_TYPE_RW);
|
||||
svm_disable_intercept_for_msr(vcpu, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
|
||||
svm_disable_intercept_for_msr(vcpu, MSR_LSTAR, MSR_TYPE_RW);
|
||||
svm_disable_intercept_for_msr(vcpu, MSR_CSTAR, MSR_TYPE_RW);
|
||||
svm_disable_intercept_for_msr(vcpu, MSR_SYSCALL_MASK, MSR_TYPE_RW);
|
||||
#endif
|
||||
|
||||
if (lbrv)
|
||||
svm_recalc_lbr_msr_intercepts(vcpu);
|
||||
|
||||
if (cpu_feature_enabled(X86_FEATURE_IBPB))
|
||||
svm_set_intercept_for_msr(vcpu, MSR_IA32_PRED_CMD, MSR_TYPE_W,
|
||||
!guest_has_pred_cmd_msr(vcpu));
|
||||
|
||||
if (cpu_feature_enabled(X86_FEATURE_FLUSH_L1D))
|
||||
svm_set_intercept_for_msr(vcpu, MSR_IA32_FLUSH_CMD, MSR_TYPE_W,
|
||||
!guest_cpu_cap_has(vcpu, X86_FEATURE_FLUSH_L1D));
|
||||
|
||||
/*
|
||||
* Set intercept permissions for all direct access MSRs again. They
|
||||
* will automatically get filtered through the MSR filter, so we are
|
||||
* back in sync after this.
|
||||
* Disable interception of SPEC_CTRL if KVM doesn't need to manually
|
||||
* context switch the MSR (SPEC_CTRL is virtualized by the CPU), or if
|
||||
* the guest has a non-zero SPEC_CTRL value, i.e. is likely actively
|
||||
* using SPEC_CTRL.
|
||||
*/
|
||||
for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
|
||||
u32 msr = direct_access_msrs[i].index;
|
||||
u32 read = test_bit(i, svm->shadow_msr_intercept.read);
|
||||
u32 write = test_bit(i, svm->shadow_msr_intercept.write);
|
||||
|
||||
set_msr_interception_bitmap(vcpu, svm->msrpm, msr, read, write);
|
||||
}
|
||||
}
|
||||
|
||||
static void add_msr_offset(u32 offset)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < MSRPM_OFFSETS; ++i) {
|
||||
|
||||
/* Offset already in list? */
|
||||
if (msrpm_offsets[i] == offset)
|
||||
return;
|
||||
|
||||
/* Slot used by another offset? */
|
||||
if (msrpm_offsets[i] != MSR_INVALID)
|
||||
continue;
|
||||
|
||||
/* Add offset to list */
|
||||
msrpm_offsets[i] = offset;
|
||||
|
||||
return;
|
||||
}
|
||||
if (cpu_feature_enabled(X86_FEATURE_V_SPEC_CTRL))
|
||||
svm_set_intercept_for_msr(vcpu, MSR_IA32_SPEC_CTRL, MSR_TYPE_RW,
|
||||
!guest_has_spec_ctrl_msr(vcpu));
|
||||
else
|
||||
svm_set_intercept_for_msr(vcpu, MSR_IA32_SPEC_CTRL, MSR_TYPE_RW,
|
||||
!svm->spec_ctrl);
|
||||
|
||||
/*
|
||||
* If this BUG triggers the msrpm_offsets table has an overflow. Just
|
||||
* increase MSRPM_OFFSETS in this case.
|
||||
* Intercept SYSENTER_EIP and SYSENTER_ESP when emulating an Intel CPU,
|
||||
* as AMD hardware only store 32 bits, whereas Intel CPUs track 64 bits.
|
||||
*/
|
||||
BUG();
|
||||
}
|
||||
svm_set_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW,
|
||||
guest_cpuid_is_intel_compatible(vcpu));
|
||||
svm_set_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW,
|
||||
guest_cpuid_is_intel_compatible(vcpu));
|
||||
|
||||
static void init_msrpm_offsets(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
memset(msrpm_offsets, 0xff, sizeof(msrpm_offsets));
|
||||
|
||||
for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
|
||||
u32 offset;
|
||||
|
||||
offset = svm_msrpm_offset(direct_access_msrs[i].index);
|
||||
BUG_ON(offset == MSR_INVALID);
|
||||
|
||||
add_msr_offset(offset);
|
||||
if (kvm_aperfmperf_in_guest(vcpu->kvm)) {
|
||||
svm_disable_intercept_for_msr(vcpu, MSR_IA32_APERF, MSR_TYPE_R);
|
||||
svm_disable_intercept_for_msr(vcpu, MSR_IA32_MPERF, MSR_TYPE_R);
|
||||
}
|
||||
|
||||
if (sev_es_guest(vcpu->kvm))
|
||||
sev_es_recalc_msr_intercepts(vcpu);
|
||||
|
||||
/*
|
||||
* x2APIC intercepts are modified on-demand and cannot be filtered by
|
||||
* userspace.
|
||||
*/
|
||||
}
|
||||
|
||||
void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
|
||||
|
@ -999,13 +869,7 @@ void svm_enable_lbrv(struct kvm_vcpu *vcpu)
|
|||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
|
||||
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
|
||||
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
|
||||
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
|
||||
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
|
||||
|
||||
if (sev_es_guest(vcpu->kvm))
|
||||
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_DEBUGCTLMSR, 1, 1);
|
||||
svm_recalc_lbr_msr_intercepts(vcpu);
|
||||
|
||||
/* Move the LBR msrs to the vmcb02 so that the guest can see them. */
|
||||
if (is_guest_mode(vcpu))
|
||||
|
@ -1017,12 +881,8 @@ static void svm_disable_lbrv(struct kvm_vcpu *vcpu)
|
|||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
KVM_BUG_ON(sev_es_guest(vcpu->kvm), vcpu->kvm);
|
||||
|
||||
svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
|
||||
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
|
||||
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
|
||||
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
|
||||
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
|
||||
svm_recalc_lbr_msr_intercepts(vcpu);
|
||||
|
||||
/*
|
||||
* Move the LBR msrs back to the vmcb01 to avoid copying them
|
||||
|
@ -1177,9 +1037,10 @@ void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu)
|
|||
}
|
||||
|
||||
/* Evaluate instruction intercepts that depend on guest CPUID features. */
|
||||
static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu,
|
||||
struct vcpu_svm *svm)
|
||||
static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
/*
|
||||
* Intercept INVPCID if shadow paging is enabled to sync/free shadow
|
||||
* roots, or if INVPCID is disabled in the guest to inject #UD.
|
||||
|
@ -1198,24 +1059,11 @@ static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu,
|
|||
else
|
||||
svm_set_intercept(svm, INTERCEPT_RDTSCP);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void init_vmcb_after_set_cpuid(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
if (guest_cpuid_is_intel_compatible(vcpu)) {
|
||||
/*
|
||||
* We must intercept SYSENTER_EIP and SYSENTER_ESP
|
||||
* accesses because the processor only stores 32 bits.
|
||||
* For the same reason we cannot use virtual VMLOAD/VMSAVE.
|
||||
*/
|
||||
svm_set_intercept(svm, INTERCEPT_VMLOAD);
|
||||
svm_set_intercept(svm, INTERCEPT_VMSAVE);
|
||||
svm->vmcb->control.virt_ext &= ~VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
|
||||
|
||||
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_EIP, 0, 0);
|
||||
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_ESP, 0, 0);
|
||||
} else {
|
||||
/*
|
||||
* If hardware supports Virtual VMLOAD VMSAVE then enable it
|
||||
|
@ -1226,12 +1074,15 @@ static inline void init_vmcb_after_set_cpuid(struct kvm_vcpu *vcpu)
|
|||
svm_clr_intercept(svm, INTERCEPT_VMSAVE);
|
||||
svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
|
||||
}
|
||||
/* No need to intercept these MSRs */
|
||||
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_EIP, 1, 1);
|
||||
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_ESP, 1, 1);
|
||||
}
|
||||
}
|
||||
|
||||
static void svm_recalc_intercepts_after_set_cpuid(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
svm_recalc_instruction_intercepts(vcpu);
|
||||
svm_recalc_msr_intercepts(vcpu);
|
||||
}
|
||||
|
||||
static void init_vmcb(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
@ -1354,15 +1205,6 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
|
|||
svm_clr_intercept(svm, INTERCEPT_PAUSE);
|
||||
}
|
||||
|
||||
svm_recalc_instruction_intercepts(vcpu, svm);
|
||||
|
||||
/*
|
||||
* If the host supports V_SPEC_CTRL then disable the interception
|
||||
* of MSR_IA32_SPEC_CTRL.
|
||||
*/
|
||||
if (boot_cpu_has(X86_FEATURE_V_SPEC_CTRL))
|
||||
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1);
|
||||
|
||||
if (kvm_vcpu_apicv_active(vcpu))
|
||||
avic_init_vmcb(svm, vmcb);
|
||||
|
||||
|
@ -1382,7 +1224,8 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
|
|||
sev_init_vmcb(svm);
|
||||
|
||||
svm_hv_init_vmcb(vmcb);
|
||||
init_vmcb_after_set_cpuid(vcpu);
|
||||
|
||||
svm_recalc_intercepts_after_set_cpuid(vcpu);
|
||||
|
||||
vmcb_mark_all_dirty(vmcb);
|
||||
|
||||
|
@ -1393,8 +1236,6 @@ static void __svm_vcpu_reset(struct kvm_vcpu *vcpu)
|
|||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
svm_vcpu_init_msrpm(vcpu, svm->msrpm);
|
||||
|
||||
svm_init_osvw(vcpu);
|
||||
|
||||
if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_STUFF_FEATURE_MSRS))
|
||||
|
@ -1499,7 +1340,7 @@ static void svm_vcpu_free(struct kvm_vcpu *vcpu)
|
|||
sev_free_vcpu(vcpu);
|
||||
|
||||
__free_page(__sme_pa_to_page(svm->vmcb01.pa));
|
||||
__free_pages(virt_to_page(svm->msrpm), get_order(MSRPM_SIZE));
|
||||
svm_vcpu_free_msrpm(svm->msrpm);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_CPU_MITIGATIONS
|
||||
|
@ -2883,12 +2724,11 @@ static int svm_get_feature_msr(u32 msr, u64 *data)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static bool
|
||||
sev_es_prevent_msr_access(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
static bool sev_es_prevent_msr_access(struct kvm_vcpu *vcpu,
|
||||
struct msr_data *msr_info)
|
||||
{
|
||||
return sev_es_guest(vcpu->kvm) &&
|
||||
vcpu->arch.guest_state_protected &&
|
||||
svm_msrpm_offset(msr_info->index) != MSR_INVALID &&
|
||||
!msr_write_intercepted(vcpu, msr_info->index);
|
||||
}
|
||||
|
||||
|
@ -3119,11 +2959,11 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
|
|||
*
|
||||
* For nested:
|
||||
* The handling of the MSR bitmap for L2 guests is done in
|
||||
* nested_svm_vmrun_msrpm.
|
||||
* nested_svm_merge_msrpm().
|
||||
* We update the L1 MSR bit as well since it will end up
|
||||
* touching the MSR anyway now.
|
||||
*/
|
||||
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1);
|
||||
svm_disable_intercept_for_msr(vcpu, MSR_IA32_SPEC_CTRL, MSR_TYPE_RW);
|
||||
break;
|
||||
case MSR_AMD64_VIRT_SPEC_CTRL:
|
||||
if (!msr->host_initiated &&
|
||||
|
@ -3189,8 +3029,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
|
|||
|
||||
/*
|
||||
* TSC_AUX is usually changed only during boot and never read
|
||||
* directly. Intercept TSC_AUX instead of exposing it to the
|
||||
* guest via direct_access_msrs, and switch it via user return.
|
||||
* directly. Intercept TSC_AUX and switch it via user return.
|
||||
*/
|
||||
preempt_disable();
|
||||
ret = kvm_set_user_return_msr(tsc_aux_uret_slot, data, -1ull);
|
||||
|
@ -4392,9 +4231,9 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in
|
|||
guest_state_exit_irqoff();
|
||||
}
|
||||
|
||||
static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu,
|
||||
bool force_immediate_exit)
|
||||
static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
|
||||
{
|
||||
bool force_immediate_exit = run_flags & KVM_RUN_FORCE_IMMEDIATE_EXIT;
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
bool spec_ctrl_intercepted = msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL);
|
||||
|
||||
|
@ -4441,10 +4280,13 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu,
|
|||
svm_hv_update_vp_id(svm->vmcb, vcpu);
|
||||
|
||||
/*
|
||||
* Run with all-zero DR6 unless needed, so that we can get the exact cause
|
||||
* of a #DB.
|
||||
* Run with all-zero DR6 unless the guest can write DR6 freely, so that
|
||||
* KVM can get the exact cause of a #DB. Note, loading guest DR6 from
|
||||
* KVM's snapshot is only necessary when DR accesses won't exit.
|
||||
*/
|
||||
if (likely(!(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)))
|
||||
if (unlikely(run_flags & KVM_RUN_LOAD_GUEST_DR6))
|
||||
svm_set_dr6(vcpu, vcpu->arch.dr6);
|
||||
else if (likely(!(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)))
|
||||
svm_set_dr6(vcpu, DR6_ACTIVE_LOW);
|
||||
|
||||
clgi();
|
||||
|
@ -4624,20 +4466,10 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
|
|||
if (guest_cpuid_is_intel_compatible(vcpu))
|
||||
guest_cpu_cap_clear(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD);
|
||||
|
||||
svm_recalc_instruction_intercepts(vcpu, svm);
|
||||
|
||||
if (boot_cpu_has(X86_FEATURE_IBPB))
|
||||
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_PRED_CMD, 0,
|
||||
!!guest_has_pred_cmd_msr(vcpu));
|
||||
|
||||
if (boot_cpu_has(X86_FEATURE_FLUSH_L1D))
|
||||
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_FLUSH_CMD, 0,
|
||||
!!guest_cpu_cap_has(vcpu, X86_FEATURE_FLUSH_L1D));
|
||||
|
||||
if (sev_guest(vcpu->kvm))
|
||||
sev_vcpu_after_set_cpuid(svm);
|
||||
|
||||
init_vmcb_after_set_cpuid(vcpu);
|
||||
svm_recalc_intercepts_after_set_cpuid(vcpu);
|
||||
}
|
||||
|
||||
static bool svm_has_wbinvd_exit(void)
|
||||
|
@ -5188,7 +5020,7 @@ static int svm_vm_init(struct kvm *kvm)
|
|||
}
|
||||
|
||||
if (!pause_filter_count || !pause_filter_thresh)
|
||||
kvm->arch.pause_in_guest = true;
|
||||
kvm_disable_exits(kvm, KVM_X86_DISABLE_EXITS_PAUSE);
|
||||
|
||||
if (enable_apicv) {
|
||||
int ret = avic_vm_init(kvm);
|
||||
|
@ -5255,7 +5087,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
|
|||
.set_idt = svm_set_idt,
|
||||
.get_gdt = svm_get_gdt,
|
||||
.set_gdt = svm_set_gdt,
|
||||
.set_dr6 = svm_set_dr6,
|
||||
.set_dr7 = svm_set_dr7,
|
||||
.sync_dirty_debug_regs = svm_sync_dirty_debug_regs,
|
||||
.cache_reg = svm_cache_reg,
|
||||
|
@ -5340,7 +5171,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
|
|||
|
||||
.apic_init_signal_blocked = svm_apic_init_signal_blocked,
|
||||
|
||||
.msr_filter_changed = svm_msr_filter_changed,
|
||||
.recalc_msr_intercepts = svm_recalc_msr_intercepts,
|
||||
.complete_emulated_msr = svm_complete_emulated_msr,
|
||||
|
||||
.vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
|
||||
|
@ -5476,11 +5307,8 @@ static __init void svm_set_cpu_caps(void)
|
|||
|
||||
static __init int svm_hardware_setup(void)
|
||||
{
|
||||
int cpu;
|
||||
struct page *iopm_pages;
|
||||
void *iopm_va;
|
||||
int r;
|
||||
unsigned int order = get_order(IOPM_SIZE);
|
||||
int cpu, r;
|
||||
|
||||
/*
|
||||
* NX is required for shadow paging and for NPT if the NX huge pages
|
||||
|
@ -5492,17 +5320,6 @@ static __init int svm_hardware_setup(void)
|
|||
}
|
||||
kvm_enable_efer_bits(EFER_NX);
|
||||
|
||||
iopm_pages = alloc_pages(GFP_KERNEL, order);
|
||||
|
||||
if (!iopm_pages)
|
||||
return -ENOMEM;
|
||||
|
||||
iopm_va = page_address(iopm_pages);
|
||||
memset(iopm_va, 0xff, PAGE_SIZE * (1 << order));
|
||||
iopm_base = __sme_page_pa(iopm_pages);
|
||||
|
||||
init_msrpm_offsets();
|
||||
|
||||
kvm_caps.supported_xcr0 &= ~(XFEATURE_MASK_BNDREGS |
|
||||
XFEATURE_MASK_BNDCSR);
|
||||
|
||||
|
@ -5536,6 +5353,10 @@ static __init int svm_hardware_setup(void)
|
|||
if (nested) {
|
||||
pr_info("Nested Virtualization enabled\n");
|
||||
kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
|
||||
|
||||
r = nested_svm_init_msrpm_merge_offsets();
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -5567,6 +5388,13 @@ static __init int svm_hardware_setup(void)
|
|||
else
|
||||
pr_info("LBR virtualization supported\n");
|
||||
}
|
||||
|
||||
iopm_va = svm_alloc_permissions_map(IOPM_SIZE, GFP_KERNEL);
|
||||
if (!iopm_va)
|
||||
return -ENOMEM;
|
||||
|
||||
iopm_base = __sme_set(__pa(iopm_va));
|
||||
|
||||
/*
|
||||
* Note, SEV setup consumes npt_enabled and enable_mmio_caching (which
|
||||
* may be modified by svm_adjust_mmio_mask()), as well as nrips.
|
||||
|
|
|
@ -44,9 +44,6 @@ static inline struct page *__sme_pa_to_page(unsigned long pa)
|
|||
#define IOPM_SIZE PAGE_SIZE * 3
|
||||
#define MSRPM_SIZE PAGE_SIZE * 2
|
||||
|
||||
#define MAX_DIRECT_ACCESS_MSRS 48
|
||||
#define MSRPM_OFFSETS 32
|
||||
extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
|
||||
extern bool npt_enabled;
|
||||
extern int nrips;
|
||||
extern int vgif;
|
||||
|
@ -189,8 +186,11 @@ struct svm_nested_state {
|
|||
u64 vmcb12_gpa;
|
||||
u64 last_vmcb12_gpa;
|
||||
|
||||
/* These are the merged vectors */
|
||||
u32 *msrpm;
|
||||
/*
|
||||
* The MSR permissions map used for vmcb02, which is the merge result
|
||||
* of vmcb01 and vmcb12
|
||||
*/
|
||||
void *msrpm;
|
||||
|
||||
/* A VMRUN has started but has not yet been performed, so
|
||||
* we cannot inject a nested vmexit yet. */
|
||||
|
@ -271,7 +271,7 @@ struct vcpu_svm {
|
|||
*/
|
||||
u64 virt_spec_ctrl;
|
||||
|
||||
u32 *msrpm;
|
||||
void *msrpm;
|
||||
|
||||
ulong nmi_iret_rip;
|
||||
|
||||
|
@ -326,12 +326,6 @@ struct vcpu_svm {
|
|||
struct list_head ir_list;
|
||||
spinlock_t ir_list_lock;
|
||||
|
||||
/* Save desired MSR intercept (read: pass-through) state */
|
||||
struct {
|
||||
DECLARE_BITMAP(read, MAX_DIRECT_ACCESS_MSRS);
|
||||
DECLARE_BITMAP(write, MAX_DIRECT_ACCESS_MSRS);
|
||||
} shadow_msr_intercept;
|
||||
|
||||
struct vcpu_sev_es_state sev_es;
|
||||
|
||||
bool guest_state_loaded;
|
||||
|
@ -621,17 +615,74 @@ static inline void svm_vmgexit_no_action(struct vcpu_svm *svm, u64 data)
	svm_vmgexit_set_return_code(svm, GHCB_HV_RESP_NO_ACTION, data);
}

/* svm.c */
#define MSR_INVALID			0xffffffffU
/*
 * The MSRPM is 8KiB in size, divided into four 2KiB ranges (the fourth range
 * is reserved).  Each MSR within a range is covered by two bits, one each for
 * read (bit 0) and write (bit 1), where a bit value of '1' means intercepted.
 */
#define SVM_MSRPM_BYTES_PER_RANGE	2048
#define SVM_BITS_PER_MSR		2
#define SVM_MSRS_PER_BYTE		(BITS_PER_BYTE / SVM_BITS_PER_MSR)
#define SVM_MSRS_PER_RANGE		(SVM_MSRPM_BYTES_PER_RANGE * SVM_MSRS_PER_BYTE)
static_assert(SVM_MSRS_PER_RANGE == 8192);
#define SVM_MSRPM_OFFSET_MASK		(SVM_MSRS_PER_RANGE - 1)

static __always_inline int svm_msrpm_bit_nr(u32 msr)
{
	int range_nr;

	switch (msr & ~SVM_MSRPM_OFFSET_MASK) {
	case 0:
		range_nr = 0;
		break;
	case 0xc0000000:
		range_nr = 1;
		break;
	case 0xc0010000:
		range_nr = 2;
		break;
	default:
		return -EINVAL;
	}

	return range_nr * SVM_MSRPM_BYTES_PER_RANGE * BITS_PER_BYTE +
	       (msr & SVM_MSRPM_OFFSET_MASK) * SVM_BITS_PER_MSR;
}

#define __BUILD_SVM_MSR_BITMAP_HELPER(rtype, action, bitop, access, bit_rw)	\
static inline rtype svm_##action##_msr_bitmap_##access(unsigned long *bitmap,	\
							u32 msr)		\
{										\
	int bit_nr;								\
										\
	bit_nr = svm_msrpm_bit_nr(msr);						\
	if (bit_nr < 0)								\
		return (rtype)true;						\
										\
	return bitop##_bit(bit_nr + bit_rw, bitmap);				\
}

#define BUILD_SVM_MSR_BITMAP_HELPERS(ret_type, action, bitop)			\
	__BUILD_SVM_MSR_BITMAP_HELPER(ret_type, action, bitop, read, 0)		\
	__BUILD_SVM_MSR_BITMAP_HELPER(ret_type, action, bitop, write, 1)

BUILD_SVM_MSR_BITMAP_HELPERS(bool, test, test)
BUILD_SVM_MSR_BITMAP_HELPERS(void, clear, __clear)
BUILD_SVM_MSR_BITMAP_HELPERS(void, set, __set)

#define DEBUGCTL_RESERVED_BITS		(~DEBUGCTLMSR_LBR)

/* svm.c */
extern bool dump_invalid_vmcb;

u32 svm_msrpm_offset(u32 msr);
u32 *svm_vcpu_alloc_msrpm(void);
void svm_vcpu_init_msrpm(struct kvm_vcpu *vcpu, u32 *msrpm);
void svm_vcpu_free_msrpm(u32 *msrpm);
void *svm_alloc_permissions_map(unsigned long size, gfp_t gfp_mask);

static inline void *svm_vcpu_alloc_msrpm(void)
{
	return svm_alloc_permissions_map(MSRPM_SIZE, GFP_KERNEL_ACCOUNT);
}

void svm_vcpu_free_msrpm(void *msrpm);
void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb);
void svm_enable_lbrv(struct kvm_vcpu *vcpu);
void svm_update_lbrv(struct kvm_vcpu *vcpu);

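Worked examples of the bit-number computation above; the static_asserts are illustrative and not part of the patch:

/* MSR 0x48 (IA32_SPEC_CTRL): range 0 -> bit_nr = 0x48 * 2 = 144. */
static_assert(0 * SVM_MSRPM_BYTES_PER_RANGE * BITS_PER_BYTE +
	      0x48 * SVM_BITS_PER_MSR == 144);
/* MSR 0xC0000081 (STAR): range 1 -> bit_nr = 16384 + 0x81 * 2 = 16642. */
static_assert(1 * SVM_MSRPM_BYTES_PER_RANGE * BITS_PER_BYTE +
	      0x81 * SVM_BITS_PER_MSR == 16642);
/* The read bit is bit_nr + 0 and the write bit is bit_nr + 1, which is
 * exactly what the __BUILD_SVM_MSR_BITMAP_HELPER() variants encode. */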
@ -651,6 +702,20 @@ void svm_set_x2apic_msr_interception(struct vcpu_svm *svm, bool disable);
void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode,
				     int trig_mode, int vec);

void svm_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type, bool set);

static inline void svm_disable_intercept_for_msr(struct kvm_vcpu *vcpu,
						 u32 msr, int type)
{
	svm_set_intercept_for_msr(vcpu, msr, type, false);
}

static inline void svm_enable_intercept_for_msr(struct kvm_vcpu *vcpu,
						u32 msr, int type)
{
	svm_set_intercept_for_msr(vcpu, msr, type, true);
}

/* nested.c */

#define NESTED_EXIT_HOST	0	/* Exit handled on host level */

@ -679,6 +744,8 @@ static inline bool nested_exit_on_nmi(struct vcpu_svm *svm)
|
|||
return vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_NMI);
|
||||
}
|
||||
|
||||
int __init nested_svm_init_msrpm_merge_offsets(void);
|
||||
|
||||
int enter_svm_guest_mode(struct kvm_vcpu *vcpu,
|
||||
u64 vmcb_gpa, struct vmcb *vmcb12, bool from_vmrun);
|
||||
void svm_leave_nested(struct kvm_vcpu *vcpu);
|
||||
|
@ -762,6 +829,7 @@ void sev_init_vmcb(struct vcpu_svm *svm);
|
|||
void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm);
|
||||
int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in);
|
||||
void sev_es_vcpu_reset(struct vcpu_svm *svm);
|
||||
void sev_es_recalc_msr_intercepts(struct kvm_vcpu *vcpu);
|
||||
void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
|
||||
void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_area *hostsa);
|
||||
void sev_es_unmap_ghcb(struct vcpu_svm *svm);
|
||||
|
|
|
@ -53,8 +53,6 @@ struct vcpu_vt {
|
|||
#ifdef CONFIG_X86_64
|
||||
u64 msr_host_kernel_gs_base;
|
||||
#endif
|
||||
|
||||
unsigned long host_debugctlmsr;
|
||||
};
|
||||
|
||||
#ifdef CONFIG_KVM_INTEL_TDX
|
||||
|
|
|
@@ -175,12 +175,12 @@ static int vt_vcpu_pre_run(struct kvm_vcpu *vcpu)
	return vmx_vcpu_pre_run(vcpu);
}

-static fastpath_t vt_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)
+static fastpath_t vt_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
{
	if (is_td_vcpu(vcpu))
-		return tdx_vcpu_run(vcpu, force_immediate_exit);
+		return tdx_vcpu_run(vcpu, run_flags);

-	return vmx_vcpu_run(vcpu, force_immediate_exit);
+	return vmx_vcpu_run(vcpu, run_flags);
}

static int vt_handle_exit(struct kvm_vcpu *vcpu,

@@ -220,7 +220,7 @@ static int vt_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
	return vmx_get_msr(vcpu, msr_info);
}

-static void vt_msr_filter_changed(struct kvm_vcpu *vcpu)
+static void vt_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
{
	/*
	 * TDX doesn't allow VMM to configure interception of MSR accesses.

@@ -231,7 +231,7 @@ static void vt_msr_filter_changed(struct kvm_vcpu *vcpu)
	if (is_td_vcpu(vcpu))
		return;

-	vmx_msr_filter_changed(vcpu);
+	vmx_recalc_msr_intercepts(vcpu);
}

static int vt_complete_emulated_msr(struct kvm_vcpu *vcpu, int err)

@@ -489,14 +489,6 @@ static void vt_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
	vmx_set_gdt(vcpu, dt);
}

-static void vt_set_dr6(struct kvm_vcpu *vcpu, unsigned long val)
-{
-	if (is_td_vcpu(vcpu))
-		return;
-
-	vmx_set_dr6(vcpu, val);
-}
-
static void vt_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
{
	if (is_td_vcpu(vcpu))

@@ -923,6 +915,8 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
	.vcpu_load = vt_op(vcpu_load),
	.vcpu_put = vt_op(vcpu_put),

+	.HOST_OWNED_DEBUGCTL = VMX_HOST_OWNED_DEBUGCTL_BITS,
+
	.update_exception_bitmap = vt_op(update_exception_bitmap),
	.get_feature_msr = vmx_get_feature_msr,
	.get_msr = vt_op(get_msr),

@@ -943,7 +937,6 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
	.set_idt = vt_op(set_idt),
	.get_gdt = vt_op(get_gdt),
	.set_gdt = vt_op(set_gdt),
-	.set_dr6 = vt_op(set_dr6),
	.set_dr7 = vt_op(set_dr7),
	.sync_dirty_debug_regs = vt_op(sync_dirty_debug_regs),
	.cache_reg = vt_op(cache_reg),

@@ -1034,7 +1027,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
	.apic_init_signal_blocked = vt_op(apic_init_signal_blocked),
	.migrate_timers = vmx_migrate_timers,

-	.msr_filter_changed = vt_op(msr_filter_changed),
+	.recalc_msr_intercepts = vt_op(recalc_msr_intercepts),
	.complete_emulated_msr = vt_op(complete_emulated_msr),

	.vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,

@@ -715,6 +715,12 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
					 MSR_IA32_FLUSH_CMD, MSR_TYPE_W);

+	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+					 MSR_IA32_APERF, MSR_TYPE_R);
+
+	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+					 MSR_IA32_MPERF, MSR_TYPE_R);
+
	kvm_vcpu_unmap(vcpu, &map);

	vmx->nested.force_msr_bitmap_recalc = false;

@@ -2663,10 +2669,11 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
	if (vmx->nested.nested_run_pending &&
	    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
		kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
-		vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
+		vmx_guest_debugctl_write(vcpu, vmcs12->guest_ia32_debugctl &
+					       vmx_get_supported_debugctl(vcpu, false));
	} else {
		kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
-		vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.pre_vmenter_debugctl);
+		vmx_guest_debugctl_write(vcpu, vmx->nested.pre_vmenter_debugctl);
	}
	if (kvm_mpx_supported() && (!vmx->nested.nested_run_pending ||
	    !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)))

@@ -3156,7 +3163,8 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
		return -EINVAL;

	if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) &&
-	    CC(!kvm_dr7_valid(vmcs12->guest_dr7)))
+	    (CC(!kvm_dr7_valid(vmcs12->guest_dr7)) ||
+	     CC(!vmx_is_valid_debugctl(vcpu, vmcs12->guest_ia32_debugctl, false))))
		return -EINVAL;

	if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) &&

@@ -3530,7 +3538,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,

	if (!vmx->nested.nested_run_pending ||
	    !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
-		vmx->nested.pre_vmenter_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
+		vmx->nested.pre_vmenter_debugctl = vmx_guest_debugctl_read();
	if (kvm_mpx_supported() &&
	    (!vmx->nested.nested_run_pending ||
	     !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)))

@@ -4608,6 +4616,12 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
		(vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
		(vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE);

+	/*
+	 * Note! Save DR7, but intentionally don't grab DEBUGCTL from vmcs02.
+	 * Writes to DEBUGCTL that aren't intercepted by L1 are immediately
+	 * propagated to vmcs12 (see vmx_set_msr()), as the value loaded into
+	 * vmcs02 doesn't strictly track vmcs12.
+	 */
	if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS)
		vmcs12->guest_dr7 = vcpu->arch.dr7;

@@ -4798,7 +4812,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
	__vmx_set_segment(vcpu, &seg, VCPU_SREG_LDTR);

	kvm_set_dr(vcpu, 7, 0x400);
-	vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
+	vmx_guest_debugctl_write(vcpu, 0);

	if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
				vmcs12->vm_exit_msr_load_count))

@@ -4853,6 +4867,9 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
		WARN_ON(kvm_set_dr(vcpu, 7, vmcs_readl(GUEST_DR7)));
	}

+	/* Reload DEBUGCTL to ensure vmcs01 has a fresh FREEZE_IN_SMM value. */
+	vmx_reload_guest_debugctl(vcpu);
+
	/*
	 * Note that calling vmx_set_{efer,cr0,cr4} is important as they
	 * handle a variety of side effects to KVM's software model.

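For context on the nested check added above: the new consistency check simply rejects a vmcs12.GUEST_DEBUGCTL value that sets bits KVM does not support for the vCPU. A minimal standalone sketch of that idea follows; the bit positions shown are the architectural DEBUGCTL bits, but the mask composition and helper name are illustrative assumptions, not KVM's internal API.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Architectural DEBUGCTL bits used in the sketch: LBR (0), BTF (1), FREEZE_IN_SMM (14). */
#define DBGCTL_LBR            (1ULL << 0)
#define DBGCTL_BTF            (1ULL << 1)
#define DBGCTL_FREEZE_IN_SMM  (1ULL << 14)

/* Reject a DEBUGCTL value that sets bits outside the supported mask. */
static bool debugctl_is_valid(uint64_t data, uint64_t supported)
{
	return !(data & ~supported);
}

int main(void)
{
	/* Assume a vCPU model where LBR is supported but BTF is not. */
	uint64_t supported = DBGCTL_LBR | DBGCTL_FREEZE_IN_SMM;

	printf("LBR valid: %d\n", debugctl_is_valid(DBGCTL_LBR, supported)); /* 1 */
	printf("BTF valid: %d\n", debugctl_is_valid(DBGCTL_BTF, supported)); /* 0 */
	return 0;
}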
@@ -653,11 +653,11 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu)
 */
static void intel_pmu_legacy_freezing_lbrs_on_pmi(struct kvm_vcpu *vcpu)
{
-	u64 data = vmcs_read64(GUEST_IA32_DEBUGCTL);
+	u64 data = vmx_guest_debugctl_read();

	if (data & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI) {
		data &= ~DEBUGCTLMSR_LBR;
-		vmcs_write64(GUEST_IA32_DEBUGCTL, data);
+		vmx_guest_debugctl_write(vcpu, data);
	}
}

@@ -730,7 +730,7 @@ void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu)

	if (!lbr_desc->event) {
		vmx_disable_lbr_msrs_passthrough(vcpu);
-		if (vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR)
+		if (vmx_guest_debugctl_read() & DEBUGCTLMSR_LBR)
			goto warn;
		if (test_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use))
			goto warn;

@@ -752,7 +752,7 @@ warn:

static void intel_pmu_cleanup(struct kvm_vcpu *vcpu)
{
-	if (!(vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR))
+	if (!(vmx_guest_debugctl_read() & DEBUGCTLMSR_LBR))
		intel_pmu_release_guest_lbr_event(vcpu);
}

@@ -783,8 +783,6 @@ void tdx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
	else
		vt->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE);

-	vt->host_debugctlmsr = get_debugctlmsr();
-
	vt->guest_state_loaded = true;
}

@@ -1025,20 +1023,20 @@ static void tdx_load_host_xsave_state(struct kvm_vcpu *vcpu)
			     DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI | \
			     DEBUGCTLMSR_FREEZE_IN_SMM)

-fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)
+fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
{
	struct vcpu_tdx *tdx = to_tdx(vcpu);
	struct vcpu_vt *vt = to_vt(vcpu);

	/*
-	 * force_immediate_exit requires vCPU entering for events injection with
-	 * an immediately exit followed. But The TDX module doesn't guarantee
-	 * entry, it's already possible for KVM to _think_ it completely entry
-	 * to the guest without actually having done so.
-	 * Since KVM never needs to force an immediate exit for TDX, and can't
-	 * do direct injection, just warn on force_immediate_exit.
+	 * WARN if KVM wants to force an immediate exit, as the TDX module does
+	 * not guarantee entry into the guest, i.e. it's possible for KVM to
+	 * _think_ it completed entry to the guest and forced an immediate exit
+	 * without actually having done so.  Luckily, KVM never needs to force
+	 * an immediate exit for TDX (KVM can't do direct event injection, so
+	 * just WARN and continue on.
	 */
-	WARN_ON_ONCE(force_immediate_exit);
+	WARN_ON_ONCE(run_flags);

	/*
	 * Wait until retry of SEPT-zap-related SEAMCALL completes before

@@ -1048,7 +1046,7 @@ fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)
	if (unlikely(READ_ONCE(to_kvm_tdx(vcpu->kvm)->wait_for_sept_zap)))
		return EXIT_FASTPATH_EXIT_HANDLED;

-	trace_kvm_entry(vcpu, force_immediate_exit);
+	trace_kvm_entry(vcpu, run_flags & KVM_RUN_FORCE_IMMEDIATE_EXIT);

	if (pi_test_on(&vt->pi_desc)) {
		apic->send_IPI_self(POSTED_INTR_VECTOR);

@@ -1060,8 +1058,8 @@ fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)

	tdx_vcpu_enter_exit(vcpu);

-	if (vt->host_debugctlmsr & ~TDX_DEBUGCTL_PRESERVED)
-		update_debugctlmsr(vt->host_debugctlmsr);
+	if (vcpu->arch.host_debugctl & ~TDX_DEBUGCTL_PRESERVED)
+		update_debugctlmsr(vcpu->arch.host_debugctl);

	tdx_load_host_xsave_state(vcpu);
	tdx->guest_entered = true;

@@ -167,31 +167,6 @@ module_param(allow_smaller_maxphyaddr, bool, S_IRUGO);
	RTIT_STATUS_ERROR | RTIT_STATUS_STOPPED | \
	RTIT_STATUS_BYTECNT))

-/*
- * List of MSRs that can be directly passed to the guest.
- * In addition to these x2apic, PT and LBR MSRs are handled specially.
- */
-static u32 vmx_possible_passthrough_msrs[MAX_POSSIBLE_PASSTHROUGH_MSRS] = {
-	MSR_IA32_SPEC_CTRL,
-	MSR_IA32_PRED_CMD,
-	MSR_IA32_FLUSH_CMD,
-	MSR_IA32_TSC,
-#ifdef CONFIG_X86_64
-	MSR_FS_BASE,
-	MSR_GS_BASE,
-	MSR_KERNEL_GS_BASE,
-	MSR_IA32_XFD,
-	MSR_IA32_XFD_ERR,
-#endif
-	MSR_IA32_SYSENTER_CS,
-	MSR_IA32_SYSENTER_ESP,
-	MSR_IA32_SYSENTER_EIP,
-	MSR_CORE_C1_RES,
-	MSR_CORE_C3_RESIDENCY,
-	MSR_CORE_C6_RESIDENCY,
-	MSR_CORE_C7_RESIDENCY,
-};
-
/*
 * These 2 parameters are used to config the controls for Pause-Loop Exiting:
 * ple_gap:    upper bound on the amount of time between two successive

@@ -674,40 +649,6 @@ static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu)
	return flexpriority_enabled && lapic_in_kernel(vcpu);
}

-static int vmx_get_passthrough_msr_slot(u32 msr)
-{
-	int i;
-
-	switch (msr) {
-	case 0x800 ... 0x8ff:
-		/* x2APIC MSRs. These are handled in vmx_update_msr_bitmap_x2apic() */
-		return -ENOENT;
-	case MSR_IA32_RTIT_STATUS:
-	case MSR_IA32_RTIT_OUTPUT_BASE:
-	case MSR_IA32_RTIT_OUTPUT_MASK:
-	case MSR_IA32_RTIT_CR3_MATCH:
-	case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
-		/* PT MSRs. These are handled in pt_update_intercept_for_msr() */
-	case MSR_LBR_SELECT:
-	case MSR_LBR_TOS:
-	case MSR_LBR_INFO_0 ... MSR_LBR_INFO_0 + 31:
-	case MSR_LBR_NHM_FROM ... MSR_LBR_NHM_FROM + 31:
-	case MSR_LBR_NHM_TO ... MSR_LBR_NHM_TO + 31:
-	case MSR_LBR_CORE_FROM ... MSR_LBR_CORE_FROM + 8:
-	case MSR_LBR_CORE_TO ... MSR_LBR_CORE_TO + 8:
-		/* LBR MSRs. These are handled in vmx_update_intercept_for_lbr_msrs() */
-		return -ENOENT;
-	}
-
-	for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++) {
-		if (vmx_possible_passthrough_msrs[i] == msr)
-			return i;
-	}
-
-	WARN(1, "Invalid MSR %x, please adapt vmx_possible_passthrough_msrs[]", msr);
-	return -ENOENT;
-}
-
struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
{
	int i;
@@ -2153,7 +2094,7 @@ int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
		msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
		break;
	case MSR_IA32_DEBUGCTLMSR:
-		msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
+		msr_info->data = vmx_guest_debugctl_read();
		break;
	default:
	find_uret_msr:

@@ -2178,7 +2119,7 @@ static u64 nested_vmx_truncate_sysenter_addr(struct kvm_vcpu *vcpu,
	return (unsigned long)data;
}

-static u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated)
+u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated)
{
	u64 debugctl = 0;

@@ -2190,9 +2131,25 @@ static u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated
	    (host_initiated || intel_pmu_lbr_is_enabled(vcpu)))
		debugctl |= DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;

+	if (boot_cpu_has(X86_FEATURE_RTM) &&
+	    (host_initiated || guest_cpu_cap_has(vcpu, X86_FEATURE_RTM)))
+		debugctl |= DEBUGCTLMSR_RTM_DEBUG;
+
	return debugctl;
}

+bool vmx_is_valid_debugctl(struct kvm_vcpu *vcpu, u64 data, bool host_initiated)
+{
+	u64 invalid;
+
+	invalid = data & ~vmx_get_supported_debugctl(vcpu, host_initiated);
+	if (invalid & (DEBUGCTLMSR_BTF | DEBUGCTLMSR_LBR)) {
+		kvm_pr_unimpl_wrmsr(vcpu, MSR_IA32_DEBUGCTLMSR, data);
+		invalid &= ~(DEBUGCTLMSR_BTF | DEBUGCTLMSR_LBR);
+	}
+	return !invalid;
+}
+
/*
 * Writes msr value into the appropriate "register".
 * Returns 0 on success, non-0 otherwise.

@@ -2261,29 +2218,22 @@ int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
		}
		vmcs_writel(GUEST_SYSENTER_ESP, data);
		break;
-	case MSR_IA32_DEBUGCTLMSR: {
-		u64 invalid;
-
-		invalid = data & ~vmx_get_supported_debugctl(vcpu, msr_info->host_initiated);
-		if (invalid & (DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR)) {
-			kvm_pr_unimpl_wrmsr(vcpu, msr_index, data);
-			data &= ~(DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR);
-			invalid &= ~(DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR);
-		}
-
-		if (invalid)
+	case MSR_IA32_DEBUGCTLMSR:
+		if (!vmx_is_valid_debugctl(vcpu, data, msr_info->host_initiated))
			return 1;

+		data &= vmx_get_supported_debugctl(vcpu, msr_info->host_initiated);
+
		if (is_guest_mode(vcpu) && get_vmcs12(vcpu)->vm_exit_controls &
		    VM_EXIT_SAVE_DEBUG_CONTROLS)
			get_vmcs12(vcpu)->guest_ia32_debugctl = data;

-		vmcs_write64(GUEST_IA32_DEBUGCTL, data);
+		vmx_guest_debugctl_write(vcpu, data);
+
		if (intel_pmu_lbr_is_enabled(vcpu) && !to_vmx(vcpu)->lbr_desc.event &&
		    (data & DEBUGCTLMSR_LBR))
			intel_pmu_create_guest_lbr_event(vcpu);
		return 0;
-	}
	case MSR_IA32_BNDCFGS:
		if (!kvm_mpx_supported() ||
		    (!msr_info->host_initiated &&
@@ -4017,76 +3967,29 @@ static void vmx_msr_bitmap_l01_changed(struct vcpu_vmx *vmx)
	vmx->nested.force_msr_bitmap_recalc = true;
}

-void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
+void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type, bool set)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
-	int idx;

	if (!cpu_has_vmx_msr_bitmap())
		return;

	vmx_msr_bitmap_l01_changed(vmx);

-	/*
-	 * Mark the desired intercept state in shadow bitmap, this is needed
-	 * for resync when the MSR filters change.
-	 */
-	idx = vmx_get_passthrough_msr_slot(msr);
-	if (idx >= 0) {
-		if (type & MSR_TYPE_R)
-			clear_bit(idx, vmx->shadow_msr_intercept.read);
-		if (type & MSR_TYPE_W)
-			clear_bit(idx, vmx->shadow_msr_intercept.write);
+	if (type & MSR_TYPE_R) {
+		if (!set && kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_READ))
+			vmx_clear_msr_bitmap_read(msr_bitmap, msr);
+		else
+			vmx_set_msr_bitmap_read(msr_bitmap, msr);
	}

-	if ((type & MSR_TYPE_R) &&
-	    !kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_READ)) {
-		vmx_set_msr_bitmap_read(msr_bitmap, msr);
-		type &= ~MSR_TYPE_R;
+	if (type & MSR_TYPE_W) {
+		if (!set && kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_WRITE))
+			vmx_clear_msr_bitmap_write(msr_bitmap, msr);
+		else
+			vmx_set_msr_bitmap_write(msr_bitmap, msr);
	}

-	if ((type & MSR_TYPE_W) &&
-	    !kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_WRITE)) {
-		vmx_set_msr_bitmap_write(msr_bitmap, msr);
-		type &= ~MSR_TYPE_W;
-	}
-
-	if (type & MSR_TYPE_R)
-		vmx_clear_msr_bitmap_read(msr_bitmap, msr);
-
-	if (type & MSR_TYPE_W)
-		vmx_clear_msr_bitmap_write(msr_bitmap, msr);
}

-void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
-{
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
-	int idx;
-
-	if (!cpu_has_vmx_msr_bitmap())
-		return;
-
-	vmx_msr_bitmap_l01_changed(vmx);
-
-	/*
-	 * Mark the desired intercept state in shadow bitmap, this is needed
-	 * for resync when the MSR filter changes.
-	 */
-	idx = vmx_get_passthrough_msr_slot(msr);
-	if (idx >= 0) {
-		if (type & MSR_TYPE_R)
-			set_bit(idx, vmx->shadow_msr_intercept.read);
-		if (type & MSR_TYPE_W)
-			set_bit(idx, vmx->shadow_msr_intercept.write);
-	}
-
-	if (type & MSR_TYPE_R)
-		vmx_set_msr_bitmap_read(msr_bitmap, msr);
-
-	if (type & MSR_TYPE_W)
-		vmx_set_msr_bitmap_write(msr_bitmap, msr);
-}
-
static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu)

@@ -4165,35 +4068,57 @@ void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu)
	}
}

-void vmx_msr_filter_changed(struct kvm_vcpu *vcpu)
+void vmx_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
{
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	u32 i;
-
	if (!cpu_has_vmx_msr_bitmap())
		return;

-	/*
-	 * Redo intercept permissions for MSRs that KVM is passing through to
-	 * the guest.  Disabling interception will check the new MSR filter and
-	 * ensure that KVM enables interception if usersepace wants to filter
-	 * the MSR.  MSRs that KVM is already intercepting don't need to be
-	 * refreshed since KVM is going to intercept them regardless of what
-	 * userspace wants.
-	 */
-	for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++) {
-		u32 msr = vmx_possible_passthrough_msrs[i];
-
-		if (!test_bit(i, vmx->shadow_msr_intercept.read))
-			vmx_disable_intercept_for_msr(vcpu, msr, MSR_TYPE_R);
-
-		if (!test_bit(i, vmx->shadow_msr_intercept.write))
-			vmx_disable_intercept_for_msr(vcpu, msr, MSR_TYPE_W);
+	vmx_disable_intercept_for_msr(vcpu, MSR_IA32_TSC, MSR_TYPE_R);
+#ifdef CONFIG_X86_64
+	vmx_disable_intercept_for_msr(vcpu, MSR_FS_BASE, MSR_TYPE_RW);
+	vmx_disable_intercept_for_msr(vcpu, MSR_GS_BASE, MSR_TYPE_RW);
+	vmx_disable_intercept_for_msr(vcpu, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
+#endif
+	vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
+	vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
+	vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
+	if (kvm_cstate_in_guest(vcpu->kvm)) {
+		vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C1_RES, MSR_TYPE_R);
+		vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R);
+		vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
+		vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
	}
+	if (kvm_aperfmperf_in_guest(vcpu->kvm)) {
+		vmx_disable_intercept_for_msr(vcpu, MSR_IA32_APERF, MSR_TYPE_R);
+		vmx_disable_intercept_for_msr(vcpu, MSR_IA32_MPERF, MSR_TYPE_R);
+	}

	/* PT MSRs can be passed through iff PT is exposed to the guest. */
	if (vmx_pt_mode_is_host_guest())
		pt_update_intercept_for_msr(vcpu);

	if (vcpu->arch.xfd_no_write_intercept)
		vmx_disable_intercept_for_msr(vcpu, MSR_IA32_XFD, MSR_TYPE_RW);

+	vmx_set_intercept_for_msr(vcpu, MSR_IA32_SPEC_CTRL, MSR_TYPE_RW,
+				  !to_vmx(vcpu)->spec_ctrl);
+
+	if (kvm_cpu_cap_has(X86_FEATURE_XFD))
+		vmx_set_intercept_for_msr(vcpu, MSR_IA32_XFD_ERR, MSR_TYPE_R,
+					  !guest_cpu_cap_has(vcpu, X86_FEATURE_XFD));
+
+	if (cpu_feature_enabled(X86_FEATURE_IBPB))
+		vmx_set_intercept_for_msr(vcpu, MSR_IA32_PRED_CMD, MSR_TYPE_W,
+					  !guest_has_pred_cmd_msr(vcpu));
+
+	if (cpu_feature_enabled(X86_FEATURE_FLUSH_L1D))
+		vmx_set_intercept_for_msr(vcpu, MSR_IA32_FLUSH_CMD, MSR_TYPE_W,
+					  !guest_cpu_cap_has(vcpu, X86_FEATURE_FLUSH_L1D));
+
+	/*
+	 * x2APIC and LBR MSR intercepts are modified on-demand and cannot be
+	 * filtered by userspace.
+	 */
}

static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
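To illustrate the "recalculate from the source" pattern in the hunk above: instead of keeping shadow bitmaps in sync, the desired pass-through set is recomputed from scratch and each entry is checked against the current userspace filter. A rough standalone sketch, where the structure, helper names, and filter callback are assumptions for illustration only:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_MSR_SLOTS 64

struct msr_bitmap {
	bool intercept_read[MAX_MSR_SLOTS];
	bool intercept_write[MAX_MSR_SLOTS];
};

/* Stand-in for a userspace MSR filter check (cf. kvm_msr_allowed()). */
typedef bool (*msr_allowed_fn)(uint32_t slot, bool write);

static void recalc_intercepts(struct msr_bitmap *bm, const uint32_t *passthrough,
			      int nr, msr_allowed_fn allowed)
{
	int i;

	/* Start from the "source": intercept everything by default. */
	for (i = 0; i < MAX_MSR_SLOTS; i++)
		bm->intercept_read[i] = bm->intercept_write[i] = true;

	/* Then punch holes only where pass-through is desired AND the filter allows it. */
	for (i = 0; i < nr; i++) {
		uint32_t slot = passthrough[i];

		bm->intercept_read[slot]  = !allowed(slot, false);
		bm->intercept_write[slot] = !allowed(slot, true);
	}
}

static bool allow_all(uint32_t slot, bool write) { return true; }

int main(void)
{
	static const uint32_t passthrough[] = { 3, 7 };
	struct msr_bitmap bm;

	recalc_intercepts(&bm, passthrough, 2, allow_all);
	printf("slot 3 read intercepted: %d\n", bm.intercept_read[3]); /* 0 */
	return 0;
}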
@@ -4794,7 +4719,8 @@ static void init_vmcs(struct vcpu_vmx *vmx)
	vmcs_write32(GUEST_SYSENTER_CS, 0);
	vmcs_writel(GUEST_SYSENTER_ESP, 0);
	vmcs_writel(GUEST_SYSENTER_EIP, 0);
-	vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
+
+	vmx_guest_debugctl_write(&vmx->vcpu, 0);

	if (cpu_has_vmx_tpr_shadow()) {
		vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);

@@ -5610,12 +5536,6 @@ void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
	set_debugreg(DR6_RESERVED, 6);
}

-void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val)
-{
-	lockdep_assert_irqs_disabled();
-	set_debugreg(vcpu->arch.dr6, 6);
-}
-
void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
{
	vmcs_writel(GUEST_DR7, val);

@@ -7327,8 +7247,9 @@ out:
	guest_state_exit_irqoff();
}

-fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)
+fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
{
+	bool force_immediate_exit = run_flags & KVM_RUN_FORCE_IMMEDIATE_EXIT;
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	unsigned long cr3, cr4;

@@ -7373,6 +7294,12 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)
		vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
	vcpu->arch.regs_dirty = 0;

+	if (run_flags & KVM_RUN_LOAD_GUEST_DR6)
+		set_debugreg(vcpu->arch.dr6, 6);
+
+	if (run_flags & KVM_RUN_LOAD_DEBUGCTL)
+		vmx_reload_guest_debugctl(vcpu);
+
	/*
	 * Refresh vmcs.HOST_CR3 if necessary.  This must be done immediately
	 * prior to VM-Enter, as the kernel may load a new ASID (PCID) any time

@@ -7547,26 +7474,6 @@ int vmx_vcpu_create(struct kvm_vcpu *vcpu)
		evmcs->hv_enlightenments_control.msr_bitmap = 1;
	}

-	/* The MSR bitmap starts with all ones */
-	bitmap_fill(vmx->shadow_msr_intercept.read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
-	bitmap_fill(vmx->shadow_msr_intercept.write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
-
-	vmx_disable_intercept_for_msr(vcpu, MSR_IA32_TSC, MSR_TYPE_R);
-#ifdef CONFIG_X86_64
-	vmx_disable_intercept_for_msr(vcpu, MSR_FS_BASE, MSR_TYPE_RW);
-	vmx_disable_intercept_for_msr(vcpu, MSR_GS_BASE, MSR_TYPE_RW);
-	vmx_disable_intercept_for_msr(vcpu, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
-#endif
-	vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
-	vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
-	vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
-	if (kvm_cstate_in_guest(vcpu->kvm)) {
-		vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C1_RES, MSR_TYPE_R);
-		vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R);
-		vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
-		vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
-	}
-
	vmx->loaded_vmcs = &vmx->vmcs01;

	if (cpu_need_virtualize_apic_accesses(vcpu)) {

@@ -7616,7 +7523,7 @@ free_vpid:
int vmx_vm_init(struct kvm *kvm)
{
	if (!ple_gap)
-		kvm->arch.pause_in_guest = true;
+		kvm_disable_exits(kvm, KVM_X86_DISABLE_EXITS_PAUSE);

	if (boot_cpu_has(X86_BUG_L1TF) && enable_ept) {
		switch (l1tf_mitigation) {

@@ -7853,18 +7760,6 @@ void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
		}
	}

-	if (kvm_cpu_cap_has(X86_FEATURE_XFD))
-		vmx_set_intercept_for_msr(vcpu, MSR_IA32_XFD_ERR, MSR_TYPE_R,
-					  !guest_cpu_cap_has(vcpu, X86_FEATURE_XFD));
-
-	if (boot_cpu_has(X86_FEATURE_IBPB))
-		vmx_set_intercept_for_msr(vcpu, MSR_IA32_PRED_CMD, MSR_TYPE_W,
-					  !guest_has_pred_cmd_msr(vcpu));
-
-	if (boot_cpu_has(X86_FEATURE_FLUSH_L1D))
-		vmx_set_intercept_for_msr(vcpu, MSR_IA32_FLUSH_CMD, MSR_TYPE_W,
-					  !guest_cpu_cap_has(vcpu, X86_FEATURE_FLUSH_L1D));
-
	set_cr4_guest_host_mask(vmx);

	vmx_write_encls_bitmap(vcpu, NULL);

@@ -7880,6 +7775,9 @@ void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
	vmx->msr_ia32_feature_control_valid_bits &=
		~FEAT_CTL_SGX_LC_ENABLED;

+	/* Recalc MSR interception to account for feature changes. */
+	vmx_recalc_msr_intercepts(vcpu);
+
	/* Refresh #PF interception to account for MAXPHYADDR changes. */
	vmx_update_exception_bitmap(vcpu);
}

@@ -19,8 +19,6 @@
#include "../mmu.h"
#include "common.h"

-#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4))
-
#ifdef CONFIG_X86_64
#define MAX_NR_USER_RETURN_MSRS 7
#else

@@ -296,13 +294,6 @@ struct vcpu_vmx {
	struct pt_desc pt_desc;
	struct lbr_desc lbr_desc;

-	/* Save desired MSR intercept (read: pass-through) state */
-#define MAX_POSSIBLE_PASSTHROUGH_MSRS	16
-	struct {
-		DECLARE_BITMAP(read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
-		DECLARE_BITMAP(write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
-	} shadow_msr_intercept;
-
	/* ve_info must be page aligned. */
	struct vmx_ve_information *ve_info;
};

@@ -395,24 +386,54 @@ bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs,
int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr);
void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu);

-void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type);
-void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type);
+void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type, bool set);
+
+static inline void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu,
+						 u32 msr, int type)
+{
+	vmx_set_intercept_for_msr(vcpu, msr, type, false);
+}
+
+static inline void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu,
+						u32 msr, int type)
+{
+	vmx_set_intercept_for_msr(vcpu, msr, type, true);
+}

u64 vmx_get_l2_tsc_offset(struct kvm_vcpu *vcpu);
u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu);

gva_t vmx_get_untagged_addr(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags);

-static inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr,
-					     int type, bool value)
+void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu);
+
+u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated);
+bool vmx_is_valid_debugctl(struct kvm_vcpu *vcpu, u64 data, bool host_initiated);
+
+#define VMX_HOST_OWNED_DEBUGCTL_BITS	(DEBUGCTLMSR_FREEZE_IN_SMM)
+
+static inline void vmx_guest_debugctl_write(struct kvm_vcpu *vcpu, u64 val)
{
-	if (value)
-		vmx_enable_intercept_for_msr(vcpu, msr, type);
-	else
-		vmx_disable_intercept_for_msr(vcpu, msr, type);
+	WARN_ON_ONCE(val & VMX_HOST_OWNED_DEBUGCTL_BITS);
+
+	val |= vcpu->arch.host_debugctl & VMX_HOST_OWNED_DEBUGCTL_BITS;
+	vmcs_write64(GUEST_IA32_DEBUGCTL, val);
}

-void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu);
+static inline u64 vmx_guest_debugctl_read(void)
+{
+	return vmcs_read64(GUEST_IA32_DEBUGCTL) & ~VMX_HOST_OWNED_DEBUGCTL_BITS;
+}
+
+static inline void vmx_reload_guest_debugctl(struct kvm_vcpu *vcpu)
+{
+	u64 val = vmcs_read64(GUEST_IA32_DEBUGCTL);
+
+	if (!((val ^ vcpu->arch.host_debugctl) & VMX_HOST_OWNED_DEBUGCTL_BITS))
+		return;
+
+	vmx_guest_debugctl_write(vcpu, val & ~VMX_HOST_OWNED_DEBUGCTL_BITS);
+}

/*
 * Note, early Intel manuals have the write-low and read-high bitmap offsets

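As a hedged, standalone sketch of the host-owned-bits split implemented by the helpers above: the value written to hardware always carries the host's current FREEZE_IN_SMM setting, while the guest never observes it. This is simplified relative to the real helpers (which WARN if a caller passes host-owned bits rather than relying on masking); the bit value is the architectural FREEZE_IN_SMM bit, everything else is illustrative.

#include <stdint.h>
#include <stdio.h>

#define FREEZE_IN_SMM	(1ULL << 14)	/* host-owned DEBUGCTL bit */
#define HOST_OWNED	FREEZE_IN_SMM

static uint64_t hw_debugctl;		/* stand-in for vmcs.GUEST_IA32_DEBUGCTL */

static void guest_debugctl_write(uint64_t host_debugctl, uint64_t val)
{
	/* Drop host-owned bits from the guest-requested value... */
	val &= ~HOST_OWNED;
	/* ...and re-add whatever the host currently has set. */
	hw_debugctl = val | (host_debugctl & HOST_OWNED);
}

static uint64_t guest_debugctl_read(void)
{
	/* The guest-visible value never includes host-owned bits. */
	return hw_debugctl & ~HOST_OWNED;
}

int main(void)
{
	guest_debugctl_write(FREEZE_IN_SMM, 0x1 /* LBR */);
	printf("hw=%#llx guest=%#llx\n", (unsigned long long)hw_debugctl,
	       (unsigned long long)guest_debugctl_read());
	return 0;
}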
@@ -21,7 +21,7 @@ void vmx_vm_destroy(struct kvm *kvm);
int vmx_vcpu_precreate(struct kvm *kvm);
int vmx_vcpu_create(struct kvm_vcpu *vcpu);
int vmx_vcpu_pre_run(struct kvm_vcpu *vcpu);
-fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit);
+fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags);
void vmx_vcpu_free(struct kvm_vcpu *vcpu);
void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu);

@@ -52,7 +52,7 @@ void vmx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
			   int trig_mode, int vector);
void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu);
bool vmx_has_emulated_msr(struct kvm *kvm, u32 index);
-void vmx_msr_filter_changed(struct kvm_vcpu *vcpu);
+void vmx_recalc_msr_intercepts(struct kvm_vcpu *vcpu);
void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu);
int vmx_get_feature_msr(u32 msr, u64 *data);

@@ -133,7 +133,7 @@ void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
void tdx_vcpu_free(struct kvm_vcpu *vcpu);
void tdx_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
int tdx_vcpu_pre_run(struct kvm_vcpu *vcpu);
-fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit);
+fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags);
void tdx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
void tdx_vcpu_put(struct kvm_vcpu *vcpu);
bool tdx_protected_apic_has_interrupt(struct kvm_vcpu *vcpu);

@@ -4582,6 +4582,9 @@ static u64 kvm_get_allowed_disable_exits(void)
{
	u64 r = KVM_X86_DISABLE_EXITS_PAUSE;

+	if (boot_cpu_has(X86_FEATURE_APERFMPERF))
+		r |= KVM_X86_DISABLE_EXITS_APERFMPERF;
+
	if (!mitigate_smt_rsb) {
		r |= KVM_X86_DISABLE_EXITS_HLT |
		     KVM_X86_DISABLE_EXITS_CSTATE;

@@ -5495,12 +5498,6 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
	    (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR))
		return -EINVAL;

-	/* INITs are latched while in SMM */
-	if (events->flags & KVM_VCPUEVENT_VALID_SMM &&
-	    (events->smi.smm || events->smi.pending) &&
-	    vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED)
-		return -EINVAL;
-
	process_nmi(vcpu);

	/*

@@ -6490,17 +6487,11 @@ split_irqchip_unlock:

		if (!mitigate_smt_rsb && boot_cpu_has_bug(X86_BUG_SMT_RSB) &&
		    cpu_smt_possible() &&
-		    (cap->args[0] & ~KVM_X86_DISABLE_EXITS_PAUSE))
+		    (cap->args[0] & ~(KVM_X86_DISABLE_EXITS_PAUSE |
+				      KVM_X86_DISABLE_EXITS_APERFMPERF)))
			pr_warn_once(SMT_RSB_MSG);

-		if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE)
-			kvm->arch.pause_in_guest = true;
-		if (cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT)
-			kvm->arch.mwait_in_guest = true;
-		if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT)
-			kvm->arch.hlt_in_guest = true;
-		if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE)
-			kvm->arch.cstate_in_guest = true;
+		kvm_disable_exits(kvm, cap->args[0]);
		r = 0;
disable_exits_unlock:
		mutex_unlock(&kvm->lock);

@@ -7211,9 +7202,12 @@ set_pit2_out:
		if (user_tsc_khz == 0)
			user_tsc_khz = tsc_khz;

-		WRITE_ONCE(kvm->arch.default_tsc_khz, user_tsc_khz);
-		r = 0;
-
+		mutex_lock(&kvm->lock);
+		if (!kvm->created_vcpus) {
+			WRITE_ONCE(kvm->arch.default_tsc_khz, user_tsc_khz);
+			r = 0;
+		}
+		mutex_unlock(&kvm->lock);
		goto out;
	}
	case KVM_GET_TSC_KHZ: {
@@ -10657,6 +10651,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
		dm_request_for_irq_injection(vcpu) &&
		kvm_cpu_accept_dm_intr(vcpu);
	fastpath_t exit_fastpath;
+	u64 run_flags, debug_ctl;

	bool req_immediate_exit = false;

@@ -10804,8 +10799,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
			kvm_vcpu_update_apicv(vcpu);
		if (kvm_check_request(KVM_REQ_APF_READY, vcpu))
			kvm_check_async_pf_completion(vcpu);
+
+		/*
+		 * Recalc MSR intercepts as userspace may want to intercept
+		 * accesses to MSRs that KVM would otherwise pass through to
+		 * the guest.
+		 */
		if (kvm_check_request(KVM_REQ_MSR_FILTER_CHANGED, vcpu))
-			kvm_x86_call(msr_filter_changed)(vcpu);
+			kvm_x86_call(recalc_msr_intercepts)(vcpu);

		if (kvm_check_request(KVM_REQ_UPDATE_CPU_DIRTY_LOGGING, vcpu))
			kvm_x86_call(update_cpu_dirty_logging)(vcpu);

@@ -10901,8 +10902,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
		goto cancel_injection;
	}

-	if (req_immediate_exit)
+	run_flags = 0;
+	if (req_immediate_exit) {
+		run_flags |= KVM_RUN_FORCE_IMMEDIATE_EXIT;
		kvm_make_request(KVM_REQ_EVENT, vcpu);
+	}

	fpregs_assert_state_consistent();
	if (test_thread_flag(TIF_NEED_FPU_LOAD))

@@ -10920,12 +10924,22 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
		set_debugreg(vcpu->arch.eff_db[3], 3);
		/* When KVM_DEBUGREG_WONT_EXIT, dr6 is accessible in guest. */
		if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT))
-			kvm_x86_call(set_dr6)(vcpu, vcpu->arch.dr6);
+			run_flags |= KVM_RUN_LOAD_GUEST_DR6;
	} else if (unlikely(hw_breakpoint_active())) {
		set_debugreg(DR7_FIXED_1, 7);
	}

-	vcpu->arch.host_debugctl = get_debugctlmsr();
+	/*
+	 * Refresh the host DEBUGCTL snapshot after disabling IRQs, as DEBUGCTL
+	 * can be modified in IRQ context, e.g. via SMP function calls.  Inform
+	 * vendor code if any host-owned bits were changed, e.g. so that the
+	 * value loaded into hardware while running the guest can be updated.
+	 */
+	debug_ctl = get_debugctlmsr();
+	if ((debug_ctl ^ vcpu->arch.host_debugctl) & kvm_x86_ops.HOST_OWNED_DEBUGCTL &&
+	    !vcpu->arch.guest_state_protected)
+		run_flags |= KVM_RUN_LOAD_DEBUGCTL;
+	vcpu->arch.host_debugctl = debug_ctl;

	guest_timing_enter_irqoff();

@@ -10939,8 +10953,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
		WARN_ON_ONCE((kvm_vcpu_apicv_activated(vcpu) != kvm_vcpu_apicv_active(vcpu)) &&
			     (kvm_get_apic_mode(vcpu) != LAPIC_MODE_DISABLED));

-		exit_fastpath = kvm_x86_call(vcpu_run)(vcpu,
-						       req_immediate_exit);
+		exit_fastpath = kvm_x86_call(vcpu_run)(vcpu, run_flags);
		if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST))
			break;

@@ -10952,6 +10965,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
			break;
		}

+		run_flags = 0;
+
		/* Note, VM-Exits that go down the "slow" path are accounted below. */
		++vcpu->stat.exits;
	}

@@ -11425,6 +11440,28 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
	trace_kvm_fpu(0);
}

+static int kvm_x86_vcpu_pre_run(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * SIPI_RECEIVED is obsolete; KVM leaves the vCPU in Wait-For-SIPI and
+	 * tracks the pending SIPI separately.  SIPI_RECEIVED is still accepted
+	 * by KVM_SET_VCPU_EVENTS for backwards compatibility, but should be
+	 * converted to INIT_RECEIVED.
+	 */
+	if (WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED))
+		return -EINVAL;
+
+	/*
+	 * Disallow running the vCPU if userspace forced it into an impossible
+	 * MP_STATE, e.g. if the vCPU is in WFS but SIPI is blocked.
+	 */
+	if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED &&
+	    !kvm_apic_init_sipi_allowed(vcpu))
+		return -EINVAL;
+
+	return kvm_x86_call(vcpu_pre_run)(vcpu);
+}
+
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
	struct kvm_queued_exception *ex = &vcpu->arch.exception;

@@ -11527,7 +11564,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
		goto out;
	}

-	r = kvm_x86_call(vcpu_pre_run)(vcpu);
+	r = kvm_x86_vcpu_pre_run(vcpu);
	if (r <= 0)
		goto out;

@@ -11771,21 +11808,16 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
	}

	/*
-	 * Pending INITs are reported using KVM_SET_VCPU_EVENTS, disallow
-	 * forcing the guest into INIT/SIPI if those events are supposed to be
-	 * blocked.  KVM prioritizes SMI over INIT, so reject INIT/SIPI state
-	 * if an SMI is pending as well.
+	 * SIPI_RECEIVED is obsolete and no longer used internally; KVM instead
+	 * leaves the vCPU in INIT_RECIEVED (Wait-For-SIPI) and pends the SIPI.
+	 * Translate SIPI_RECEIVED as appropriate for backwards compatibility.
	 */
-	if ((!kvm_apic_init_sipi_allowed(vcpu) || vcpu->arch.smi_pending) &&
-	    (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED ||
-	     mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED))
-		goto out;
-
	if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) {
-		kvm_set_mp_state(vcpu, KVM_MP_STATE_INIT_RECEIVED);
+		mp_state->mp_state = KVM_MP_STATE_INIT_RECEIVED;
		set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events);
-	} else
-		kvm_set_mp_state(vcpu, mp_state->mp_state);
+	}
+
+	kvm_set_mp_state(vcpu, mp_state->mp_state);
	kvm_make_request(KVM_REQ_EVENT, vcpu);

	ret = 0;

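To make the run_flags plumbing above easier to follow, here is a rough sketch of how the flags compose before the vendor vcpu_run call. The bit values below are assumptions chosen for illustration; the real definitions live in KVM's x86 headers and may differ.

#include <stdint.h>
#include <stdio.h>

/* Assumed encodings, for the sketch only. */
#define KVM_RUN_FORCE_IMMEDIATE_EXIT	(1ULL << 0)
#define KVM_RUN_LOAD_GUEST_DR6		(1ULL << 1)
#define KVM_RUN_LOAD_DEBUGCTL		(1ULL << 2)

static uint64_t build_run_flags(int req_immediate_exit, int dr6_dirty,
				int host_debugctl_changed)
{
	uint64_t run_flags = 0;

	if (req_immediate_exit)
		run_flags |= KVM_RUN_FORCE_IMMEDIATE_EXIT;
	if (dr6_dirty)
		run_flags |= KVM_RUN_LOAD_GUEST_DR6;
	if (host_debugctl_changed)
		run_flags |= KVM_RUN_LOAD_DEBUGCTL;

	return run_flags;	/* passed to the vendor vcpu_run hook */
}

int main(void)
{
	printf("run_flags=%#llx\n",
	       (unsigned long long)build_run_flags(1, 0, 1));
	return 0;
}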
@@ -499,24 +499,34 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
	__rem;						\
	})

+static inline void kvm_disable_exits(struct kvm *kvm, u64 mask)
+{
+	kvm->arch.disabled_exits |= mask;
+}
+
static inline bool kvm_mwait_in_guest(struct kvm *kvm)
{
-	return kvm->arch.mwait_in_guest;
+	return kvm->arch.disabled_exits & KVM_X86_DISABLE_EXITS_MWAIT;
}

static inline bool kvm_hlt_in_guest(struct kvm *kvm)
{
-	return kvm->arch.hlt_in_guest;
+	return kvm->arch.disabled_exits & KVM_X86_DISABLE_EXITS_HLT;
}

static inline bool kvm_pause_in_guest(struct kvm *kvm)
{
-	return kvm->arch.pause_in_guest;
+	return kvm->arch.disabled_exits & KVM_X86_DISABLE_EXITS_PAUSE;
}

static inline bool kvm_cstate_in_guest(struct kvm *kvm)
{
-	return kvm->arch.cstate_in_guest;
+	return kvm->arch.disabled_exits & KVM_X86_DISABLE_EXITS_CSTATE;
}

+static inline bool kvm_aperfmperf_in_guest(struct kvm *kvm)
+{
+	return kvm->arch.disabled_exits & KVM_X86_DISABLE_EXITS_APERFMPERF;
+}
+
static inline bool kvm_notify_vmexit_enabled(struct kvm *kvm)

@@ -644,6 +644,7 @@ struct kvm_ioeventfd {
#define KVM_X86_DISABLE_EXITS_HLT            (1 << 1)
#define KVM_X86_DISABLE_EXITS_PAUSE          (1 << 2)
#define KVM_X86_DISABLE_EXITS_CSTATE         (1 << 3)
+#define KVM_X86_DISABLE_EXITS_APERFMPERF     (1 << 4)

/* for KVM_ENABLE_CAP */
struct kvm_enable_cap {

@@ -618,6 +618,7 @@ struct kvm_ioeventfd {
#define KVM_X86_DISABLE_EXITS_HLT            (1 << 1)
#define KVM_X86_DISABLE_EXITS_PAUSE          (1 << 2)
#define KVM_X86_DISABLE_EXITS_CSTATE         (1 << 3)
+#define KVM_X86_DISABLE_EXITS_APERFMPERF     (1 << 4)

/* for KVM_ENABLE_CAP */
struct kvm_enable_cap {

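As a usage note for the new UAPI bit: the capability is VM-scoped and must be enabled before any vCPU is created. A minimal userspace sketch (error handling mostly elided; assumes a kernel with this capability and access to /dev/kvm):

#include <fcntl.h>
#include <linux/kvm.h>
#include <stdio.h>
#include <sys/ioctl.h>

int main(void)
{
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_X86_DISABLE_EXITS,
		.args[0] = KVM_X86_DISABLE_EXITS_APERFMPERF,
	};
	int kvm = open("/dev/kvm", O_RDWR);
	int vm = ioctl(kvm, KVM_CREATE_VM, 0);

	/* Must happen before KVM_CREATE_VCPU for this bit to be accepted. */
	if (ioctl(vm, KVM_ENABLE_CAP, &cap))
		perror("KVM_ENABLE_CAP");
	else
		printf("APERF/MPERF exits disabled for this VM\n");
	return 0;
}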
@@ -135,6 +135,7 @@ TEST_GEN_PROGS_x86 += x86/amx_test
TEST_GEN_PROGS_x86 += x86/max_vcpuid_cap_test
TEST_GEN_PROGS_x86 += x86/triple_fault_event_test
TEST_GEN_PROGS_x86 += x86/recalc_apic_map_test
+TEST_GEN_PROGS_x86 += x86/aperfmperf_test
TEST_GEN_PROGS_x86 += access_tracking_perf_test
TEST_GEN_PROGS_x86 += coalesced_io_test
TEST_GEN_PROGS_x86 += dirty_log_perf_test

@@ -98,16 +98,11 @@ static uint32_t test_get_pcpu(void)
static int test_migrate_vcpu(unsigned int vcpu_idx)
{
	int ret;
-	cpu_set_t cpuset;
	uint32_t new_pcpu = test_get_pcpu();

-	CPU_ZERO(&cpuset);
-	CPU_SET(new_pcpu, &cpuset);
-
	pr_debug("Migrating vCPU: %u to pCPU: %u\n", vcpu_idx, new_pcpu);

-	ret = pthread_setaffinity_np(pt_vcpu_run[vcpu_idx],
-				     sizeof(cpuset), &cpuset);
+	ret = __pin_task_to_cpu(pt_vcpu_run[vcpu_idx], new_pcpu);

	/* Allow the error where the vCPU thread is already finished */
	TEST_ASSERT(ret == 0 || ret == ESRCH,

@@ -862,25 +862,6 @@ static uint32_t next_pcpu(void)
	return next;
}

-static void migrate_self(uint32_t new_pcpu)
-{
-	int ret;
-	cpu_set_t cpuset;
-	pthread_t thread;
-
-	thread = pthread_self();
-
-	CPU_ZERO(&cpuset);
-	CPU_SET(new_pcpu, &cpuset);
-
-	pr_debug("Migrating from %u to %u\n", sched_getcpu(), new_pcpu);
-
-	ret = pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset);
-
-	TEST_ASSERT(ret == 0, "Failed to migrate to pCPU: %u; ret: %d\n",
-		    new_pcpu, ret);
-}
-
static void kvm_set_cntxct(struct kvm_vcpu *vcpu, uint64_t cnt,
			   enum arch_timer timer)
{

@@ -907,7 +888,7 @@ static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc)
		sched_yield();
		break;
	case USERSPACE_MIGRATE_SELF:
-		migrate_self(next_pcpu());
+		pin_self_to_cpu(next_pcpu());
		break;
	default:
		break;

@@ -919,7 +900,7 @@ static void test_run(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
	struct ucall uc;

	/* Start on CPU 0 */
-	migrate_self(0);
+	pin_self_to_cpu(0);

	while (true) {
		vcpu_run(vcpu);

@@ -21,6 +21,8 @@
#include <sys/eventfd.h>
#include <sys/ioctl.h>

+#include <pthread.h>
+
#include "kvm_util_arch.h"
#include "kvm_util_types.h"
#include "sparsebit.h"

@@ -1053,7 +1055,34 @@ struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm);

void kvm_set_files_rlimit(uint32_t nr_vcpus);

-void kvm_pin_this_task_to_pcpu(uint32_t pcpu);
+int __pin_task_to_cpu(pthread_t task, int cpu);
+
+static inline void pin_task_to_cpu(pthread_t task, int cpu)
+{
+	int r;
+
+	r = __pin_task_to_cpu(task, cpu);
+	TEST_ASSERT(!r, "Failed to set thread affinity to pCPU '%u'", cpu);
+}
+
+static inline int pin_task_to_any_cpu(pthread_t task)
+{
+	int cpu = sched_getcpu();
+
+	pin_task_to_cpu(task, cpu);
+	return cpu;
+}
+
+static inline void pin_self_to_cpu(int cpu)
+{
+	pin_task_to_cpu(pthread_self(), cpu);
+}
+
+static inline int pin_self_to_any_cpu(void)
+{
+	return pin_task_to_any_cpu(pthread_self());
+}
+
void kvm_print_vcpu_pinning_help(void);
void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
			    int nr_vcpus);

@@ -605,15 +605,14 @@ struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm)
	return vm_vcpu_recreate(vm, 0);
}

-void kvm_pin_this_task_to_pcpu(uint32_t pcpu)
+int __pin_task_to_cpu(pthread_t task, int cpu)
{
-	cpu_set_t mask;
-	int r;
+	cpu_set_t cpuset;

-	CPU_ZERO(&mask);
-	CPU_SET(pcpu, &mask);
-	r = sched_setaffinity(0, sizeof(mask), &mask);
-	TEST_ASSERT(!r, "sched_setaffinity() failed for pCPU '%u'.", pcpu);
+	CPU_ZERO(&cpuset);
+	CPU_SET(cpu, &cpuset);
+
+	return pthread_setaffinity_np(task, sizeof(cpuset), &cpuset);
}

static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask)

@@ -667,7 +666,7 @@ void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],

	/* 2. Check if the main worker needs to be pinned. */
	if (cpu) {
-		kvm_pin_this_task_to_pcpu(parse_pcpu(cpu, &allowed_mask));
+		pin_self_to_cpu(parse_pcpu(cpu, &allowed_mask));
		cpu = strtok(NULL, delim);
	}

@@ -265,7 +265,7 @@ static void *vcpu_thread_main(void *data)
	int vcpu_idx = vcpu->vcpu_idx;

	if (memstress_args.pin_vcpus)
-		kvm_pin_this_task_to_pcpu(memstress_args.vcpu_to_pcpu[vcpu_idx]);
+		pin_self_to_cpu(memstress_args.vcpu_to_pcpu[vcpu_idx]);

	WRITE_ONCE(vcpu->running, true);

tools/testing/selftests/kvm/x86/aperfmperf_test.c | 213 lines (new file)

@@ -0,0 +1,213 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Test for KVM_X86_DISABLE_EXITS_APERFMPERF
 *
 * Copyright (C) 2025, Google LLC.
 *
 * Test the ability to disable VM-exits for rdmsr of IA32_APERF and
 * IA32_MPERF. When these VM-exits are disabled, reads of these MSRs
 * return the host's values.
 *
 * Note: Requires read access to /dev/cpu/<lpu>/msr to read host MSRs.
 */

#include <fcntl.h>
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdint.h>
#include <unistd.h>
#include <asm/msr-index.h>

#include "kvm_util.h"
#include "processor.h"
#include "svm_util.h"
#include "test_util.h"
#include "vmx.h"

#define NUM_ITERATIONS 10000

static int open_dev_msr(int cpu)
{
	char path[PATH_MAX];

	snprintf(path, sizeof(path), "/dev/cpu/%d/msr", cpu);
	return open_path_or_exit(path, O_RDONLY);
}

static uint64_t read_dev_msr(int msr_fd, uint32_t msr)
{
	uint64_t data;
	ssize_t rc;

	rc = pread(msr_fd, &data, sizeof(data), msr);
	TEST_ASSERT(rc == sizeof(data), "Read of MSR 0x%x failed", msr);

	return data;
}

static void guest_read_aperf_mperf(void)
{
	int i;

	for (i = 0; i < NUM_ITERATIONS; i++)
		GUEST_SYNC2(rdmsr(MSR_IA32_APERF), rdmsr(MSR_IA32_MPERF));
}

#define L2_GUEST_STACK_SIZE 64

static void l2_guest_code(void)
{
	guest_read_aperf_mperf();
	GUEST_DONE();
}

static void l1_svm_code(struct svm_test_data *svm)
{
	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
	struct vmcb *vmcb = svm->vmcb;

	generic_svm_setup(svm, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
	run_guest(vmcb, svm->vmcb_gpa);
}

static void l1_vmx_code(struct vmx_pages *vmx)
{
	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];

	GUEST_ASSERT_EQ(prepare_for_vmx_operation(vmx), true);
	GUEST_ASSERT_EQ(load_vmcs(vmx), true);

	prepare_vmcs(vmx, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);

	/*
	 * Enable MSR bitmaps (the bitmap itself is allocated, zeroed, and set
	 * in the VMCS by prepare_vmcs()), as MSR exiting mandatory on Intel.
	 */
	vmwrite(CPU_BASED_VM_EXEC_CONTROL,
		vmreadz(CPU_BASED_VM_EXEC_CONTROL) | CPU_BASED_USE_MSR_BITMAPS);

	GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_guest_code));
	GUEST_ASSERT(!vmlaunch());
}

static void guest_code(void *nested_test_data)
{
	guest_read_aperf_mperf();

	if (this_cpu_has(X86_FEATURE_SVM))
		l1_svm_code(nested_test_data);
	else if (this_cpu_has(X86_FEATURE_VMX))
		l1_vmx_code(nested_test_data);
	else
		GUEST_DONE();

	TEST_FAIL("L2 should have signaled 'done'");
}

static void guest_no_aperfmperf(void)
{
	uint64_t msr_val;
	uint8_t vector;

	vector = rdmsr_safe(MSR_IA32_APERF, &msr_val);
	GUEST_ASSERT(vector == GP_VECTOR);

	vector = rdmsr_safe(MSR_IA32_APERF, &msr_val);
	GUEST_ASSERT(vector == GP_VECTOR);

	GUEST_DONE();
}

int main(int argc, char *argv[])
{
	const bool has_nested = kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX);
	uint64_t host_aperf_before, host_mperf_before;
	vm_vaddr_t nested_test_data_gva;
	struct kvm_vcpu *vcpu;
	struct kvm_vm *vm;
	int msr_fd, cpu, i;

	/* Sanity check that APERF/MPERF are unsupported by default. */
	vm = vm_create_with_one_vcpu(&vcpu, guest_no_aperfmperf);
	vcpu_run(vcpu);
	TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
	kvm_vm_free(vm);

	cpu = pin_self_to_any_cpu();

	msr_fd = open_dev_msr(cpu);

	/*
	 * This test requires a non-standard VM initialization, because
	 * KVM_ENABLE_CAP cannot be used on a VM file descriptor after
	 * a VCPU has been created.
	 */
	vm = vm_create(1);

	TEST_REQUIRE(vm_check_cap(vm, KVM_CAP_X86_DISABLE_EXITS) &
		     KVM_X86_DISABLE_EXITS_APERFMPERF);

	vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS,
		      KVM_X86_DISABLE_EXITS_APERFMPERF);

	vcpu = vm_vcpu_add(vm, 0, guest_code);

	if (!has_nested)
		nested_test_data_gva = NONCANONICAL;
	else if (kvm_cpu_has(X86_FEATURE_SVM))
		vcpu_alloc_svm(vm, &nested_test_data_gva);
	else
		vcpu_alloc_vmx(vm, &nested_test_data_gva);

	vcpu_args_set(vcpu, 1, nested_test_data_gva);

	host_aperf_before = read_dev_msr(msr_fd, MSR_IA32_APERF);
	host_mperf_before = read_dev_msr(msr_fd, MSR_IA32_MPERF);

	for (i = 0; i <= NUM_ITERATIONS * (1 + has_nested); i++) {
		uint64_t host_aperf_after, host_mperf_after;
		uint64_t guest_aperf, guest_mperf;
		struct ucall uc;

		vcpu_run(vcpu);
		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);

		switch (get_ucall(vcpu, &uc)) {
		case UCALL_DONE:
			goto done;
		case UCALL_ABORT:
			REPORT_GUEST_ASSERT(uc);
		case UCALL_SYNC:
			guest_aperf = uc.args[0];
			guest_mperf = uc.args[1];

			host_aperf_after = read_dev_msr(msr_fd, MSR_IA32_APERF);
			host_mperf_after = read_dev_msr(msr_fd, MSR_IA32_MPERF);

			TEST_ASSERT(host_aperf_before < guest_aperf,
				    "APERF: host_before (0x%" PRIx64 ") >= guest (0x%" PRIx64 ")",
				    host_aperf_before, guest_aperf);
			TEST_ASSERT(guest_aperf < host_aperf_after,
				    "APERF: guest (0x%" PRIx64 ") >= host_after (0x%" PRIx64 ")",
				    guest_aperf, host_aperf_after);
			TEST_ASSERT(host_mperf_before < guest_mperf,
				    "MPERF: host_before (0x%" PRIx64 ") >= guest (0x%" PRIx64 ")",
				    host_mperf_before, guest_mperf);
			TEST_ASSERT(guest_mperf < host_mperf_after,
				    "MPERF: guest (0x%" PRIx64 ") >= host_after (0x%" PRIx64 ")",
				    guest_mperf, host_mperf_after);

			host_aperf_before = host_aperf_after;
			host_mperf_before = host_mperf_after;

			break;
		}
	}
	TEST_FAIL("Didn't receive UCALL_DONE\n");
done:
	kvm_vm_free(vm);
	close(msr_fd);

	return 0;
}

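Related usage note: when APERF/MPERF are passed through, only the ratio of their deltas over an interval is meaningful, so a consumer typically samples both MSRs twice and divides the deltas. A hedged sketch of that calculation (sample values are made up; reading the MSRs is left to an rdmsr helper such as the /dev/cpu/<n>/msr reader in the test above):

#include <stdint.h>
#include <stdio.h>

/* Only the ratio of deltas over an interval (T0..T1) is architecturally defined. */
static double aperf_mperf_ratio(uint64_t aperf0, uint64_t mperf0,
				uint64_t aperf1, uint64_t mperf1)
{
	uint64_t da = aperf1 - aperf0;	/* unsigned math tolerates wraparound */
	uint64_t dm = mperf1 - mperf0;

	return dm ? (double)da / (double)dm : 0.0;
}

int main(void)
{
	/* effective frequency ~= base frequency * ratio */
	printf("ratio=%.3f\n", aperf_mperf_ratio(1000, 2000, 5000, 6000));
	return 0;
}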
@@ -343,6 +343,12 @@ static void guest_code_permission_bitmap(void)
	data = test_rdmsr(MSR_GS_BASE);
	GUEST_ASSERT(data == MSR_GS_BASE);

+	/* Access the MSRs again to ensure KVM has disabled interception.*/
+	data = test_rdmsr(MSR_FS_BASE);
+	GUEST_ASSERT(data != MSR_FS_BASE);
+	data = test_rdmsr(MSR_GS_BASE);
+	GUEST_ASSERT(data != MSR_GS_BASE);
+
	GUEST_DONE();
}

@@ -682,6 +688,8 @@ KVM_ONE_VCPU_TEST(user_msr, msr_permission_bitmap, guest_code_permission_bitmap)
		    "Expected ucall state to be UCALL_SYNC.");
	vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_gs);
	run_guest_then_process_rdmsr(vcpu, MSR_GS_BASE);
+
+	vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_allow);
	run_guest_then_process_ucall_done(vcpu);
}
