KVM: arm64: nv: Accelerate EL0 timer read accesses when FEAT_ECV in use
Although FEAT_ECV allows us to correctly emulate the timers, it also reduces performance pretty badly. Mitigate this by emulating the CTL/CVAL register reads in the inner run loop, without returning to the general kernel.

Acked-by: Oliver Upton <oliver.upton@linux.dev>
Link: https://lore.kernel.org/r/20241217142321.763801-6-maz@kernel.org
Signed-off-by: Marc Zyngier <maz@kernel.org>
parent 2cd2a77f9c
commit 338f8ea519

4 changed files with 122 additions and 18 deletions
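The core of the fast path below is recomputing the emulated CNTx_CTL_EL0.ISTATUS bit from the saved CVAL and the offset-adjusted counter, without exiting to the host kernel. A minimal standalone sketch of that computation (simplified types and made-up parameters standing in for CNTPCT_EL0 and the guest counter offset; this is not the kernel helpers in the diff):

	/* Standalone sketch; compile with: cc -o ctl ctl.c */
	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Bit layout of CNTx_CTL_EL0, as architected. */
	#define ARCH_TIMER_CTRL_ENABLE  (1U << 0)
	#define ARCH_TIMER_CTRL_IT_MASK (1U << 1)
	#define ARCH_TIMER_CTRL_IT_STAT (1U << 2)

	/* Emulated counter view: physical counter minus the guest's offset. */
	static uint64_t emulated_counter(uint64_t cntpct, uint64_t offset)
	{
		return cntpct - offset;
	}

	/*
	 * Recompute ISTATUS for an emulated EL0 timer: the timer condition is
	 * met once the offset-adjusted counter has reached CVAL. ENABLE and
	 * IMASK are left exactly as the guest programmed them.
	 */
	static uint32_t emulated_ctl(uint32_t ctl, uint64_t cval,
				     uint64_t cntpct, uint64_t offset)
	{
		bool stat = cval <= emulated_counter(cntpct, offset);

		if (stat)
			ctl |= ARCH_TIMER_CTRL_IT_STAT;
		else
			ctl &= ~ARCH_TIMER_CTRL_IT_STAT;

		return ctl;
	}

	int main(void)
	{
		/* Timer armed (ENABLE set), CVAL already in the past: ISTATUS reads as 1. */
		uint32_t ctl = emulated_ctl(ARCH_TIMER_CTRL_ENABLE, 1000, 5000, 2000);

		printf("CTL = %#x (ISTATUS=%u)\n", ctl,
		       !!(ctl & ARCH_TIMER_CTRL_IT_STAT));
		return 0;
	}

This mirrors the `cval <= cnt` comparison performed by compute_emulated_cntx_ctl_el0() in the diff.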
@@ -101,21 +101,6 @@ u64 timer_get_cval(struct arch_timer_context *ctxt)
}

static u64 timer_get_offset(struct arch_timer_context *ctxt)
{
	u64 offset = 0;

	if (!ctxt)
		return 0;

	if (ctxt->offset.vm_offset)
		offset += *ctxt->offset.vm_offset;
	if (ctxt->offset.vcpu_offset)
		offset += *ctxt->offset.vcpu_offset;

	return offset;
}

static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;
@@ -964,10 +949,10 @@ void kvm_timer_sync_nested(struct kvm_vcpu *vcpu)
	 * which allows trapping of the timer registers even with NV2.
	 * Still, this is still worse than FEAT_NV on its own. Meh.
	 */
	if (cpus_have_final_cap(ARM64_HAS_ECV) || !is_hyp_ctxt(vcpu))
		return;

	if (!vcpu_el2_e2h_is_set(vcpu)) {
		if (cpus_have_final_cap(ARM64_HAS_ECV))
			return;

		/*
		 * A non-VHE guest hypervisor doesn't have any direct access
		 * to its timers: the EL2 registers trap (and the HW is
@@ -501,6 +501,11 @@ static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu)
	return true;
}

static inline u64 compute_counter_value(struct arch_timer_context *ctxt)
{
	return arch_timer_read_cntpct_el0() - timer_get_offset(ctxt);
}

static bool kvm_hyp_handle_cntpct(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *ctxt;
@@ -256,6 +256,102 @@ void kvm_vcpu_put_vhe(struct kvm_vcpu *vcpu)
	host_data_ptr(host_ctxt)->__hyp_running_vcpu = NULL;
}

static u64 compute_emulated_cntx_ctl_el0(struct kvm_vcpu *vcpu,
					 enum vcpu_sysreg reg)
{
	unsigned long ctl;
	u64 cval, cnt;
	bool stat;

	switch (reg) {
	case CNTP_CTL_EL0:
		cval = __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
		ctl = __vcpu_sys_reg(vcpu, CNTP_CTL_EL0);
		cnt = compute_counter_value(vcpu_ptimer(vcpu));
		break;
	case CNTV_CTL_EL0:
		cval = __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0);
		ctl = __vcpu_sys_reg(vcpu, CNTV_CTL_EL0);
		cnt = compute_counter_value(vcpu_vtimer(vcpu));
		break;
	default:
		BUG();
	}

	stat = cval <= cnt;
	__assign_bit(__ffs(ARCH_TIMER_CTRL_IT_STAT), &ctl, stat);

	return ctl;
}

static bool kvm_hyp_handle_timer(struct kvm_vcpu *vcpu, u64 *exit_code)
{
	u64 esr, val;

	/*
	 * Having FEAT_ECV allows for a better quality of timer emulation.
	 * However, this comes at a huge cost in terms of traps. Try and
	 * satisfy the reads from guest's hypervisor context without
	 * returning to the kernel if we can.
	 */
	if (!is_hyp_ctxt(vcpu))
		return false;

	esr = kvm_vcpu_get_esr(vcpu);
	if ((esr & ESR_ELx_SYS64_ISS_DIR_MASK) != ESR_ELx_SYS64_ISS_DIR_READ)
		return false;

	switch (esr_sys64_to_sysreg(esr)) {
	case SYS_CNTP_CTL_EL02:
		val = compute_emulated_cntx_ctl_el0(vcpu, CNTP_CTL_EL0);
		break;
	case SYS_CNTP_CTL_EL0:
		if (vcpu_el2_e2h_is_set(vcpu))
			val = read_sysreg_el0(SYS_CNTP_CTL);
		else
			val = compute_emulated_cntx_ctl_el0(vcpu, CNTP_CTL_EL0);
		break;
	case SYS_CNTP_CVAL_EL02:
		val = __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
		break;
	case SYS_CNTP_CVAL_EL0:
		if (vcpu_el2_e2h_is_set(vcpu)) {
			val = read_sysreg_el0(SYS_CNTP_CVAL);

			if (!has_cntpoff())
				val -= timer_get_offset(vcpu_hptimer(vcpu));
		} else {
			val = __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
		}
		break;
	case SYS_CNTV_CTL_EL02:
		val = compute_emulated_cntx_ctl_el0(vcpu, CNTV_CTL_EL0);
		break;
	case SYS_CNTV_CTL_EL0:
		if (vcpu_el2_e2h_is_set(vcpu))
			val = read_sysreg_el0(SYS_CNTV_CTL);
		else
			val = compute_emulated_cntx_ctl_el0(vcpu, CNTV_CTL_EL0);
		break;
	case SYS_CNTV_CVAL_EL02:
		val = __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0);
		break;
	case SYS_CNTV_CVAL_EL0:
		if (vcpu_el2_e2h_is_set(vcpu))
			val = read_sysreg_el0(SYS_CNTV_CVAL);
		else
			val = __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0);
		break;
	default:
		return false;
	}

	vcpu_set_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu), val);
	__kvm_skip_instr(vcpu);

	return true;
}

static bool kvm_hyp_handle_eret(struct kvm_vcpu *vcpu, u64 *exit_code)
{
	u64 esr = kvm_vcpu_get_esr(vcpu);
@@ -409,6 +505,9 @@ static bool kvm_hyp_handle_sysreg_vhe(struct kvm_vcpu *vcpu, u64 *exit_code)
	if (kvm_hyp_handle_tlbi_el2(vcpu, exit_code))
		return true;

	if (kvm_hyp_handle_timer(vcpu, exit_code))
		return true;

	if (kvm_hyp_handle_cpacr_el1(vcpu, exit_code))
		return true;
@@ -156,4 +156,19 @@ static inline bool has_cntpoff(void)
	return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF));
}

static inline u64 timer_get_offset(struct arch_timer_context *ctxt)
{
	u64 offset = 0;

	if (!ctxt)
		return 0;

	if (ctxt->offset.vm_offset)
		offset += *ctxt->offset.vm_offset;
	if (ctxt->offset.vcpu_offset)
		offset += *ctxt->offset.vcpu_offset;

	return offset;
}

#endif