KVM: arm64: nv: Accelerate EL0 timer read accesses when FEAT_ECV in use

Although FEAT_ECV allows us to correctly emulate the timers, it also
reduces performance pretty badly.

Mitigate this by emulating the CTL/CVAL register reads in the
inner run loop, without returning to the general kernel.

Acked-by: Oliver Upton <oliver.upton@linux.dev>
Link: https://lore.kernel.org/r/20241217142321.763801-6-maz@kernel.org
Signed-off-by: Marc Zyngier <maz@kernel.org>
This commit is contained in:
Marc Zyngier 2024-12-17 14:23:13 +00:00
parent 2cd2a77f9c
commit 338f8ea519
4 changed files with 122 additions and 18 deletions

View file

@ -101,21 +101,6 @@ u64 timer_get_cval(struct arch_timer_context *ctxt)
}
}
/*
 * Return the total offset attached to a timer context.
 *
 * A NULL context yields 0. Otherwise the result is the sum of the values
 * pointed to by offset.vm_offset and offset.vcpu_offset, each of which is
 * only dereferenced when the pointer is set.
 */
static u64 timer_get_offset(struct arch_timer_context *ctxt)
{
	u64 vm_off, vcpu_off;

	if (!ctxt)
		return 0;

	vm_off = ctxt->offset.vm_offset ? *ctxt->offset.vm_offset : 0;
	vcpu_off = ctxt->offset.vcpu_offset ? *ctxt->offset.vcpu_offset : 0;

	return vm_off + vcpu_off;
}
static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)
{
struct kvm_vcpu *vcpu = ctxt->vcpu;
@ -964,10 +949,10 @@ void kvm_timer_sync_nested(struct kvm_vcpu *vcpu)
* which allows trapping of the timer registers even with NV2.
* Still, this is worse than FEAT_NV on its own. Meh.
*/
if (cpus_have_final_cap(ARM64_HAS_ECV) || !is_hyp_ctxt(vcpu))
return;
if (!vcpu_el2_e2h_is_set(vcpu)) {
if (cpus_have_final_cap(ARM64_HAS_ECV))
return;
/*
* A non-VHE guest hypervisor doesn't have any direct access
* to its timers: the EL2 registers trap (and the HW is

View file

@ -501,6 +501,11 @@ static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu)
return true;
}
/*
 * Counter value as seen through a timer context: the value returned by
 * arch_timer_read_cntpct_el0() minus the context's accumulated offset
 * (see timer_get_offset()).
 */
static inline u64 compute_counter_value(struct arch_timer_context *ctxt)
{
	u64 now = arch_timer_read_cntpct_el0();
	u64 offset = timer_get_offset(ctxt);

	return now - offset;
}
static bool kvm_hyp_handle_cntpct(struct kvm_vcpu *vcpu)
{
struct arch_timer_context *ctxt;

View file

@ -256,6 +256,102 @@ void kvm_vcpu_put_vhe(struct kvm_vcpu *vcpu)
host_data_ptr(host_ctxt)->__hyp_running_vcpu = NULL;
}
static u64 compute_emulated_cntx_ctl_el0(struct kvm_vcpu *vcpu,
enum vcpu_sysreg reg)
{
unsigned long ctl;
u64 cval, cnt;
bool stat;
switch (reg) {
case CNTP_CTL_EL0:
cval = __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
ctl = __vcpu_sys_reg(vcpu, CNTP_CTL_EL0);
cnt = compute_counter_value(vcpu_ptimer(vcpu));
break;
case CNTV_CTL_EL0:
cval = __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0);
ctl = __vcpu_sys_reg(vcpu, CNTV_CTL_EL0);
cnt = compute_counter_value(vcpu_vtimer(vcpu));
break;
default:
BUG();
}
stat = cval <= cnt;
__assign_bit(__ffs(ARCH_TIMER_CTRL_IT_STAT), &ctl, stat);
return ctl;
}
/*
 * Try to emulate a trapped timer CTL/CVAL register read without leaving
 * the inner run loop.
 *
 * @vcpu:      vcpu that took the trap
 * @exit_code: not used by this handler
 *
 * Only reads issued from the guest's hypervisor context are handled.
 * Returns true when the access was emulated here (the destination
 * register has been written and the instruction skipped), and false
 * when the access must be handled elsewhere.
 */
static bool kvm_hyp_handle_timer(struct kvm_vcpu *vcpu, u64 *exit_code)
{
	bool is_read;
	u64 esr, val;

	/*
	 * FEAT_ECV gives a better quality of timer emulation, but at a
	 * huge cost in terms of traps. Where possible, satisfy reads made
	 * from the guest's hypervisor context right here rather than
	 * returning to the kernel.
	 */
	if (!is_hyp_ctxt(vcpu))
		return false;

	esr = kvm_vcpu_get_esr(vcpu);
	is_read = (esr & ESR_ELx_SYS64_ISS_DIR_MASK) == ESR_ELx_SYS64_ISS_DIR_READ;
	if (!is_read)
		return false;

	/*
	 * The _EL02 encodings are always served from the in-memory vcpu
	 * state. For the _EL0 encodings, the register is read directly
	 * when the guest has E2H set, and the in-memory state is used
	 * otherwise.
	 */
	switch (esr_sys64_to_sysreg(esr)) {
	case SYS_CNTP_CTL_EL02:
		val = compute_emulated_cntx_ctl_el0(vcpu, CNTP_CTL_EL0);
		break;
	case SYS_CNTP_CTL_EL0:
		val = vcpu_el2_e2h_is_set(vcpu) ?
			read_sysreg_el0(SYS_CNTP_CTL) :
			compute_emulated_cntx_ctl_el0(vcpu, CNTP_CTL_EL0);
		break;
	case SYS_CNTP_CVAL_EL02:
		val = __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
		break;
	case SYS_CNTP_CVAL_EL0:
		if (!vcpu_el2_e2h_is_set(vcpu)) {
			val = __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
			break;
		}

		val = read_sysreg_el0(SYS_CNTP_CVAL);
		/* Without CNTPOFF, take the hptimer offset back out. */
		if (!has_cntpoff())
			val -= timer_get_offset(vcpu_hptimer(vcpu));
		break;
	case SYS_CNTV_CTL_EL02:
		val = compute_emulated_cntx_ctl_el0(vcpu, CNTV_CTL_EL0);
		break;
	case SYS_CNTV_CTL_EL0:
		val = vcpu_el2_e2h_is_set(vcpu) ?
			read_sysreg_el0(SYS_CNTV_CTL) :
			compute_emulated_cntx_ctl_el0(vcpu, CNTV_CTL_EL0);
		break;
	case SYS_CNTV_CVAL_EL02:
		val = __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0);
		break;
	case SYS_CNTV_CVAL_EL0:
		val = vcpu_el2_e2h_is_set(vcpu) ?
			read_sysreg_el0(SYS_CNTV_CVAL) :
			__vcpu_sys_reg(vcpu, CNTV_CVAL_EL0);
		break;
	default:
		/* Not a register emulated by this handler. */
		return false;
	}

	/* Hand the value to the guest and step over the trapped read. */
	vcpu_set_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu), val);
	__kvm_skip_instr(vcpu);

	return true;
}
static bool kvm_hyp_handle_eret(struct kvm_vcpu *vcpu, u64 *exit_code)
{
u64 esr = kvm_vcpu_get_esr(vcpu);
@ -409,6 +505,9 @@ static bool kvm_hyp_handle_sysreg_vhe(struct kvm_vcpu *vcpu, u64 *exit_code)
if (kvm_hyp_handle_tlbi_el2(vcpu, exit_code))
return true;
if (kvm_hyp_handle_timer(vcpu, exit_code))
return true;
if (kvm_hyp_handle_cpacr_el1(vcpu, exit_code))
return true;

View file

@ -156,4 +156,19 @@ static inline bool has_cntpoff(void)
return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF));
}
/*
 * Sum the offsets that apply to a timer context.
 *
 * Returns 0 for a NULL context. Otherwise adds up whatever
 * offset.vm_offset and offset.vcpu_offset point to, skipping either
 * pointer when it is not set.
 */
static inline u64 timer_get_offset(struct arch_timer_context *ctxt)
{
	u64 total;

	if (!ctxt)
		return 0;

	total = ctxt->offset.vm_offset ? *ctxt->offset.vm_offset : 0;
	if (ctxt->offset.vcpu_offset)
		total += *ctxt->offset.vcpu_offset;

	return total;
}
#endif