KVM: TDX: Do TDX specific vcpu initialization

A TD guest vCPU needs TDX-specific initialization before it can run.
Repurpose KVM_MEMORY_ENCRYPT_OP as a vCPU-scoped ioctl, add a new
sub-command KVM_TDX_INIT_VCPU, and implement the vcpu_mem_enc_ioctl
callback for it.  The callback allocates the TDVPR and TDCX control pages
backing the vCPU and initializes them via the TDH.VP.CREATE, TDH.VP.ADDCX
and TDH.VP.INIT SEAMCALLs.
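
As a minimal sketch of the intended userspace flow (illustrative only:
vcpu_fd and initial_rcx are placeholder names, and the ioctl must be
issued after KVM_TDX_INIT_VM on the VM fd and before the vCPU is run):

  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  /* Issue KVM_TDX_INIT_VCPU on a vCPU fd; cmd.data lands in the guest's RCX. */
  static int tdx_init_vcpu(int vcpu_fd, __u64 initial_rcx)
  {
          struct kvm_tdx_cmd cmd = {
                  .id = KVM_TDX_INIT_VCPU,
                  .flags = 0,             /* no flags defined; must be zero */
                  .data = initial_rcx,    /* forwarded to TDH.VP.INIT as RCX */
          };

          /* Note: the vCPU fd, not the VM fd used by the other sub-commands. */
          return ioctl(vcpu_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
  }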

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Co-developed-by: Tony Lindgren <tony.lindgren@linux.intel.com>
Signed-off-by: Tony Lindgren <tony.lindgren@linux.intel.com>
Co-developed-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 - Fix comment: https://lore.kernel.org/kvm/Z36OYfRW9oPjW8be@google.com/
   (Sean)
 9 files changed, 205 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -127,6 +127,7 @@ KVM_X86_OP(enable_smi_window)
 #endif
 KVM_X86_OP_OPTIONAL(dev_get_attr)
 KVM_X86_OP(mem_enc_ioctl)
+KVM_X86_OP_OPTIONAL(vcpu_mem_enc_ioctl)
 KVM_X86_OP_OPTIONAL(mem_enc_register_region)
 KVM_X86_OP_OPTIONAL(mem_enc_unregister_region)
 KVM_X86_OP_OPTIONAL(vm_copy_enc_context_from)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1849,6 +1849,7 @@ struct kvm_x86_ops {
 	int (*dev_get_attr)(u32 group, u64 attr, u64 *val);
 	int (*mem_enc_ioctl)(struct kvm *kvm, void __user *argp);
+	int (*vcpu_mem_enc_ioctl)(struct kvm_vcpu *vcpu, void __user *argp);
 	int (*mem_enc_register_region)(struct kvm *kvm, struct kvm_enc_region *argp);
 	int (*mem_enc_unregister_region)(struct kvm *kvm, struct kvm_enc_region *argp);
 	int (*vm_copy_enc_context_from)(struct kvm *kvm, unsigned int source_fd);

diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -931,6 +931,7 @@ struct kvm_hyperv_eventfd {
 enum kvm_tdx_cmd_id {
 	KVM_TDX_CAPABILITIES = 0,
 	KVM_TDX_INIT_VM,
+	KVM_TDX_INIT_VCPU,
 
 	KVM_TDX_CMD_NR_MAX,
 };

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2657,6 +2657,7 @@ int kvm_apic_set_base(struct kvm_vcpu *vcpu, u64 value, bool host_initiated)
 	kvm_recalculate_apic_map(vcpu->kvm);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(kvm_apic_set_base);
 
 void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
 {

diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -106,6 +106,14 @@ static int vt_mem_enc_ioctl(struct kvm *kvm, void __user *argp)
 	return tdx_vm_ioctl(kvm, argp);
 }
 
+static int vt_vcpu_mem_enc_ioctl(struct kvm_vcpu *vcpu, void __user *argp)
+{
+	if (!is_td_vcpu(vcpu))
+		return -EINVAL;
+
+	return tdx_vcpu_ioctl(vcpu, argp);
+}
+
 #define VMX_REQUIRED_APICV_INHIBITS \
 	(BIT(APICV_INHIBIT_REASON_DISABLED) | \
 	 BIT(APICV_INHIBIT_REASON_ABSENT) | \
@@ -262,6 +270,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
 	.get_untagged_addr = vmx_get_untagged_addr,
 
 	.mem_enc_ioctl = vt_mem_enc_ioctl,
+	.vcpu_mem_enc_ioctl = vt_vcpu_mem_enc_ioctl,
 };
 
 struct kvm_x86_init_ops vt_init_ops __initdata = {

diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -410,6 +410,7 @@ int tdx_vm_init(struct kvm *kvm)
 int tdx_vcpu_create(struct kvm_vcpu *vcpu)
 {
 	struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm);
+	struct vcpu_tdx *tdx = to_tdx(vcpu);
 
 	if (kvm_tdx->state != TD_STATE_INITIALIZED)
 		return -EIO;
@@ -438,12 +439,42 @@ int tdx_vcpu_create(struct kvm_vcpu *vcpu)
 	if ((kvm_tdx->xfam & XFEATURE_MASK_XTILE) == XFEATURE_MASK_XTILE)
 		vcpu->arch.xfd_no_write_intercept = true;
 
+	tdx->state = VCPU_TD_STATE_UNINITIALIZED;
+
 	return 0;
 }
 
 void tdx_vcpu_free(struct kvm_vcpu *vcpu)
 {
-	/* This is stub for now. More logic will come. */
+	struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm);
+	struct vcpu_tdx *tdx = to_tdx(vcpu);
+	int i;
+
+	/*
+	 * It is not possible to reclaim pages while hkid is assigned. It might
+	 * be assigned if:
+	 * 1. the TD VM is being destroyed but freeing hkid failed, in which
+	 *    case the pages are leaked
+	 * 2. TD vCPU creation failed and this is on the error path, in which
+	 *    case there is nothing to do anyway
+	 */
+	if (is_hkid_assigned(kvm_tdx))
+		return;
+
+	if (tdx->vp.tdcx_pages) {
+		for (i = 0; i < kvm_tdx->td.tdcx_nr_pages; i++) {
+			if (tdx->vp.tdcx_pages[i])
+				tdx_reclaim_control_page(tdx->vp.tdcx_pages[i]);
+		}
+		kfree(tdx->vp.tdcx_pages);
+		tdx->vp.tdcx_pages = NULL;
+	}
+
+	if (tdx->vp.tdvpr_page) {
+		tdx_reclaim_control_page(tdx->vp.tdvpr_page);
+		tdx->vp.tdvpr_page = NULL;
+	}
+
+	tdx->state = VCPU_TD_STATE_UNINITIALIZED;
 }
 
 static int tdx_get_capabilities(struct kvm_tdx_cmd *cmd)
@@ -653,6 +684,8 @@ static int __tdx_td_init(struct kvm *kvm, struct td_params *td_params,
 		goto free_hkid;
 
 	kvm_tdx->td.tdcs_nr_pages = tdx_sysinfo->td_ctrl.tdcs_base_size / PAGE_SIZE;
+	/* TDVPS = TDVPR(4K page) + TDCX(multiple 4K pages), -1 for TDVPR. */
+	kvm_tdx->td.tdcx_nr_pages = tdx_sysinfo->td_ctrl.tdvps_base_size / PAGE_SIZE - 1;
 	tdcs_pages = kcalloc(kvm_tdx->td.tdcs_nr_pages, sizeof(*kvm_tdx->td.tdcs_pages),
 			     GFP_KERNEL | __GFP_ZERO);
 	if (!tdcs_pages)
@@ -930,6 +963,143 @@ out:
 	return r;
 }
 
+/* VMM can pass one 64-bit auxiliary value to the vCPU via RCX for the guest BIOS. */
+static int tdx_td_vcpu_init(struct kvm_vcpu *vcpu, u64 vcpu_rcx)
+{
+	struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm);
+	struct vcpu_tdx *tdx = to_tdx(vcpu);
+	struct page *page;
+	int ret, i;
+	u64 err;
+
+	page = alloc_page(GFP_KERNEL);
+	if (!page)
+		return -ENOMEM;
+	tdx->vp.tdvpr_page = page;
+
+	tdx->vp.tdcx_pages = kcalloc(kvm_tdx->td.tdcx_nr_pages, sizeof(*tdx->vp.tdcx_pages),
+				     GFP_KERNEL);
+	if (!tdx->vp.tdcx_pages) {
+		ret = -ENOMEM;
+		goto free_tdvpr;
+	}
+
+	for (i = 0; i < kvm_tdx->td.tdcx_nr_pages; i++) {
+		page = alloc_page(GFP_KERNEL);
+		if (!page) {
+			ret = -ENOMEM;
+			goto free_tdcx;
+		}
+		tdx->vp.tdcx_pages[i] = page;
+	}
+
+	err = tdh_vp_create(&kvm_tdx->td, &tdx->vp);
+	if (KVM_BUG_ON(err, vcpu->kvm)) {
+		ret = -EIO;
+		pr_tdx_error(TDH_VP_CREATE, err);
+		goto free_tdcx;
+	}
+
+	for (i = 0; i < kvm_tdx->td.tdcx_nr_pages; i++) {
+		err = tdh_vp_addcx(&tdx->vp, tdx->vp.tdcx_pages[i]);
+		if (KVM_BUG_ON(err, vcpu->kvm)) {
+			pr_tdx_error(TDH_VP_ADDCX, err);
+			/*
+			 * Pages already added are reclaimed by the vcpu_free
+			 * method, but the rest are freed here.
+			 */
+			for (; i < kvm_tdx->td.tdcx_nr_pages; i++) {
+				__free_page(tdx->vp.tdcx_pages[i]);
+				tdx->vp.tdcx_pages[i] = NULL;
+			}
+			return -EIO;
+		}
+	}
+
+	err = tdh_vp_init(&tdx->vp, vcpu_rcx, vcpu->vcpu_id);
+	if (KVM_BUG_ON(err, vcpu->kvm)) {
+		pr_tdx_error(TDH_VP_INIT, err);
+		return -EIO;
+	}
+
+	vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+
+	return 0;
+
+free_tdcx:
+	for (i = 0; i < kvm_tdx->td.tdcx_nr_pages; i++) {
+		if (tdx->vp.tdcx_pages[i])
+			__free_page(tdx->vp.tdcx_pages[i]);
+		tdx->vp.tdcx_pages[i] = NULL;
+	}
+	kfree(tdx->vp.tdcx_pages);
+	tdx->vp.tdcx_pages = NULL;
+
+free_tdvpr:
+	if (tdx->vp.tdvpr_page)
+		__free_page(tdx->vp.tdvpr_page);
+	tdx->vp.tdvpr_page = NULL;
+
+	return ret;
+}
+
+static int tdx_vcpu_init(struct kvm_vcpu *vcpu, struct kvm_tdx_cmd *cmd)
+{
+	u64 apic_base;
+	struct vcpu_tdx *tdx = to_tdx(vcpu);
+	int ret;
+
+	if (cmd->flags)
+		return -EINVAL;
+
+	if (tdx->state != VCPU_TD_STATE_UNINITIALIZED)
+		return -EINVAL;
+
+	/*
+	 * TDX requires X2APIC; userspace is responsible for configuring guest
+	 * CPUID accordingly.
+	 */
+	apic_base = APIC_DEFAULT_PHYS_BASE | LAPIC_MODE_X2APIC |
+		    (kvm_vcpu_is_reset_bsp(vcpu) ? MSR_IA32_APICBASE_BSP : 0);
+	if (kvm_apic_set_base(vcpu, apic_base, true))
+		return -EINVAL;
+
+	ret = tdx_td_vcpu_init(vcpu, (u64)cmd->data);
+	if (ret)
+		return ret;
+
+	tdx->state = VCPU_TD_STATE_INITIALIZED;
+
+	return 0;
+}
+
+int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp)
+{
+	struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm);
+	struct kvm_tdx_cmd cmd;
+	int ret;
+
+	if (!is_hkid_assigned(kvm_tdx) || kvm_tdx->state == TD_STATE_RUNNABLE)
+		return -EINVAL;
+
+	if (copy_from_user(&cmd, argp, sizeof(cmd)))
+		return -EFAULT;
+
+	if (cmd.hw_error)
+		return -EINVAL;
+
+	switch (cmd.id) {
+	case KVM_TDX_INIT_VCPU:
+		ret = tdx_vcpu_init(vcpu, &cmd);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
 static int tdx_online_cpu(unsigned int cpu)
 {
 	unsigned long flags;

diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h
--- a/arch/x86/kvm/vmx/tdx.h
+++ b/arch/x86/kvm/vmx/tdx.h
@@ -33,9 +33,18 @@ struct kvm_tdx {
 	struct tdx_td td;
 };
 
+/* TDX module vCPU states */
+enum vcpu_tdx_state {
+	VCPU_TD_STATE_UNINITIALIZED = 0,
+	VCPU_TD_STATE_INITIALIZED,
+};
+
 struct vcpu_tdx {
 	struct kvm_vcpu	vcpu;
+	/* TDX specific members follow. */
+	struct tdx_vp vp;
+	enum vcpu_tdx_state state;
 };
 
 static inline bool is_td(struct kvm *kvm)

diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -129,6 +129,8 @@ int tdx_vm_ioctl(struct kvm *kvm, void __user *argp);
 int tdx_vcpu_create(struct kvm_vcpu *vcpu);
 void tdx_vcpu_free(struct kvm_vcpu *vcpu);
 
+int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp);
+
 #else
 static inline int tdx_vm_init(struct kvm *kvm) { return -EOPNOTSUPP; }
 static inline void tdx_mmu_release_hkid(struct kvm *kvm) {}
@@ -137,6 +139,8 @@ static inline int tdx_vm_ioctl(struct kvm *kvm, void __user *argp) { return -EOP
 static inline int tdx_vcpu_create(struct kvm_vcpu *vcpu) { return -EOPNOTSUPP; }
 static inline void tdx_vcpu_free(struct kvm_vcpu *vcpu) {}
 
+static inline int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp) { return -EOPNOTSUPP; }
+
 #endif
 
 #endif /* __KVM_X86_VMX_X86_OPS_H */

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6287,6 +6287,12 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 	case KVM_SET_DEVICE_ATTR:
 		r = kvm_vcpu_ioctl_device_attr(vcpu, ioctl, argp);
 		break;
+	case KVM_MEMORY_ENCRYPT_OP:
+		r = -ENOTTY;
+		if (!kvm_x86_ops.vcpu_mem_enc_ioctl)
+			goto out;
+		r = kvm_x86_ops.vcpu_mem_enc_ioctl(vcpu, argp);
+		break;
 	default:
 		r = -EINVAL;
 	}
@@ -12676,6 +12682,7 @@ bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu)
 {
 	return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id;
 }
+EXPORT_SYMBOL_GPL(kvm_vcpu_is_reset_bsp);
 
 bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
 {