mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-08-05 16:54:27 +00:00

* kvm-arm64/misc-6.16:

  Misc changes and improvements for 6.16:

  - Add a new selftest for the SVE host state being corrupted by a guest
  - Keep HCR_EL2.xMO set at all times for systems running with the kernel at
    EL2, ensuring that the window for interrupts is slightly bigger, and
    avoiding a pretty bad erratum on the AmpereOne HW
  - Replace a couple of open-coded on/off strings with str_on_off()
  - Get rid of the pKVM memblock sorting, which now appears to be superfluous
  - Drop superfluous clearing of ICH_LR_EOI in the LR when nesting
  - Add a workaround for AmpereOne's erratum AC04_CPU_23, which suffers from a
    pretty bad case of TLB corruption unless accesses to HCR_EL2 are heavily
    synchronised
  - Add a per-VM, per-ITS debugfs entry to dump the state of the ITS tables in
    a human-friendly fashion

  KVM: arm64: Fix documentation for vgic_its_iter_next()
  KVM: arm64: vgic-its: Add debugfs interface to expose ITS tables
  arm64: errata: Work around AmpereOne's erratum AC04_CPU_23
  KVM: arm64: nv: Remove clearing of ICH_LR<n>.EOI if ICH_LR<n>.HW == 1
  KVM: arm64: Drop sort_memblock_regions()
  KVM: arm64: selftests: Add test for SVE host corruption
  KVM: arm64: Force HCR_EL2.xMO to 1 at all times in VHE mode
  KVM: arm64: Replace ternary flags with str_on_off() helper

  Signed-off-by: Marc Zyngier <maz@kernel.org>

// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2020 - Google LLC
 * Author: Quentin Perret <qperret@google.com>
 */

#include <linux/init.h>
#include <linux/interval_tree_generic.h>
#include <linux/kmemleak.h>
#include <linux/kvm_host.h>
#include <asm/kvm_mmu.h>
#include <linux/memblock.h>
#include <linux/mutex.h>

#include <asm/kvm_pkvm.h>

#include "hyp_constants.h"

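/* Flipped when the host is deprivileged; see pkvm_drop_host_privileges(). */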
DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);

static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory);
static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr);

phys_addr_t hyp_mem_base;
phys_addr_t hyp_mem_size;

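/*
 * Snapshot the host's memblock regions into the hyp_memory array shared
 * with EL2, so the hypervisor knows which ranges of memory exist.
 */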
static int __init register_memblock_regions(void)
{
	struct memblock_region *reg;

	for_each_mem_region(reg) {
		if (*hyp_memblock_nr_ptr >= HYP_MEMBLOCK_REGIONS)
			return -ENOMEM;

		hyp_memory[*hyp_memblock_nr_ptr] = *reg;
		(*hyp_memblock_nr_ptr)++;
	}

	return 0;
}

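/*
 * Reserve the memory needed by the hypervisor once the host is deprivileged:
 * hyp stage-1 tables, the host stage-2, the VM table, the hyp vmemmap, the
 * pKVM selftest pages and the FF-A proxy buffers. This runs early in boot,
 * while memblock is still the allocator in charge.
 */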
void __init kvm_hyp_reserve(void)
{
	u64 hyp_mem_pages = 0;
	int ret;

	if (!is_hyp_mode_available() || is_kernel_in_hyp_mode())
		return;

	if (kvm_get_mode() != KVM_MODE_PROTECTED)
		return;

	ret = register_memblock_regions();
	if (ret) {
		*hyp_memblock_nr_ptr = 0;
		kvm_err("Failed to register hyp memblocks: %d\n", ret);
		return;
	}

	hyp_mem_pages += hyp_s1_pgtable_pages();
	hyp_mem_pages += host_s2_pgtable_pages();
	hyp_mem_pages += hyp_vm_table_pages();
	hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE);
	hyp_mem_pages += pkvm_selftest_pages();
	hyp_mem_pages += hyp_ffa_proxy_pages();

	/*
	 * Try to allocate a PMD-aligned region to reduce TLB pressure once
	 * this is unmapped from the host stage-2, and fall back to PAGE_SIZE.
	 */
	hyp_mem_size = hyp_mem_pages << PAGE_SHIFT;
	hyp_mem_base = memblock_phys_alloc(ALIGN(hyp_mem_size, PMD_SIZE),
					   PMD_SIZE);
	if (!hyp_mem_base)
		hyp_mem_base = memblock_phys_alloc(hyp_mem_size, PAGE_SIZE);
	else
		hyp_mem_size = ALIGN(hyp_mem_size, PMD_SIZE);

	if (!hyp_mem_base) {
		kvm_err("Failed to reserve hyp memory\n");
		return;
	}

	kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20,
		 hyp_mem_base);
}

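/*
 * Tear down the EL2 view of the VM and release the memory that was donated
 * to the hypervisor for it.
 */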
static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm)
{
	if (host_kvm->arch.pkvm.handle) {
		WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm,
					  host_kvm->arch.pkvm.handle));
	}

	host_kvm->arch.pkvm.handle = 0;
	free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc);
	free_hyp_memcache(&host_kvm->arch.pkvm.stage2_teardown_mc);
}

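/*
 * Donate a page-aligned buffer to EL2 for the hyp view of the vCPU and let
 * the hypervisor initialise it. On success the vCPU is marked as finalized
 * and the buffer stays with EL2 until the VM is torn down.
 */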
static int __pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu)
{
	size_t hyp_vcpu_sz = PAGE_ALIGN(PKVM_HYP_VCPU_SIZE);
	pkvm_handle_t handle = vcpu->kvm->arch.pkvm.handle;
	void *hyp_vcpu;
	int ret;

	vcpu->arch.pkvm_memcache.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2;

	hyp_vcpu = alloc_pages_exact(hyp_vcpu_sz, GFP_KERNEL_ACCOUNT);
	if (!hyp_vcpu)
		return -ENOMEM;

	ret = kvm_call_hyp_nvhe(__pkvm_init_vcpu, handle, vcpu, hyp_vcpu);
	if (!ret)
		vcpu_set_flag(vcpu, VCPU_PKVM_FINALIZED);
	else
		free_pages_exact(hyp_vcpu, hyp_vcpu_sz);

	return ret;
}

/*
 * Allocates and donates memory for hypervisor VM structs at EL2.
 *
 * Allocates space for the VM state, which includes the hyp vm as well as
 * the hyp vcpus.
 *
 * Stores an opaque handle in the kvm struct for future reference.
 *
 * Return 0 on success, negative error code on failure.
 */
static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
{
	size_t pgd_sz, hyp_vm_sz;
	void *pgd, *hyp_vm;
	int ret;

	if (host_kvm->created_vcpus < 1)
		return -EINVAL;

	pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.mmu.vtcr);

	/*
	 * The PGD pages will be reclaimed using a hyp_memcache which implies
	 * page granularity. So, use alloc_pages_exact() to get individual
	 * refcounts.
	 */
	pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT);
	if (!pgd)
		return -ENOMEM;

	/* Allocate memory to donate to hyp for vm and vcpu pointers. */
	hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE,
					size_mul(sizeof(void *),
						 host_kvm->created_vcpus)));
	hyp_vm = alloc_pages_exact(hyp_vm_sz, GFP_KERNEL_ACCOUNT);
	if (!hyp_vm) {
		ret = -ENOMEM;
		goto free_pgd;
	}

	/* Donate the VM memory to hyp and let hyp initialize it. */
	ret = kvm_call_hyp_nvhe(__pkvm_init_vm, host_kvm, hyp_vm, pgd);
	if (ret < 0)
		goto free_vm;

	host_kvm->arch.pkvm.handle = ret;
	host_kvm->arch.pkvm.stage2_teardown_mc.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2;
	kvm_account_pgtable_pages(pgd, pgd_sz / PAGE_SIZE);

	return 0;

free_vm:
	free_pages_exact(hyp_vm, hyp_vm_sz);
free_pgd:
	free_pages_exact(pgd, pgd_sz);
	return ret;
}

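/* Create the EL2 VM state on first use; later calls are no-ops. */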
int pkvm_create_hyp_vm(struct kvm *host_kvm)
{
	int ret = 0;

	mutex_lock(&host_kvm->arch.config_lock);
	if (!host_kvm->arch.pkvm.handle)
		ret = __pkvm_create_hyp_vm(host_kvm);
	mutex_unlock(&host_kvm->arch.config_lock);

	return ret;
}

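/* Finalize the EL2 view of a vCPU exactly once, under the config_lock. */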
int pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu)
{
	int ret = 0;

	mutex_lock(&vcpu->kvm->arch.config_lock);
	if (!vcpu_get_flag(vcpu, VCPU_PKVM_FINALIZED))
		ret = __pkvm_create_hyp_vcpu(vcpu);
	mutex_unlock(&vcpu->kvm->arch.config_lock);

	return ret;
}

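/* Serialise teardown against VM/vCPU creation via the config_lock. */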
void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
{
	mutex_lock(&host_kvm->arch.config_lock);
	__pkvm_destroy_hyp_vm(host_kvm);
	mutex_unlock(&host_kvm->arch.config_lock);
}

int pkvm_init_host_vm(struct kvm *host_kvm)
{
	return 0;
}

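/*
 * Per-CPU callback for pkvm_drop_host_privileges(); any failure is reported
 * through *err.
 */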
static void __init _kvm_host_prot_finalize(void *arg)
{
	int *err = arg;

	if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)))
		WRITE_ONCE(*err, -EINVAL);
}

static int __init pkvm_drop_host_privileges(void)
{
	int ret = 0;

	/*
	 * Flip the static key upfront as that may no longer be possible
	 * once the host stage 2 is installed.
	 */
	static_branch_enable(&kvm_protected_mode_initialized);
	on_each_cpu(_kvm_host_prot_finalize, &ret, 1);
	return ret;
}

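/*
 * Late initcall: hide the hyp sections from kmemleak, then deprivilege the
 * host on every CPU. Once this has run, EL2-owned memory is no longer
 * accessible to the kernel.
 */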
static int __init finalize_pkvm(void)
{
	int ret;

	if (!is_protected_kvm_enabled() || !is_kvm_arm_initialised())
		return 0;

	/*
	 * Exclude HYP sections from kmemleak so that they don't get peeked
	 * at, which would end badly once inaccessible.
	 */
	kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
	kmemleak_free_part(__hyp_data_start, __hyp_data_end - __hyp_data_start);
	kmemleak_free_part(__hyp_rodata_start, __hyp_rodata_end - __hyp_rodata_start);
	kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size);

	ret = pkvm_drop_host_privileges();
	if (ret)
		pr_err("Failed to finalize Hyp protection: %d\n", ret);

	return ret;
}
device_initcall_sync(finalize_pkvm);

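/*
 * Guest mappings are tracked host-side in an interval tree of struct
 * pkvm_mapping, keyed by guest IPA: [gfn, gfn + nr_pages) converted to an
 * inclusive byte range.
 */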
static u64 __pkvm_mapping_start(struct pkvm_mapping *m)
{
	return m->gfn * PAGE_SIZE;
}

static u64 __pkvm_mapping_end(struct pkvm_mapping *m)
{
	return (m->gfn + m->nr_pages) * PAGE_SIZE - 1;
}

INTERVAL_TREE_DEFINE(struct pkvm_mapping, node, u64, __subtree_last,
		     __pkvm_mapping_start, __pkvm_mapping_end, static,
		     pkvm_mapping);

/*
 * __tmp is updated to iter_first(pkvm_mappings) *before* entering the body of the loop to allow
 * freeing of __map inline.
 */
#define for_each_mapping_in_range_safe(__pgt, __start, __end, __map)			\
	for (struct pkvm_mapping *__tmp = pkvm_mapping_iter_first(&(__pgt)->pkvm_mappings, \
								  __start, __end - 1);	\
	     __tmp && ({								\
			__map = __tmp;							\
			__tmp = pkvm_mapping_iter_next(__map, __start, __end - 1);	\
			true;								\
		       });								\
	    )

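/*
 * In protected mode the host never walks guest stage-2 tables itself; the
 * kvm_pgtable object only carries the mapping tree and the associated mmu.
 */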
int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
			     struct kvm_pgtable_mm_ops *mm_ops)
{
	pgt->pkvm_mappings = RB_ROOT_CACHED;
	pgt->mmu = mmu;

	return 0;
}

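/*
 * Ask the hypervisor to unshare every mapping overlapping [start, end) and
 * drop the corresponding host-side tracking structures.
 */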
static int __pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 start, u64 end)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	pkvm_handle_t handle = kvm->arch.pkvm.handle;
	struct pkvm_mapping *mapping;
	int ret;

	if (!handle)
		return 0;

	for_each_mapping_in_range_safe(pgt, start, end, mapping) {
		ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn,
					mapping->nr_pages);
		if (WARN_ON(ret))
			return ret;
		pkvm_mapping_remove(mapping, &pgt->pkvm_mappings);
		kfree(mapping);
	}

	return 0;
}

void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
{
	__pkvm_pgtable_stage2_unmap(pgt, 0, ~(0ULL));
}

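/*
 * Share a single page or a PMD-sized block with the guest. The struct
 * pkvm_mapping is taken from the hyp memcache (cache->mapping), so no
 * allocation happens on this path.
 */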
int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
			    u64 phys, enum kvm_pgtable_prot prot,
			    void *mc, enum kvm_pgtable_walk_flags flags)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	struct pkvm_mapping *mapping = NULL;
	struct kvm_hyp_memcache *cache = mc;
	u64 gfn = addr >> PAGE_SHIFT;
	u64 pfn = phys >> PAGE_SHIFT;
	int ret;

	if (size != PAGE_SIZE && size != PMD_SIZE)
		return -EINVAL;

	lockdep_assert_held_write(&kvm->mmu_lock);

	/*
	 * Calling stage2_map() on top of existing mappings is either happening because of a race
	 * with another vCPU, or because we're changing between page and block mappings. As per
	 * user_mem_abort(), same-size permission faults are handled in the relax_perms() path.
	 */
	mapping = pkvm_mapping_iter_first(&pgt->pkvm_mappings, addr, addr + size - 1);
	if (mapping) {
		if (size == (mapping->nr_pages * PAGE_SIZE))
			return -EAGAIN;

		/* Remove _any_ pkvm_mapping overlapping with the range, bigger or smaller. */
		ret = __pkvm_pgtable_stage2_unmap(pgt, addr, addr + size);
		if (ret)
			return ret;
		mapping = NULL;
	}

	ret = kvm_call_hyp_nvhe(__pkvm_host_share_guest, pfn, gfn, size / PAGE_SIZE, prot);
	if (WARN_ON(ret))
		return ret;

	swap(mapping, cache->mapping);
	mapping->gfn = gfn;
	mapping->pfn = pfn;
	mapping->nr_pages = size / PAGE_SIZE;
	pkvm_mapping_insert(mapping, &pgt->pkvm_mappings);

	return ret;
}

int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	lockdep_assert_held_write(&kvm_s2_mmu_to_kvm(pgt->mmu)->mmu_lock);

	return __pkvm_pgtable_stage2_unmap(pgt, addr, addr + size);
}

int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	pkvm_handle_t handle = kvm->arch.pkvm.handle;
	struct pkvm_mapping *mapping;
	int ret = 0;

	lockdep_assert_held(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
		ret = kvm_call_hyp_nvhe(__pkvm_host_wrprotect_guest, handle, mapping->gfn,
					mapping->nr_pages);
		if (WARN_ON(ret))
			break;
	}

	return ret;
}

int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	struct pkvm_mapping *mapping;

	lockdep_assert_held(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
		__clean_dcache_guest_page(pfn_to_kaddr(mapping->pfn),
					  PAGE_SIZE * mapping->nr_pages);

	return 0;
}

bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64 size, bool mkold)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	pkvm_handle_t handle = kvm->arch.pkvm.handle;
	struct pkvm_mapping *mapping;
	bool young = false;

	lockdep_assert_held(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
		young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest, handle, mapping->gfn,
					   mapping->nr_pages, mkold);

	return young;
}

int pkvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot,
				    enum kvm_pgtable_walk_flags flags)
{
	return kvm_call_hyp_nvhe(__pkvm_host_relax_perms_guest, addr >> PAGE_SHIFT, prot);
}

void pkvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
				 enum kvm_pgtable_walk_flags flags)
{
	WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_mkyoung_guest, addr >> PAGE_SHIFT));
}

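/*
 * The remaining kvm_pgtable operations have no meaning when the stage-2
 * tables are owned by EL2; reaching any of them is a bug.
 */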
void pkvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level)
{
	WARN_ON_ONCE(1);
}

kvm_pte_t *pkvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, u64 phys, s8 level,
					       enum kvm_pgtable_prot prot, void *mc, bool force_pte)
{
	WARN_ON_ONCE(1);
	return NULL;
}

int pkvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
			      struct kvm_mmu_memory_cache *mc)
{
	WARN_ON_ONCE(1);
	return -EINVAL;
}