KVM: arm64: Prepare the creation of s1 mappings at EL2
When memory protection is enabled, the EL2 code needs the ability to
create and manage its own page-table. To do so, introduce a new set of
hypercalls to bootstrap a memory management system at EL2.
This leads to the following boot flow in nVHE Protected mode:
1. the host allocates memory for the hypervisor very early on, using
the memblock API;
2. the host creates a set of stage 1 page-table for EL2, installs the
EL2 vectors, and issues the __pkvm_init hypercall;
3. during __pkvm_init, the hypervisor re-creates its stage 1 page-table
and stores it in the memory pool provided by the host;
4. the hypervisor then extends its stage 1 mappings to include a
vmemmap in the EL2 VA space, hence allowing to use the buddy
allocator introduced in a previous patch;
5. the hypervisor jumps back in the idmap page, switches from the
host-provided page-table to the new one, and wraps up its
initialization by enabling the new allocator, before returning to
the host.
6. the host can free the now unused page-table created for EL2, and
will now need to issue hypercalls to make changes to the EL2 stage 1
mappings instead of modifying them directly.
Note that for the sake of simplifying the review, this patch focuses on
the hypervisor side of things. In other words, this only implements the
new hypercalls, but does not make use of them from the host yet. The
host-side changes will follow in a subsequent patch.
Credits to Will for __pkvm_init_switch_pgd.
Acked-by: Will Deacon <will@kernel.org>
Co-authored-by: Will Deacon <will@kernel.org>
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210319100146.1149909-18-qperret@google.com
2021-03-19 10:01:25 +00:00
|
|
|
// SPDX-License-Identifier: GPL-2.0
|
|
|
|
/*
|
|
|
|
* Copyright (C) 2020 - Google LLC
|
|
|
|
* Author: Quentin Perret <qperret@google.com>
|
|
|
|
*/
|
|
|
|
|
2023-04-20 13:33:56 +01:00
|
|
|
#include <linux/init.h>
|
|
|
|
#include <linux/kmemleak.h>
|
KVM: arm64: Prepare the creation of s1 mappings at EL2
When memory protection is enabled, the EL2 code needs the ability to
create and manage its own page-table. To do so, introduce a new set of
hypercalls to bootstrap a memory management system at EL2.
This leads to the following boot flow in nVHE Protected mode:
1. the host allocates memory for the hypervisor very early on, using
the memblock API;
2. the host creates a set of stage 1 page-table for EL2, installs the
EL2 vectors, and issues the __pkvm_init hypercall;
3. during __pkvm_init, the hypervisor re-creates its stage 1 page-table
and stores it in the memory pool provided by the host;
4. the hypervisor then extends its stage 1 mappings to include a
vmemmap in the EL2 VA space, hence allowing to use the buddy
allocator introduced in a previous patch;
5. the hypervisor jumps back in the idmap page, switches from the
host-provided page-table to the new one, and wraps up its
initialization by enabling the new allocator, before returning to
the host.
6. the host can free the now unused page-table created for EL2, and
will now need to issue hypercalls to make changes to the EL2 stage 1
mappings instead of modifying them directly.
Note that for the sake of simplifying the review, this patch focuses on
the hypervisor side of things. In other words, this only implements the
new hypercalls, but does not make use of them from the host yet. The
host-side changes will follow in a subsequent patch.
Credits to Will for __pkvm_init_switch_pgd.
Acked-by: Will Deacon <will@kernel.org>
Co-authored-by: Will Deacon <will@kernel.org>
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210319100146.1149909-18-qperret@google.com
2021-03-19 10:01:25 +00:00
|
|
|
#include <linux/kvm_host.h>
|
|
|
|
#include <linux/memblock.h>
|
2022-11-10 19:02:46 +00:00
|
|
|
#include <linux/mutex.h>
|
2021-03-19 10:01:35 +00:00
|
|
|
#include <linux/sort.h>
|
KVM: arm64: Prepare the creation of s1 mappings at EL2
When memory protection is enabled, the EL2 code needs the ability to
create and manage its own page-table. To do so, introduce a new set of
hypercalls to bootstrap a memory management system at EL2.
This leads to the following boot flow in nVHE Protected mode:
1. the host allocates memory for the hypervisor very early on, using
the memblock API;
2. the host creates a set of stage 1 page-table for EL2, installs the
EL2 vectors, and issues the __pkvm_init hypercall;
3. during __pkvm_init, the hypervisor re-creates its stage 1 page-table
and stores it in the memory pool provided by the host;
4. the hypervisor then extends its stage 1 mappings to include a
vmemmap in the EL2 VA space, hence allowing to use the buddy
allocator introduced in a previous patch;
5. the hypervisor jumps back in the idmap page, switches from the
host-provided page-table to the new one, and wraps up its
initialization by enabling the new allocator, before returning to
the host.
6. the host can free the now unused page-table created for EL2, and
will now need to issue hypercalls to make changes to the EL2 stage 1
mappings instead of modifying them directly.
Note that for the sake of simplifying the review, this patch focuses on
the hypervisor side of things. In other words, this only implements the
new hypercalls, but does not make use of them from the host yet. The
host-side changes will follow in a subsequent patch.
Credits to Will for __pkvm_init_switch_pgd.
Acked-by: Will Deacon <will@kernel.org>
Co-authored-by: Will Deacon <will@kernel.org>
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210319100146.1149909-18-qperret@google.com
2021-03-19 10:01:25 +00:00
|
|
|
|
2021-12-02 17:10:48 +00:00
|
|
|
#include <asm/kvm_pkvm.h>
|
KVM: arm64: Prepare the creation of s1 mappings at EL2
When memory protection is enabled, the EL2 code needs the ability to
create and manage its own page-table. To do so, introduce a new set of
hypercalls to bootstrap a memory management system at EL2.
This leads to the following boot flow in nVHE Protected mode:
1. the host allocates memory for the hypervisor very early on, using
the memblock API;
2. the host creates a set of stage 1 page-table for EL2, installs the
EL2 vectors, and issues the __pkvm_init hypercall;
3. during __pkvm_init, the hypervisor re-creates its stage 1 page-table
and stores it in the memory pool provided by the host;
4. the hypervisor then extends its stage 1 mappings to include a
vmemmap in the EL2 VA space, hence allowing to use the buddy
allocator introduced in a previous patch;
5. the hypervisor jumps back in the idmap page, switches from the
host-provided page-table to the new one, and wraps up its
initialization by enabling the new allocator, before returning to
the host.
6. the host can free the now unused page-table created for EL2, and
will now need to issue hypercalls to make changes to the EL2 stage 1
mappings instead of modifying them directly.
Note that for the sake of simplifying the review, this patch focuses on
the hypervisor side of things. In other words, this only implements the
new hypercalls, but does not make use of them from the host yet. The
host-side changes will follow in a subsequent patch.
Credits to Will for __pkvm_init_switch_pgd.
Acked-by: Will Deacon <will@kernel.org>
Co-authored-by: Will Deacon <will@kernel.org>
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210319100146.1149909-18-qperret@google.com
2021-03-19 10:01:25 +00:00
|
|
|
|
2021-12-02 17:10:48 +00:00
|
|
|
#include "hyp_constants.h"
|
KVM: arm64: Prepare the creation of s1 mappings at EL2
When memory protection is enabled, the EL2 code needs the ability to
create and manage its own page-table. To do so, introduce a new set of
hypercalls to bootstrap a memory management system at EL2.
This leads to the following boot flow in nVHE Protected mode:
1. the host allocates memory for the hypervisor very early on, using
the memblock API;
2. the host creates a set of stage 1 page-table for EL2, installs the
EL2 vectors, and issues the __pkvm_init hypercall;
3. during __pkvm_init, the hypervisor re-creates its stage 1 page-table
and stores it in the memory pool provided by the host;
4. the hypervisor then extends its stage 1 mappings to include a
vmemmap in the EL2 VA space, hence allowing to use the buddy
allocator introduced in a previous patch;
5. the hypervisor jumps back in the idmap page, switches from the
host-provided page-table to the new one, and wraps up its
initialization by enabling the new allocator, before returning to
the host.
6. the host can free the now unused page-table created for EL2, and
will now need to issue hypercalls to make changes to the EL2 stage 1
mappings instead of modifying them directly.
Note that for the sake of simplifying the review, this patch focuses on
the hypervisor side of things. In other words, this only implements the
new hypercalls, but does not make use of them from the host yet. The
host-side changes will follow in a subsequent patch.
Credits to Will for __pkvm_init_switch_pgd.
Acked-by: Will Deacon <will@kernel.org>
Co-authored-by: Will Deacon <will@kernel.org>
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210319100146.1149909-18-qperret@google.com
2021-03-19 10:01:25 +00:00
|
|
|
|
2023-04-20 13:33:56 +01:00
|
|
|
/*
 * Flipped on by pkvm_drop_host_privileges() once the host has been
 * deprivileged; may never be possible to change again afterwards.
 */
DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);

/*
 * Host-side aliases of the hypervisor's copy of the memory map
 * (kvm_nvhe_sym resolves the EL2 symbol from the kernel's view).
 */
static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory);
static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr);

/*
 * Physical base and size of the memory pool reserved for EL2 by
 * kvm_hyp_reserve(); both remain 0 if reservation was skipped or failed.
 */
phys_addr_t hyp_mem_base;
phys_addr_t hyp_mem_size;
|
|
|
|
|
2021-03-19 10:01:35 +00:00
|
|
|
static int cmp_hyp_memblock(const void *p1, const void *p2)
|
|
|
|
{
|
|
|
|
const struct memblock_region *r1 = p1;
|
|
|
|
const struct memblock_region *r2 = p2;
|
|
|
|
|
|
|
|
return r1->base < r2->base ? -1 : (r1->base > r2->base);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Sort the shared hyp memblock array in place by ascending base address. */
static void __init sort_memblock_regions(void)
{
	unsigned int nr_regions = *hyp_memblock_nr_ptr;

	sort(hyp_memory, nr_regions, sizeof(*hyp_memory),
	     cmp_hyp_memblock, NULL);
}
|
|
|
|
|
KVM: arm64: Prepare the creation of s1 mappings at EL2
When memory protection is enabled, the EL2 code needs the ability to
create and manage its own page-table. To do so, introduce a new set of
hypercalls to bootstrap a memory management system at EL2.
This leads to the following boot flow in nVHE Protected mode:
1. the host allocates memory for the hypervisor very early on, using
the memblock API;
2. the host creates a set of stage 1 page-table for EL2, installs the
EL2 vectors, and issues the __pkvm_init hypercall;
3. during __pkvm_init, the hypervisor re-creates its stage 1 page-table
and stores it in the memory pool provided by the host;
4. the hypervisor then extends its stage 1 mappings to include a
vmemmap in the EL2 VA space, hence allowing to use the buddy
allocator introduced in a previous patch;
5. the hypervisor jumps back in the idmap page, switches from the
host-provided page-table to the new one, and wraps up its
initialization by enabling the new allocator, before returning to
the host.
6. the host can free the now unused page-table created for EL2, and
will now need to issue hypercalls to make changes to the EL2 stage 1
mappings instead of modifying them directly.
Note that for the sake of simplifying the review, this patch focuses on
the hypervisor side of things. In other words, this only implements the
new hypercalls, but does not make use of them from the host yet. The
host-side changes will follow in a subsequent patch.
Credits to Will for __pkvm_init_switch_pgd.
Acked-by: Will Deacon <will@kernel.org>
Co-authored-by: Will Deacon <will@kernel.org>
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210319100146.1149909-18-qperret@google.com
2021-03-19 10:01:25 +00:00
|
|
|
/*
 * Snapshot the kernel's memory map into the array shared with EL2, sorted
 * by base address.
 *
 * Return: 0 on success, -ENOMEM if the map has more regions than the
 * fixed-size EL2 array can hold (the partial copy is left in place for
 * the caller to reset).
 */
static int __init register_memblock_regions(void)
{
	unsigned int *nr = hyp_memblock_nr_ptr;
	struct memblock_region *reg;

	for_each_mem_region(reg) {
		if (*nr >= HYP_MEMBLOCK_REGIONS)
			return -ENOMEM;

		hyp_memory[(*nr)++] = *reg;
	}

	sort_memblock_regions();

	return 0;
}
|
|
|
|
|
|
|
|
/*
 * Early (memblock-time) carve-out of the memory pool donated to the EL2
 * hypervisor in protected (pKVM) mode.
 *
 * On success, hyp_mem_base/hyp_mem_size describe the reservation; on
 * failure (or when pKVM is not in use) both are left at zero and an error
 * is logged where appropriate.
 */
void __init kvm_hyp_reserve(void)
{
	u64 nr_pages = 0;
	int err;

	/* Only relevant when the kernel runs nVHE at EL1 with EL2 present. */
	if (!is_hyp_mode_available() || is_kernel_in_hyp_mode())
		return;

	/* The pool is only needed in protected mode. */
	if (kvm_get_mode() != KVM_MODE_PROTECTED)
		return;

	err = register_memblock_regions();
	if (err) {
		*hyp_memblock_nr_ptr = 0;
		kvm_err("Failed to register hyp memblocks: %d\n", err);
		return;
	}

	/* Sum the page budget of each fixed-size EL2 data structure. */
	nr_pages += hyp_s1_pgtable_pages();
	nr_pages += host_s2_pgtable_pages();
	nr_pages += hyp_vm_table_pages();
	nr_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE);
	nr_pages += hyp_ffa_proxy_pages();

	/*
	 * Try to allocate a PMD-aligned region to reduce TLB pressure once
	 * this is unmapped from the host stage-2, and fallback to PAGE_SIZE.
	 */
	hyp_mem_size = nr_pages << PAGE_SHIFT;
	hyp_mem_base = memblock_phys_alloc(ALIGN(hyp_mem_size, PMD_SIZE),
					   PMD_SIZE);
	if (!hyp_mem_base)
		hyp_mem_base = memblock_phys_alloc(hyp_mem_size, PAGE_SIZE);
	else
		hyp_mem_size = ALIGN(hyp_mem_size, PMD_SIZE);

	if (!hyp_mem_base) {
		kvm_err("Failed to reserve hyp memory\n");
		return;
	}

	kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20,
		 hyp_mem_base);
}
|
2022-11-10 19:02:46 +00:00
|
|
|
|
2024-01-24 09:10:28 +00:00
|
|
|
static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm)
|
|
|
|
{
|
|
|
|
if (host_kvm->arch.pkvm.handle) {
|
|
|
|
WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm,
|
|
|
|
host_kvm->arch.pkvm.handle));
|
|
|
|
}
|
|
|
|
|
|
|
|
host_kvm->arch.pkvm.handle = 0;
|
|
|
|
free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc);
|
|
|
|
}
|
|
|
|
|
2022-11-10 19:02:46 +00:00
|
|
|
/*
 * Allocates and donates memory for hypervisor VM structs at EL2.
 *
 * Allocates space for the VM state, which includes the hyp vm as well as
 * the hyp vcpus.
 *
 * Stores an opaque handle in the kvm struct for future reference.
 *
 * Two distinct unwind paths exist: before __pkvm_init_vm succeeds the
 * host still owns the pages and frees them directly (free_vm/free_pgd);
 * afterwards ownership has moved to EL2, so errors go through
 * __pkvm_destroy_hyp_vm() instead (destroy_vm).
 *
 * Return: 0 on success, negative error code on failure.
 */
static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
{
	size_t pgd_sz, hyp_vm_sz, hyp_vcpu_sz;
	struct kvm_vcpu *host_vcpu;
	pkvm_handle_t handle;
	void *pgd, *hyp_vm;
	unsigned long idx;
	int ret;

	if (host_kvm->created_vcpus < 1)
		return -EINVAL;

	/* Size the stage-2 PGD from this VM's own VTCR (per-s2_mmu value). */
	pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.mmu.vtcr);

	/*
	 * The PGD pages will be reclaimed using a hyp_memcache which implies
	 * page granularity. So, use alloc_pages_exact() to get individual
	 * refcounts.
	 */
	pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT);
	if (!pgd)
		return -ENOMEM;

	/* Allocate memory to donate to hyp for vm and vcpu pointers. */
	hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE,
					size_mul(sizeof(void *),
						 host_kvm->created_vcpus)));
	hyp_vm = alloc_pages_exact(hyp_vm_sz, GFP_KERNEL_ACCOUNT);
	if (!hyp_vm) {
		ret = -ENOMEM;
		goto free_pgd;
	}

	/* Donate the VM memory to hyp and let hyp initialize it. */
	ret = kvm_call_hyp_nvhe(__pkvm_init_vm, host_kvm, hyp_vm, pgd);
	if (ret < 0)
		goto free_vm;

	/* A non-negative return is the opaque EL2 handle for this VM. */
	handle = ret;

	host_kvm->arch.pkvm.handle = handle;

	/* Donate memory for the vcpus at hyp and initialize it. */
	hyp_vcpu_sz = PAGE_ALIGN(PKVM_HYP_VCPU_SIZE);
	kvm_for_each_vcpu(idx, host_vcpu, host_kvm) {
		void *hyp_vcpu;

		/* Indexing of the vcpus to be sequential starting at 0. */
		if (WARN_ON(host_vcpu->vcpu_idx != idx)) {
			ret = -EINVAL;
			goto destroy_vm;
		}

		hyp_vcpu = alloc_pages_exact(hyp_vcpu_sz, GFP_KERNEL_ACCOUNT);
		if (!hyp_vcpu) {
			ret = -ENOMEM;
			goto destroy_vm;
		}

		ret = kvm_call_hyp_nvhe(__pkvm_init_vcpu, handle, host_vcpu,
					hyp_vcpu);
		if (ret) {
			/* vcpu page not yet donated: free it here. */
			free_pages_exact(hyp_vcpu, hyp_vcpu_sz);
			goto destroy_vm;
		}
	}

	return 0;

destroy_vm:
	/* Ownership moved to EL2: full teardown reclaims the donated pages. */
	__pkvm_destroy_hyp_vm(host_kvm);
	return ret;
free_vm:
	free_pages_exact(hyp_vm, hyp_vm_sz);
free_pgd:
	free_pages_exact(pgd, pgd_sz);
	return ret;
}
|
|
|
|
|
|
|
|
int pkvm_create_hyp_vm(struct kvm *host_kvm)
|
|
|
|
{
|
|
|
|
int ret = 0;
|
|
|
|
|
2024-01-24 09:10:28 +00:00
|
|
|
mutex_lock(&host_kvm->arch.config_lock);
|
2022-11-10 19:02:46 +00:00
|
|
|
if (!host_kvm->arch.pkvm.handle)
|
|
|
|
ret = __pkvm_create_hyp_vm(host_kvm);
|
2024-01-24 09:10:28 +00:00
|
|
|
mutex_unlock(&host_kvm->arch.config_lock);
|
2022-11-10 19:02:46 +00:00
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
|
|
|
|
{
|
2024-01-24 09:10:28 +00:00
|
|
|
mutex_lock(&host_kvm->arch.config_lock);
|
|
|
|
__pkvm_destroy_hyp_vm(host_kvm);
|
|
|
|
mutex_unlock(&host_kvm->arch.config_lock);
|
2022-11-10 19:02:46 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Per-VM pKVM initialization hook. Nothing to set up in this version;
 * kept so callers have a stable init entry point.
 */
int pkvm_init_host_vm(struct kvm *host_kvm)
{
	return 0;
}
|
2023-04-20 13:33:56 +01:00
|
|
|
|
|
|
|
/*
 * Per-CPU callback (run via on_each_cpu): ask EL2 to finalize host
 * stage-2 protection on this CPU. On failure, -EINVAL is recorded in the
 * shared error word @arg; WRITE_ONCE since every CPU targets the same word.
 */
static void __init _kvm_host_prot_finalize(void *arg)
{
	int *ret = arg;

	if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)))
		WRITE_ONCE(*ret, -EINVAL);
}
|
|
|
|
|
|
|
|
/*
 * Deprivilege the host on every CPU by installing the host stage-2.
 * Returns 0 on success, -EINVAL if any CPU failed to finalize.
 */
static int __init pkvm_drop_host_privileges(void)
{
	int err = 0;

	/*
	 * The static key must be flipped before installing the host stage 2:
	 * doing so afterwards may no longer be possible.
	 */
	static_branch_enable(&kvm_protected_mode_initialized);

	on_each_cpu(_kvm_host_prot_finalize, &err, 1);

	return err;
}
|
|
|
|
|
|
|
|
/*
 * Late initcall: hide the HYP sections from kmemleak, then deprivilege
 * the host. A no-op (returning 0) when protected mode is disabled or KVM
 * failed to initialize.
 */
static int __init finalize_pkvm(void)
{
	int err = 0;

	if (is_protected_kvm_enabled() && is_kvm_arm_initialised()) {
		/*
		 * Exclude HYP sections from kmemleak so that they don't get
		 * peeked at, which would end badly once inaccessible.
		 */
		kmemleak_free_part(__hyp_bss_start,
				   __hyp_bss_end - __hyp_bss_start);
		kmemleak_free_part(__hyp_rodata_start,
				   __hyp_rodata_end - __hyp_rodata_start);
		kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size);

		err = pkvm_drop_host_privileges();
		if (err)
			pr_err("Failed to finalize Hyp protection: %d\n", err);
	}

	return err;
}
device_initcall_sync(finalize_pkvm);
|