// SPDX-License-Identifier: GPL-2.0-only
// Copyright 2023 Google LLC
// Author: Ard Biesheuvel <ardb@google.com>

#include <linux/types.h>
#include <linux/sizes.h>

#include <asm/memory.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>

#include "pi.h"

/**
 * map_range - Map a contiguous range of physical pages into virtual memory
 *
 * @pte:		Address of physical pointer to array of pages to
 *			allocate page tables from
 * @start:		Virtual address of the start of the range
 * @end:		Virtual address of the end of the range (exclusive)
 * @pa:			Physical address of the start of the range
 * @prot:		Access permissions of the range
 * @level:		Translation level for the mapping
 * @tbl:		The level @level page table to create the mappings in
 * @may_use_cont:	Whether the use of the contiguous attribute is allowed
 * @va_offset:		Offset between a physical page and its current mapping
 *			in the VA space
 */
void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot,
		      int level, pte_t *tbl, bool may_use_cont, u64 va_offset)
{
	u64 cmask = (level == 3) ? CONT_PTE_SIZE - 1 : U64_MAX;
	ptdesc_t protval = pgprot_val(prot) & ~PTE_TYPE_MASK;
	int lshift = (3 - level) * PTDESC_TABLE_SHIFT;
	u64 lmask = (PAGE_SIZE << lshift) - 1;
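
	/*
	 * lmask covers the VA range mapped by a single entry at this level;
	 * cmask covers one contiguous run of level 3 entries, which restricts
	 * the contiguous attribute to level 3 mappings here.
	 */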
	start	&= PAGE_MASK;
	pa	&= PAGE_MASK;

	/* Advance tbl to the entry that covers start */
	tbl += (start >> (lshift + PAGE_SHIFT)) % PTRS_PER_PTE;

	/*
	 * Set the right block/page bits for this level unless we are
	 * clearing the mapping
	 */
	if (protval)
		protval |= (level == 2) ? PMD_TYPE_SECT : PTE_TYPE_PAGE;
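
	/*
	 * Process the range one entry at a time: 'next' is the start of the
	 * following entry-sized chunk at this level, capped at the end of
	 * the range.
	 */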
	while (start < end) {
		u64 next = min((start | lmask) + 1, PAGE_ALIGN(end));
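
		/*
		 * Only levels 2 and 3 may carry block or page mappings;
		 * anything larger, or a level 2 chunk that is not block-sized
		 * and aligned, is mapped via a next-level table.
		 */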
		if (level < 2 || (level == 2 && (start | next | pa) & lmask)) {
			/*
			 * This chunk needs a finer grained mapping. Create a
			 * table mapping if necessary and recurse.
			 */
			if (pte_none(*tbl)) {
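				/*
				 * Allocate the next level table from the page
				 * cursor at *pte and advance the cursor by
				 * one page.
				 */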
				*tbl = __pte(__phys_to_pte_val(*pte) |
					     PMD_TYPE_TABLE | PMD_TABLE_UXN);
				*pte += PTRS_PER_PTE * sizeof(pte_t);
			}
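			/*
			 * Recurse into the next level table; va_offset (0 for
			 * the initial ID map created below) converts its
			 * physical address into one we can dereference here.
			 */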
			map_range(pte, start, next, pa, prot, level + 1,
				  (pte_t *)(__pte_to_phys(*tbl) + va_offset),
				  may_use_cont, va_offset);
		} else {
			/*
			 * Start a contiguous range if start and pa are
			 * suitably aligned
			 */
			if (((start | pa) & cmask) == 0 && may_use_cont)
				protval |= PTE_CONT;

			/*
			 * Clear the contiguous attribute if the remaining
			 * range does not cover a contiguous block
			 */
			if ((end & ~cmask) <= start)
				protval &= ~PTE_CONT;

			/* Put down a block or page mapping */
			*tbl = __pte(__phys_to_pte_val(pa) | protval);
		}
		pa += next - start;
		start = next;
		tbl++;
	}
}
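
/*
 * Create the initial ID map of the kernel image in the page tables rooted at
 * pg_dir, with any descriptor bits in clrmask removed from the text and data
 * permissions. Page table pages are taken from the memory directly following
 * pg_dir; the address just past the last page used is returned.
 */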
asmlinkage u64 __init create_init_idmap(pgd_t *pg_dir, ptdesc_t clrmask)
{
	u64 ptep = (u64)pg_dir + PAGE_SIZE;
	pgprot_t text_prot = PAGE_KERNEL_ROX;
	pgprot_t data_prot = PAGE_KERNEL;

	pgprot_val(text_prot) &= ~clrmask;
	pgprot_val(data_prot) &= ~clrmask;
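
	/*
	 * Map [_stext, __initdata_begin) read-only and executable, and
	 * [__initdata_begin, _end) with writable kernel data permissions.
	 */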
	map_range(&ptep, (u64)_stext, (u64)__initdata_begin, (u64)_stext,
		  text_prot, IDMAP_ROOT_LEVEL, (pte_t *)pg_dir, false, 0);
	map_range(&ptep, (u64)__initdata_begin, (u64)_end, (u64)__initdata_begin,
		  data_prot, IDMAP_ROOT_LEVEL, (pte_t *)pg_dir, false, 0);

	return ptep;
}