// SPDX-License-Identifier: GPL-2.0-only
/*
 * Based on arch/arm/mm/mmap.c
 *
 * Copyright (C) 2012 ARM Ltd.
 */

#include <linux/io.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/types.h>

#include <asm/cpufeature.h>
#include <asm/page.h>
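
/*
 * Base page protections for userspace mappings, indexed by the VM_READ,
 * VM_WRITE, VM_EXEC and VM_SHARED vm_flags bits. vm_get_page_prot() starts
 * from this table; adjust_protection_map() tweaks it at boot once the CPU
 * features are known.
 */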
static pgprot_t protection_map[16] __ro_after_init = {
	[VM_NONE] = PAGE_NONE,
	[VM_READ] = PAGE_READONLY,
	[VM_WRITE] = PAGE_READONLY,
	[VM_WRITE | VM_READ] = PAGE_READONLY,
	/* PAGE_EXECONLY if Enhanced PAN */
	[VM_EXEC] = PAGE_READONLY_EXEC,
	[VM_EXEC | VM_READ] = PAGE_READONLY_EXEC,
	[VM_EXEC | VM_WRITE] = PAGE_READONLY_EXEC,
	[VM_EXEC | VM_WRITE | VM_READ] = PAGE_READONLY_EXEC,
	[VM_SHARED] = PAGE_NONE,
	[VM_SHARED | VM_READ] = PAGE_READONLY,
	[VM_SHARED | VM_WRITE] = PAGE_SHARED,
	[VM_SHARED | VM_WRITE | VM_READ] = PAGE_SHARED,
	/* PAGE_EXECONLY if Enhanced PAN */
	[VM_SHARED | VM_EXEC] = PAGE_READONLY_EXEC,
	[VM_SHARED | VM_EXEC | VM_READ] = PAGE_READONLY_EXEC,
	[VM_SHARED | VM_EXEC | VM_WRITE] = PAGE_SHARED_EXEC,
	[VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_SHARED_EXEC
};

/*
 * You really shouldn't be using read() or write() on /dev/mem. This might go
 * away in the future.
 */
int valid_phys_addr_range(phys_addr_t addr, size_t size)
{
	/*
	 * Check whether addr is covered by a memory region without the
	 * MEMBLOCK_NOMAP attribute, and whether that region covers the
	 * entire range. In theory, this could lead to false negatives
	 * if the range is covered by distinct but adjacent memory regions
	 * that only differ in other attributes. However, few such
	 * attributes have been defined, and it is debatable whether it
	 * follows that /dev/mem read() calls should be able to traverse
	 * such boundaries.
	 */
	return memblock_is_region_memory(addr, size) &&
	       memblock_is_map_memory(addr);
}

/*
 * Do not allow /dev/mem mappings beyond the supported physical range.
 */
int valid_mmap_phys_addr_range(unsigned long pfn, size_t size)
{
	return !(((pfn << PAGE_SHIFT) + size) & ~PHYS_MASK);
}
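
/*
 * Boot-time adjustments to protection_map once CPU features are known. As an
 * arch_initcall this runs after the cpucaps have been detected and before any
 * userspace translation tables exist.
 */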
static int __init adjust_protection_map(void)
{
	/*
	 * With Enhanced PAN we can honour the execute-only permissions as
	 * there is no PAN override with such mappings.
	 */
	if (cpus_have_cap(ARM64_HAS_EPAN)) {
		protection_map[VM_EXEC] = PAGE_EXECONLY;
		protection_map[VM_EXEC | VM_SHARED] = PAGE_EXECONLY;
	}
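
	/*
	 * With LPA2 the shareability attribute is no longer encoded in the
	 * page table descriptors, so clear PTE_SHARED from the base
	 * protections.
	 */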
	if (lpa2_is_enabled())
		for (int i = 0; i < ARRAY_SIZE(protection_map); i++)
			pgprot_val(protection_map[i]) &= ~PTE_SHARED;

	return 0;
}
arch_initcall(adjust_protection_map);
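
/*
 * Resolve a set of vm_flags into the arm64 page protection bits: start from
 * protection_map (or the GCS protection) and OR in the BTI, MTE and, where
 * supported, POE attributes.
 */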
pgprot_t vm_get_page_prot(vm_flags_t vm_flags)
{
	ptdesc_t prot;

	/* Short circuit GCS to avoid bloating the table. */
	if (system_supports_gcs() && (vm_flags & VM_SHADOW_STACK)) {
		prot = _PAGE_GCS_RO;
	} else {
		prot = pgprot_val(protection_map[vm_flags &
				  (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]);
	}
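
	/* BTI mappings are marked as guarded pages via the GP bit. */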
	if (vm_flags & VM_ARM64_BTI)
		prot |= PTE_GP;

	/*
	 * There are two conditions required for returning a Normal Tagged
	 * memory type: (1) the user requested it via PROT_MTE passed to
	 * mmap() or mprotect() and (2) the corresponding vma supports MTE. We
	 * register (1) as VM_MTE in the vma->vm_flags and (2) as
	 * VM_MTE_ALLOWED. Note that the latter can only be set during the
	 * mmap() call since mprotect() does not accept MAP_* flags.
	 * Checking for VM_MTE only is sufficient since arch_validate_flags()
	 * does not permit (VM_MTE & !VM_MTE_ALLOWED).
	 */
	if (vm_flags & VM_MTE)
		prot |= PTE_ATTRINDX(MT_NORMAL_TAGGED);
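
/*
 * With permission overlays (POE), the 3-bit protection key carried in the
 * vm_flags selects the permission overlay index encoded in the PTE.
 */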
#ifdef CONFIG_ARCH_HAS_PKEYS
	if (system_supports_poe()) {
		if (vm_flags & VM_PKEY_BIT0)
			prot |= PTE_PO_IDX_0;
		if (vm_flags & VM_PKEY_BIT1)
			prot |= PTE_PO_IDX_1;
		if (vm_flags & VM_PKEY_BIT2)
			prot |= PTE_PO_IDX_2;
	}
#endif

	return __pgprot(prot);
}
EXPORT_SYMBOL(vm_get_page_prot);