// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2024 Loongson Technology Corporation Limited
 */
#include <linux/memblock.h>
#include <linux/pagewalk.h>
#include <linux/pgtable.h>
#include <asm/set_memory.h>
#include <asm/tlbflush.h>

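/*
 * Attribute masks applied during a page table walk: bits in set_mask are
 * ORed into each entry, bits in clear_mask are removed from it.
 */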
struct pageattr_masks {
	pgprot_t set_mask;
	pgprot_t clear_mask;
};

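/* Clear the requested bits in @val first, then set the requested bits. */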
static unsigned long set_pageattr_masks(unsigned long val, struct mm_walk *walk)
{
	unsigned long new_val = val;
	struct pageattr_masks *masks = walk->private;

	new_val &= ~(pgprot_val(masks->clear_mask));
	new_val |= (pgprot_val(masks->set_mask));

	return new_val;
}

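/*
 * Per-level walk callbacks: huge (leaf) entries are rewritten in place at the
 * PGD/P4D/PUD/PMD level, regular mappings are rewritten at the PTE level.
 */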
static int pageattr_pgd_entry(pgd_t *pgd, unsigned long addr,
			      unsigned long next, struct mm_walk *walk)
{
	pgd_t val = pgdp_get(pgd);

	if (pgd_leaf(val)) {
		val = __pgd(set_pageattr_masks(pgd_val(val), walk));
		set_pgd(pgd, val);
	}

	return 0;
}

static int pageattr_p4d_entry(p4d_t *p4d, unsigned long addr,
			      unsigned long next, struct mm_walk *walk)
{
	p4d_t val = p4dp_get(p4d);

	if (p4d_leaf(val)) {
		val = __p4d(set_pageattr_masks(p4d_val(val), walk));
		set_p4d(p4d, val);
	}

	return 0;
}

static int pageattr_pud_entry(pud_t *pud, unsigned long addr,
			      unsigned long next, struct mm_walk *walk)
{
	pud_t val = pudp_get(pud);

	if (pud_leaf(val)) {
		val = __pud(set_pageattr_masks(pud_val(val), walk));
		set_pud(pud, val);
	}

	return 0;
}

static int pageattr_pmd_entry(pmd_t *pmd, unsigned long addr,
			      unsigned long next, struct mm_walk *walk)
{
	pmd_t val = pmdp_get(pmd);

	if (pmd_leaf(val)) {
		val = __pmd(set_pageattr_masks(pmd_val(val), walk));
		set_pmd(pmd, val);
	}

	return 0;
}

static int pageattr_pte_entry(pte_t *pte, unsigned long addr,
			      unsigned long next, struct mm_walk *walk)
{
	pte_t val = ptep_get(pte);

	val = __pte(set_pageattr_masks(pte_val(val), walk));
	set_pte(pte, val);

	return 0;
}

static int pageattr_pte_hole(unsigned long addr, unsigned long next,
			     int depth, struct mm_walk *walk)
{
	return 0;
}

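/*
 * Walk ops used by __set_memory(): leaf entries at every level are rewritten,
 * while holes in the kernel page tables are skipped (see pageattr_pte_hole()).
 */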
static const struct mm_walk_ops pageattr_ops = {
	.pgd_entry = pageattr_pgd_entry,
	.p4d_entry = pageattr_p4d_entry,
	.pud_entry = pageattr_pud_entry,
	.pmd_entry = pageattr_pmd_entry,
	.pte_entry = pageattr_pte_entry,
	.pte_hole = pageattr_pte_hole,
	.walk_lock = PGWALK_RDLOCK,
};

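/*
 * Apply set_mask/clear_mask to every mapping in the range
 * [addr, addr + numpages * PAGE_SIZE) by walking the kernel page tables
 * under the init_mm mmap lock, then flush the TLB for that range.
 */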
static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask, pgprot_t clear_mask)
{
	int ret;
	unsigned long start = addr;
	unsigned long end = start + PAGE_SIZE * numpages;
	struct pageattr_masks masks = {
		.set_mask = set_mask,
		.clear_mask = clear_mask
	};

	if (!numpages)
		return 0;

	mmap_write_lock(&init_mm);
	ret = walk_kernel_page_table_range(start, end, &pageattr_ops, NULL, &masks);
	mmap_write_unlock(&init_mm);

	flush_tlb_kernel_range(start, end);

	return ret;
}

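/*
 * Addresses below vm_map_base are in the DMW-based direct mapping, which is
 * not backed by page tables, so attribute changes are silently skipped there.
 */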
int set_memory_x(unsigned long addr, int numpages)
{
	if (addr < vm_map_base)
		return 0;

	return __set_memory(addr, numpages, __pgprot(0), __pgprot(_PAGE_NO_EXEC));
}

int set_memory_nx(unsigned long addr, int numpages)
{
	if (addr < vm_map_base)
		return 0;

	return __set_memory(addr, numpages, __pgprot(_PAGE_NO_EXEC), __pgprot(0));
}

int set_memory_ro(unsigned long addr, int numpages)
{
	if (addr < vm_map_base)
		return 0;

	return __set_memory(addr, numpages, __pgprot(0), __pgprot(_PAGE_WRITE | _PAGE_DIRTY));
}

int set_memory_rw(unsigned long addr, int numpages)
{
	if (addr < vm_map_base)
		return 0;

	return __set_memory(addr, numpages, __pgprot(_PAGE_WRITE | _PAGE_DIRTY), __pgprot(0));
}

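/*
 * Report whether @page is currently mapped in the kernel address space,
 * stopping at the first leaf entry found on the way down the page tables.
 * Direct-mapped (DMW) addresses count as present when they point at memory.
 */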
bool kernel_page_present(struct page *page)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	unsigned long addr = (unsigned long)page_address(page);

	if (addr < vm_map_base)
		return memblock_is_memory(__pa(addr));

	pgd = pgd_offset_k(addr);
	if (pgd_none(pgdp_get(pgd)))
		return false;
	if (pgd_leaf(pgdp_get(pgd)))
		return true;

	p4d = p4d_offset(pgd, addr);
	if (p4d_none(p4dp_get(p4d)))
		return false;
	if (p4d_leaf(p4dp_get(p4d)))
		return true;

	pud = pud_offset(p4d, addr);
	if (pud_none(pudp_get(pud)))
		return false;
	if (pud_leaf(pudp_get(pud)))
		return true;

	pmd = pmd_offset(pud, addr);
	if (pmd_none(pmdp_get(pmd)))
		return false;
	if (pmd_leaf(pmdp_get(pmd)))
		return true;

	pte = pte_offset_kernel(pmd, addr);
	return pte_present(ptep_get(pte));
}

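/*
 * set_direct_map_*() helpers adjust kernel mappings without a TLB flush:
 * "invalid" clears the present/valid bits, "default" restores PAGE_KERNEL.
 */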
int set_direct_map_default_noflush(struct page *page)
{
	unsigned long addr = (unsigned long)page_address(page);

	if (addr < vm_map_base)
		return 0;

	return __set_memory(addr, 1, PAGE_KERNEL, __pgprot(0));
}

int set_direct_map_invalid_noflush(struct page *page)
{
	unsigned long addr = (unsigned long)page_address(page);

	if (addr < vm_map_base)
		return 0;

	return __set_memory(addr, 1, __pgprot(0), __pgprot(_PAGE_PRESENT | _PAGE_VALID));
}

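/* Make the kernel mapping for @page valid (PAGE_KERNEL) or invalid, without flushing the TLB. */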
int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid)
{
	unsigned long addr = (unsigned long)page_address(page);
	pgprot_t set, clear;

	if (addr < vm_map_base)
		return 0;

	if (valid) {
		set = PAGE_KERNEL;
		clear = __pgprot(0);
	} else {
		set = __pgprot(0);
		clear = __pgprot(_PAGE_PRESENT | _PAGE_VALID);
	}

	return __set_memory(addr, 1, set, clear);
}