linux/arch/csky/include/asm/pgtable.h

270 lines
6.5 KiB
C
Raw Permalink Normal View History

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __ASM_CSKY_PGTABLE_H
#define __ASM_CSKY_PGTABLE_H
#include <asm/fixmap.h>
#include <asm/memory.h>
#include <asm/addrspace.h>
#include <abi/pgtable-bits.h>
#include <asm-generic/pgtable-nopmd.h>
/*
 * PGDIR_SIZE is 1 << PGDIR_SHIFT bytes; PGDIR_MASK clears the low
 * PGDIR_SHIFT bits of an address.
 */
#define PGDIR_SHIFT		22
#define PGDIR_SIZE		(1UL << PGDIR_SHIFT)
#define PGDIR_MASK		(~(PGDIR_SIZE - 1))

/* Number of PGDIR_SIZE-sized units that lie below PAGE_OFFSET. */
#define USER_PTRS_PER_PGD	(PAGE_OFFSET / PGDIR_SIZE)
/*
 * C-SKY uses a two-level paging structure: the PMD level holds a single
 * entry (PTRS_PER_PMD is 1), which leaves the PGD and PTE levels.
 */
#define PTRS_PER_PGD	(PAGE_SIZE / sizeof(pgd_t))
#define PTRS_PER_PMD	1
csky: drop definition of PTE_ORDER Patch series "arch: make PxD_ORDER generically available", v2. The question what does PxD_ORDER define raises from time to time and there is still a conflict between MIPS and DAX definitions. Some time ago Matthew Wilcox suggested to use PMD_TABLE_ORDER to define the order of page table allocation: [1] https://lore.kernel.org/linux-arch/YPCJftSTUBEnq2lI@casper.infradead.org/ The parisc patch made it in, but mips didn't. Now mips defines from asm/include/pgtable.h were copied to loongarch which made it worse. Let's deal with it once and for all and rename PxD_ORDER defines to PxD_TABLE_ORDER or just drop them when the only possible order of page table is 0. This patch (of 15): This is the order of the page table allocation, not the order of a PTE. Since its always hardwired to 0, simply drop it. Link: https://lkml.kernel.org/r/20220705154708.181258-1-rppt@kernel.org Link: https://lkml.kernel.org/r/20220703141203.147893-1-rppt@kernel.org Link: https://lkml.kernel.org/r/20220703141203.147893-2-rppt@kernel.org Signed-off-by: Mike Rapoport <rppt@linux.ibm.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Dinh Nguyen <dinguyen@kernel.org> Cc: Guo Ren <guoren@kernel.org> Cc: Helge Deller <deller@gmx.de> Cc: Huacai Chen <chenhuacai@kernel.org> Cc: James Bottomley <James.Bottomley@HansenPartnership.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Max Filippov <jcmvbkbc@gmail.com> Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de> Cc: Xuerui Wang <kernel@xen0n.name> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2022-07-03 17:11:50 +03:00
/* Number of pte_t entries that fit in one page. */
#define PTRS_PER_PTE	(PAGE_SIZE / sizeof(pte_t))

/* Report a bad PTE / PGD value together with the reporting file and line. */
#define pte_ERROR(e)							\
	pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, (e).pte_low)
#define pgd_ERROR(e)							\
	pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))

/* Shift applied to a page frame number inside a PTE (see pfn_pte()). */
#define PFN_PTE_SHIFT	PAGE_SHIFT
/*
 * pmd_pfn():  page frame number obtained by shifting pmd_phys() right by
 *             PAGE_SHIFT.
 * pmd_page(): pfn_to_page() applied to that frame number.
 */
#define pmd_pfn(pmd)	(pmd_phys(pmd) >> PAGE_SHIFT)
#define pmd_page(pmd)	(pfn_to_page(pmd_pfn(pmd)))
/*
 * pte_clear - reset the page-table entry at @ptep to its empty encoding.
 * @mm:   not used by this implementation
 * @addr: virtual address the entry belongs to
 * @ptep: entry to reset
 *
 * Entries for addresses at or above PAGE_OFFSET are written as
 * __pte(_PAGE_GLOBAL); all other entries are written as __pte(0).
 * Both encodings satisfy pte_none(), which ignores _PAGE_GLOBAL.
 *
 * @addr is parenthesized before the cast so that a compound argument such
 * as "base | off" is converted and compared as a whole, instead of the cast
 * binding to its first operand only.
 */
#define pte_clear(mm, addr, ptep)					\
	set_pte((ptep),							\
		(((unsigned long)(addr) >= PAGE_OFFSET) ?		\
			__pte(_PAGE_GLOBAL) : __pte(0)))
/* An entry is "none" when no bit other than _PAGE_GLOBAL is set. */
#define pte_none(pte)		((pte_val(pte) & ~_PAGE_GLOBAL) == 0)

/* Non-zero when the _PAGE_PRESENT bit is set. */
#define pte_present(pte)	(pte_val(pte) & _PAGE_PRESENT)
/* Conversions between a PTE and the page frame number stored in it. */
#define pte_pfn(x)	((unsigned long)((x).pte_low >> PAGE_SHIFT))
#define pfn_pte(pfn, prot)						\
	__pte(((unsigned long long)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
#define pte_page(x)	pfn_to_page(pte_pfn(x))

/* A swap entry and a PTE carry the same raw value. */
#define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x)	((pte_t) { (x).val })

/* Build a PTE from a page number and protection bits. */
#define __mk_pte(page_nr, pgprot)					\
	__pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot))
/*
 * C-SKY only has the VALID and DIRTY bits in hardware, so these two bits
 * are used to emulate PRESENT, READ, WRITE, EXEC, MODIFIED and ACCESSED.
 */
csky: Fixup _PAGE_ACCESSED for default pgprot When the system memory is exhausted, linux will trigger kswapd to shrink memory page cache. We found the csky's .text file mapping pages would be reclaimed earlier than arm's elf. Because csky doesn't give _PAGE_ACCESSED for default pgprot and in zap_pte_range if (pte_young(ptent) && likely(!(vma->vm_flags & VM_SEQ_READ))) mark_page_accessed(page); mark_page_accessed will put the pages into active lru list. [ 3.652722] delete busybox page from inactive file list Call Trace: [<9012a376>] dump_stack+0xe/0x24 [<9012a370>] dump_stack+0x8/0x24 [<9005b780>] activate_page+0x2b4/0x2d4 [<90132502>] vsnprintf+0x2c6/0x374 [<9005b880>] mark_page_accessed+0xe0/0x150 [<9006903e>] unmap_page_range+0x166/0x33c [<90021844>] get_signal+0x98/0x3b4 [<90069232>] unmap_single_vma+0x1e/0x24 [<90069462>] unmap_vmas+0x26/0x40 [<9006d3d8>] exit_mmap+0x60/0xbc [<9006a140>] handle_mm_fault+0x700/0xcec [<900426b2>] ktime_get_with_offset+0x86/0x130 [<90017566>] mmput+0x2e/0x90 [<9001a30a>] do_exit+0x13e/0x6f0 [<90015448>] page_fault_end+0x14/0x74 [<9001b4bc>] SyS_exit_group+0x0/0xc [<9001b47c>] do_group_exit+0x2c/0x6c [<9001b4c8>] __wake_up_parent+0x0/0x20 [<9001399e>] csky_systemcall+0x6e/0x72 csky will throw the pages at first and keep them in active lru list later after real accessed, but arm would keep them in active lru list at the beginning. The following are statistics of different architecture styles: Default _PAGE_ACCESSED: alpha, arm, arm64, ia64, m68k, microblaze, openrisc, powerpc, riscv, sh, um, x86, xtensa Not def _PAGE_ACCESSED: arc, c6x, h8300, hexgon, mips, s390, nds32, nios2, parisc, sparc Signed-off-by: Guo Ren <guoren@linux.alibaba.com> Co-developed-by: Xu Kai <xukai@nationalchip.com> Signed-off-by: Xu Kai <xukai@nationalchip.com>
2021-01-11 20:16:28 +08:00
/* Bits common to every mapped protection value below. */
#define _PAGE_BASE	(_PAGE_PRESENT | _PAGE_ACCESSED)

/* User-visible protections; PAGE_SHARED is an alias of PAGE_WRITE. */
#define PAGE_NONE	__pgprot(_PAGE_PROT_NONE)
#define PAGE_READ	__pgprot(_PAGE_BASE | _PAGE_READ | _CACHE_CACHED)
#define PAGE_WRITE	__pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_WRITE | \
				 _CACHE_CACHED)
#define PAGE_SHARED	PAGE_WRITE
/* Kernel mapping: cached, global, with every permission/state bit set. */
#define PAGE_KERNEL							\
	__pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_VALID |		\
		 _PAGE_WRITE | _PAGE_DIRTY | _PAGE_MODIFIED |		\
		 _PAGE_GLOBAL | _CACHE_CACHED)

/* Same bits as PAGE_KERNEL, but uncached and with _PAGE_SO set. */
#define _PAGE_IOREMAP							\
	(_PAGE_BASE | _PAGE_READ | _PAGE_VALID |			\
	 _PAGE_WRITE | _PAGE_DIRTY | _PAGE_MODIFIED |			\
	 _PAGE_GLOBAL | _CACHE_UNCACHED | _PAGE_SO)

/* Bits of an existing PTE that pte_modify() carries over unchanged. */
#define _PAGE_CHG_MASK							\
	(~(unsigned long)(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE |	\
			  _CACHE_MASK | _PAGE_GLOBAL))
/* Build-time check that MAX_SWAPFILES_SHIFT is exactly 5. */
#define MAX_SWAPFILES_CHECK()	BUILD_BUG_ON(MAX_SWAPFILES_SHIFT != 5)
/* One page worth of unsigned longs; ZERO_PAGE() ignores its address argument. */
extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
#define ZERO_PAGE(vaddr)	(virt_to_page(empty_zero_page))

void load_pgd(unsigned long pg_dir);

/* Table whose physical address marks a PMD entry as empty (see pmd_none()). */
extern pte_t invalid_pte_table[PTRS_PER_PTE];
/*
 * Store @pte into the page-table slot @p.
 *
 * When CONFIG_CPU_NEED_TLBSYNC is defined, the data-cache line holding the
 * slot is written back with dcache_wb_line() after the store.  A full
 * smp_mb() always follows.
 */
static inline void set_pte(pte_t *p, pte_t pte)
{
	*p = pte;

#ifdef CONFIG_CPU_NEED_TLBSYNC
	dcache_wb_line((u32)p);
#endif

	/* Barrier: prevent out-of-order execution across the update. */
	smp_mb();
}
/* Return the result of __va() applied to the raw value held in @pmd. */
static inline pte_t *pmd_page_vaddr(pmd_t pmd)
{
	unsigned long table = pmd_val(pmd);

	return __va(table);
}
/* pmd_phys() is the raw PMD value (also used by pmd_pfn() and pmd_page()). */
#define pmd_phys(pmd)	(pmd_val(pmd))
/*
 * Store @pmd into the slot @p.
 *
 * When CONFIG_CPU_NEED_TLBSYNC is defined, the data-cache line holding the
 * slot is written back with dcache_wb_line() after the store.  A full
 * smp_mb() always follows.
 */
static inline void set_pmd(pmd_t *p, pmd_t pmd)
{
	*p = pmd;

#ifdef CONFIG_CPU_NEED_TLBSYNC
	dcache_wb_line((u32)p);
#endif

	/* Barrier: prevent speculative execution across the update. */
	smp_mb();
}
/*
 * A PMD entry is empty when it holds the physical address of
 * invalid_pte_table.  Returns 1 if so, 0 otherwise.
 */
static inline int pmd_none(pmd_t pmd)
{
	if (pmd_val(pmd) == __pa(invalid_pte_table))
		return 1;

	return 0;
}
/* Non-zero when any bit outside PAGE_MASK is set in the PMD value. */
#define pmd_bad(pmd)	(pmd_val(pmd) & ~PAGE_MASK)
/*
 * A PMD entry is present when it holds anything other than the physical
 * address of invalid_pte_table.  Returns 1 if so, 0 otherwise.
 */
static inline int pmd_present(pmd_t pmd)
{
	return !(pmd_val(pmd) == __pa(invalid_pte_table));
}
/*
 * Reset the PMD slot @p to the empty encoding, i.e. the physical address of
 * invalid_pte_table.
 *
 * When CONFIG_CPU_NEED_TLBSYNC is defined, the data-cache line holding the
 * slot is written back afterwards.  Unlike set_pmd(), no smp_mb() is issued
 * here.
 */
static inline void pmd_clear(pmd_t *p)
{
	pmd_val(*p) = __pa(invalid_pte_table);

#ifdef CONFIG_CPU_NEED_TLBSYNC
	dcache_wb_line((u32)p);
#endif
}
/*
 * The following only work if pte_present() is true.
 * Behaviour is undefined otherwise.
 */
/* Return the _PAGE_READ bit of @pte; zero when the bit is clear. */
static inline int pte_read(pte_t pte)
{
	return (pte).pte_low & _PAGE_READ;
}
/* Return the _PAGE_WRITE bit of @pte; zero when the bit is clear. */
static inline int pte_write(pte_t pte)
{
	return pte.pte_low & _PAGE_WRITE;
}
/*
 * Return the _PAGE_MODIFIED bit of @pte; zero when the bit is clear.
 * Note that this tests _PAGE_MODIFIED, not _PAGE_DIRTY.
 */
static inline int pte_dirty(pte_t pte)
{
	return pte.pte_low & _PAGE_MODIFIED;
}
/* Return the _PAGE_ACCESSED bit of @pte; zero when the bit is clear. */
static inline int pte_young(pte_t pte)
{
	return pte.pte_low & _PAGE_ACCESSED;
}
/* Return a copy of @pte with _PAGE_WRITE and _PAGE_DIRTY both cleared. */
static inline pte_t pte_wrprotect(pte_t pte)
{
	pte_val(pte) = pte_val(pte) & ~(_PAGE_WRITE | _PAGE_DIRTY);

	return pte;
}
/* Return a copy of @pte with _PAGE_MODIFIED and _PAGE_DIRTY both cleared. */
static inline pte_t pte_mkclean(pte_t pte)
{
	pte_val(pte) = pte_val(pte) & ~(_PAGE_MODIFIED | _PAGE_DIRTY);

	return pte;
}
/* Return a copy of @pte with _PAGE_ACCESSED and _PAGE_VALID both cleared. */
static inline pte_t pte_mkold(pte_t pte)
{
	pte_val(pte) = pte_val(pte) & ~(_PAGE_ACCESSED | _PAGE_VALID);

	return pte;
}
mm: Rename arch pte_mkwrite()'s to pte_mkwrite_novma() The x86 Shadow stack feature includes a new type of memory called shadow stack. This shadow stack memory has some unusual properties, which requires some core mm changes to function properly. One of these unusual properties is that shadow stack memory is writable, but only in limited ways. These limits are applied via a specific PTE bit combination. Nevertheless, the memory is writable, and core mm code will need to apply the writable permissions in the typical paths that call pte_mkwrite(). The goal is to make pte_mkwrite() take a VMA, so that the x86 implementation of it can know whether to create regular writable or shadow stack mappings. But there are a couple of challenges to this. Modifying the signatures of each arch pte_mkwrite() implementation would be error prone because some are generated with macros and would need to be re-implemented. Also, some pte_mkwrite() callers operate on kernel memory without a VMA. So this can be done in a three step process. First pte_mkwrite() can be renamed to pte_mkwrite_novma() in each arch, with a generic pte_mkwrite() added that just calls pte_mkwrite_novma(). Next callers without a VMA can be moved to pte_mkwrite_novma(). And lastly, pte_mkwrite() and all callers can be changed to take/pass a VMA. Start the process by renaming pte_mkwrite() to pte_mkwrite_novma() and adding the pte_mkwrite() wrapper in linux/pgtable.h. Apply the same pattern for pmd_mkwrite(). Since not all archs have a pmd_mkwrite_novma(), create a new arch config HAS_HUGE_PAGE that can be used to tell if pmd_mkwrite() should be defined. Otherwise in the !HAS_HUGE_PAGE cases the compiler would not be able to find pmd_mkwrite_novma(). No functional change. 
Suggested-by: Linus Torvalds <torvalds@linuxfoundation.org> Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com> Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com> Reviewed-by: Mike Rapoport (IBM) <rppt@kernel.org> Acked-by: Geert Uytterhoeven <geert@linux-m68k.org> Acked-by: David Hildenbrand <david@redhat.com> Link: https://lore.kernel.org/lkml/CAHk-=wiZjSu7c9sFYZb3q04108stgHff2wfbokGCCgW7riz+8Q@mail.gmail.com/ Link: https://lore.kernel.org/all/20230613001108.3040476-2-rick.p.edgecombe%40intel.com
2023-06-12 17:10:27 -07:00
/*
 * Return a copy of @pte with _PAGE_WRITE set.  If the entry is already
 * marked _PAGE_MODIFIED, _PAGE_DIRTY is set as well.
 */
static inline pte_t pte_mkwrite_novma(pte_t pte)
{
	pte_val(pte) = pte_val(pte) | _PAGE_WRITE;

	if (pte_val(pte) & _PAGE_MODIFIED)
		pte_val(pte) = pte_val(pte) | _PAGE_DIRTY;

	return pte;
}
/*
 * Return a copy of @pte with _PAGE_MODIFIED set.  If the entry is
 * writable (_PAGE_WRITE), _PAGE_DIRTY is set as well.
 */
static inline pte_t pte_mkdirty(pte_t pte)
{
	pte_val(pte) = pte_val(pte) | _PAGE_MODIFIED;

	if (pte_val(pte) & _PAGE_WRITE)
		pte_val(pte) = pte_val(pte) | _PAGE_DIRTY;

	return pte;
}
/*
 * Return a copy of @pte with _PAGE_ACCESSED set.  If the entry is
 * readable (_PAGE_READ), _PAGE_VALID is set as well.
 */
static inline pte_t pte_mkyoung(pte_t pte)
{
	pte_val(pte) = pte_val(pte) | _PAGE_ACCESSED;

	if (pte_val(pte) & _PAGE_READ)
		pte_val(pte) = pte_val(pte) | _PAGE_VALID;

	return pte;
}
/* True when _PAGE_SWP_EXCLUSIVE is set in the swap PTE @pte. */
static inline bool pte_swp_exclusive(pte_t pte)
{
	return (pte_val(pte) & _PAGE_SWP_EXCLUSIVE) != 0;
}
/* Return a copy of the swap PTE @pte with _PAGE_SWP_EXCLUSIVE set. */
static inline pte_t pte_swp_mkexclusive(pte_t pte)
{
	pte_val(pte) = pte_val(pte) | _PAGE_SWP_EXCLUSIVE;

	return pte;
}
/* Return a copy of the swap PTE @pte with _PAGE_SWP_EXCLUSIVE cleared. */
static inline pte_t pte_swp_clear_exclusive(pte_t pte)
{
	pte_val(pte) = pte_val(pte) & ~_PAGE_SWP_EXCLUSIVE;

	return pte;
}
/* Announce that this architecture supplies its own phys_mem_access_prot(). */
#define __HAVE_PHYS_MEM_ACCESS_PROT

struct file;

pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
			      unsigned long size, pgprot_t vma_prot);
/*
 * Mark a page protection value as "uncacheable": the cache-attribute field
 * (_CACHE_MASK) is replaced with _CACHE_UNCACHED and _PAGE_SO is set.
 *
 * Note that "protection" is really a misnomer here, as the protection value
 * contains the memory attribute bits, dirty bits, and various other bits
 * as well.
 */
#define pgprot_noncached pgprot_noncached
static inline pgprot_t pgprot_noncached(pgprot_t _prot)
{
	unsigned long bits = pgprot_val(_prot);

	bits &= ~_CACHE_MASK;
	bits |= _CACHE_UNCACHED | _PAGE_SO;

	return __pgprot(bits);
}
/*
 * Write-combining variant: the cache-attribute field (_CACHE_MASK) is
 * replaced with _CACHE_UNCACHED.  Unlike pgprot_noncached(), _PAGE_SO is
 * not added.
 */
#define pgprot_writecombine pgprot_writecombine
static inline pgprot_t pgprot_writecombine(pgprot_t _prot)
{
	unsigned long bits = pgprot_val(_prot);

	bits &= ~_CACHE_MASK;
	bits |= _CACHE_UNCACHED;

	return __pgprot(bits);
}
/*
 * Build a new PTE from @pte and @newprot: the bits of @pte selected by
 * _PAGE_CHG_MASK are kept and OR-ed with the bits of @newprot.
 */
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
	unsigned long kept = pte_val(pte) & _PAGE_CHG_MASK;

	return __pte(kept | pgprot_val(newprot));
}
extern pgd_t swapper_pg_dir[PTRS_PER_PGD];

void paging_init(void);
void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma,
			    unsigned long address, pte_t *pte,
			    unsigned int nr);

/* Single-entry form: calls update_mmu_cache_range() with a NULL vmf and nr 1. */
#define update_mmu_cache(vma, addr, ptep)				\
	update_mmu_cache_range(NULL, vma, addr, ptep, 1)

/* I/O remapping forwards to remap_pfn_range() with the same arguments. */
#define io_remap_pfn_range(vma, vaddr, pfn, size, prot)			\
	remap_pfn_range(vma, vaddr, pfn, size, prot)
#endif /* __ASM_CSKY_PGTABLE_H */