linux/arch/riscv/include/asm/pgalloc.h
Qi Zheng 4239c198e8 riscv: pgtable: unconditionally use tlb_remove_ptdesc()
To support fast gup, the commit 69be3fb111 ("riscv: enable
MMU_GATHER_RCU_TABLE_FREE for SMP && MMU") did the following:

1) use tlb_remove_page_ptdesc() for those platforms which use IPI to
   perform TLB shootdown

2) use tlb_remove_ptdesc() for those platforms which use SBI to perform
   TLB shootdown

The tlb_remove_page_ptdesc() is the wrapper of the tlb_remove_page().  By
design, the tlb_remove_page() should be used to remove a normal page from
a page table entry, and should not be used for page table pages.

The tlb_remove_ptdesc() is the wrapper of the tlb_remove_table(), which is
designed specifically for freeing page table pages.  If the
CONFIG_MMU_GATHER_TABLE_FREE is enabled, the tlb_remove_table() will use
semi RCU to free page table pages, that is:

 - batch table freeing: asynchronous free by RCU
 - single table freeing: IPI + synchronous free

If the CONFIG_MMU_GATHER_TABLE_FREE is disabled, the tlb_remove_table()
will fall back to pagetable_dtor() + tlb_remove_page().

For case 1), since we need to perform TLB shootdown before freeing the
page table page, the local_irq_save() in fast gup can block the freeing
and protect the fast gup page walker.  Therefore we can ensure safety by
just using tlb_remove_page_ptdesc().  In addition, we can also the
tlb_remove_ptdesc()/tlb_remove_table() to achieve it, and it doesn't
matter whether CONFIG_MMU_GATHER_RCU_TABLE_FREE is selected.  And in
theory, the performance of freeing pages asynchronously via RCU will not
be lower than synchronous free.

For case 2), since local_irq_save() only disable S-privilege IPI irq but
not M-privilege's, which is used by the SBI implementation to perform TLB
shootdown, so we must select CONFIG_MMU_GATHER_RCU_TABLE_FREE and use
tlb_remove_ptdesc() to ensure safety.  The riscv selects this config for
SMP && MMU, the CONFIG_RISCV_SBI is dependent on MMU.  Therefore, only the
UP system may have the situation where CONFIG_MMU_GATHER_RCU_TABLE_FREE is
disabled but CONFIG_RISCV_SBI is enabled.  But there is no freeing vs fast
gup race in the UP system.

So, in summary, we can use tlb_remove_ptdesc() to support fast gup in all
cases, and this interface is specifically designed for page table pages. 
So let's use it unconditionally.

Link: https://lkml.kernel.org/r/9025595e895515515c95e48db54b29afa489c41d.1740454179.git.zhengqi.arch@bytedance.com
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexandre Ghiti <alexghiti@rivosinc.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Hugh Dickens <hughd@google.com>
Cc: Jann Horn <jannh@google.com>
Cc: Kevin Brodsky <kevin.brodsky@arm.com>
Cc: Matthew Wilcow (Oracle) <willy@infradead.org>
Cc: "Mike Rapoport (IBM)" <rppt@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Rik van Riel <riel@surriel.com>
Cc: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Cc: Will Deacon <will@kernel.org>
Cc: Yu Zhao <yuzhao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-04-01 15:17:14 -07:00

140 lines
3.2 KiB
C

/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2009 Chen Liqin <liqin.chen@sunplusct.com>
* Copyright (C) 2012 Regents of the University of California
*/
#ifndef _ASM_RISCV_PGALLOC_H
#define _ASM_RISCV_PGALLOC_H
#include <linux/mm.h>
#include <asm/sbi.h>
#include <asm/tlb.h>
#ifdef CONFIG_MMU
#define __HAVE_ARCH_PUD_FREE
#include <asm-generic/pgalloc.h>
static inline void pmd_populate_kernel(struct mm_struct *mm,
pmd_t *pmd, pte_t *pte)
{
unsigned long pfn = virt_to_pfn(pte);
set_pmd(pmd, __pmd((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
}
static inline void pmd_populate(struct mm_struct *mm,
pmd_t *pmd, pgtable_t pte)
{
unsigned long pfn = virt_to_pfn(page_address(pte));
set_pmd(pmd, __pmd((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
}
#ifndef __PAGETABLE_PMD_FOLDED
static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
{
unsigned long pfn = virt_to_pfn(pmd);
set_pud(pud, __pud((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
}
static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
{
if (pgtable_l4_enabled) {
unsigned long pfn = virt_to_pfn(pud);
set_p4d(p4d, __p4d((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
}
}
static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d,
pud_t *pud)
{
if (pgtable_l4_enabled) {
unsigned long pfn = virt_to_pfn(pud);
set_p4d_safe(p4d,
__p4d((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
}
}
static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d)
{
if (pgtable_l5_enabled) {
unsigned long pfn = virt_to_pfn(p4d);
set_pgd(pgd, __pgd((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
}
}
static inline void pgd_populate_safe(struct mm_struct *mm, pgd_t *pgd,
p4d_t *p4d)
{
if (pgtable_l5_enabled) {
unsigned long pfn = virt_to_pfn(p4d);
set_pgd_safe(pgd,
__pgd((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
}
}
#define pud_free pud_free
static inline void pud_free(struct mm_struct *mm, pud_t *pud)
{
if (pgtable_l4_enabled)
__pud_free(mm, pud);
}
static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
unsigned long addr)
{
if (pgtable_l4_enabled)
tlb_remove_ptdesc(tlb, virt_to_ptdesc(pud));
}
static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
unsigned long addr)
{
if (pgtable_l5_enabled)
tlb_remove_ptdesc(tlb, virt_to_ptdesc(p4d));
}
#endif /* __PAGETABLE_PMD_FOLDED */
static inline void sync_kernel_mappings(pgd_t *pgd)
{
memcpy(pgd + USER_PTRS_PER_PGD,
init_mm.pgd + USER_PTRS_PER_PGD,
(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
}
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
pgd_t *pgd;
pgd = __pgd_alloc(mm, 0);
if (likely(pgd != NULL)) {
/* Copy kernel mappings */
sync_kernel_mappings(pgd);
}
return pgd;
}
#ifndef __PAGETABLE_PMD_FOLDED
static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
unsigned long addr)
{
tlb_remove_ptdesc(tlb, virt_to_ptdesc(pmd));
}
#endif /* __PAGETABLE_PMD_FOLDED */
static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
unsigned long addr)
{
tlb_remove_ptdesc(tlb, page_ptdesc(pte));
}
#endif /* CONFIG_MMU */
#endif /* _ASM_RISCV_PGALLOC_H */