linux/arch/sparc/mm/tlb.c
Ryan Roberts a1d416bf9f sparc/mm: disable preemption in lazy mmu mode
Since commit 38e0edb15b ("mm/apply_to_range: call pte function with lazy
updates") it's been possible for arch_[enter|leave]_lazy_mmu_mode() to be
called without holding a page table lock (for the kernel mappings case),
and therefore it is possible that preemption may occur while in the lazy
mmu mode.  The Sparc lazy mmu implementation is not robust to preemption
since it stores the lazy mode state in a per-cpu structure and does not
attempt to manage that state on task switch.

Powerpc had the same issue and fixed it by explicitly disabling preemption
in arch_enter_lazy_mmu_mode() and re-enabling in
arch_leave_lazy_mmu_mode().  See commit b9ef323ea1 ("powerpc/64s:
Disable preemption in hash lazy mmu mode").

Given Sparc's lazy mmu mode is based on powerpc's, let's fix it in the
same way here.

Link: https://lkml.kernel.org/r/20250303141542.3371656-4-ryan.roberts@arm.com
Fixes: 38e0edb15b ("mm/apply_to_range: call pte function with lazy updates")
Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Andreas Larsson <andreas@gaisler.com>
Acked-by: Juergen Gross <jgross@suse.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-03-17 00:05:34 -07:00

308 lines
6.6 KiB
C

// SPDX-License-Identifier: GPL-2.0
/* arch/sparc/mm/tlb.c
*
* Copyright (C) 2004 David S. Miller <davem@redhat.com>
*/
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/preempt.h>
#include <linux/pagemap.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>
#include <asm/tlb.h>
/* Heavily inspired by the ppc64 code. */
/*
 * Per-CPU batch of pending TLB flushes.  tlb_batch_add_one() queues
 * virtual addresses here and flush_tlb_pending() drains them; the
 * "active" flag is set and cleared by the lazy MMU mode enter/leave hooks.
 */
static DEFINE_PER_CPU(struct tlb_batch, tlb_batch);
/*
 * Drain this CPU's TLB batch.
 *
 * Does nothing when the batch is empty.  Otherwise the queued addresses
 * are first flushed from the TSB via flush_tsb_user(); the TLB itself is
 * only flushed when the owning mm still has a valid context.  A single
 * queued address goes through global_flush_tlb_page(), several go through
 * the SMP or UP bulk flush primitive.  The batch count is reset to zero
 * afterwards.
 */
void flush_tlb_pending(void)
{
	struct tlb_batch *batch = &get_cpu_var(tlb_batch);
	struct mm_struct *mm = batch->mm;

	if (batch->tlb_nr) {
		flush_tsb_user(batch);

		if (CTX_VALID(mm->context)) {
			if (batch->tlb_nr == 1) {
				global_flush_tlb_page(mm, batch->vaddrs[0]);
			} else {
#ifdef CONFIG_SMP
				smp_flush_tlb_pending(mm, batch->tlb_nr,
						      &batch->vaddrs[0]);
#else
				__flush_tlb_pending(CTX_HWBITS(mm->context),
						    batch->tlb_nr,
						    &batch->vaddrs[0]);
#endif
			}
		}

		/* Everything queued has been handled; start a fresh batch. */
		batch->tlb_nr = 0;
	}

	put_cpu_var(tlb_batch);
}
/*
 * Enter lazy MMU mode: from now on tlb_batch_add_one() queues flushes in
 * this CPU's batch instead of issuing them immediately.
 *
 * Preemption is disabled first and stays disabled until
 * arch_leave_lazy_mmu_mode(), because the "active" state lives in a
 * per-CPU structure and is not carried across a task switch.
 */
void arch_enter_lazy_mmu_mode(void)
{
	preempt_disable();
	this_cpu_ptr(&tlb_batch)->active = 1;
}
/*
 * Leave lazy MMU mode: flush anything still queued in this CPU's batch,
 * mark the batch inactive, and re-enable the preemption that
 * arch_enter_lazy_mmu_mode() disabled.
 */
void arch_leave_lazy_mmu_mode(void)
{
	struct tlb_batch *batch = this_cpu_ptr(&tlb_batch);

	if (batch->tlb_nr != 0)
		flush_tlb_pending();

	batch->active = 0;
	preempt_enable();
}
/*
 * Queue (or immediately perform) a TLB flush for one virtual address.
 *
 * @mm:             address space the mapping belongs to
 * @vaddr:          virtual address; rounded down to a page boundary here
 * @exec:           true if the old mapping was executable; recorded in
 *                  bit 0 of the queued address
 * @hugepage_shift: page-size shift of the mapping being flushed
 *
 * A batch only ever holds addresses of one mm and one hugepage_shift, so
 * a change in either drains the pending batch first.  When the batch is
 * not active (outside lazy MMU mode) the address is flushed on the spot.
 */
static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr,
			      bool exec, unsigned int hugepage_shift)
{
	struct tlb_batch *batch = &get_cpu_var(tlb_batch);
	unsigned long count;

	/* Page-align the address and fold the exec flag into bit 0. */
	vaddr = (vaddr & PAGE_MASK) | (exec ? 0x1UL : 0UL);

	count = batch->tlb_nr;

	/* Pending entries belong to another mm: get rid of them first. */
	if (unlikely(count != 0 && batch->mm != mm)) {
		flush_tlb_pending();
		count = 0;
	}

	if (batch->active) {
		if (count == 0) {
			/* Empty batch: it now belongs to this mm/page size. */
			batch->mm = mm;
			batch->hugepage_shift = hugepage_shift;
		} else if (batch->hugepage_shift != hugepage_shift) {
			/* Page size changed mid-batch: drain and restart. */
			flush_tlb_pending();
			batch->hugepage_shift = hugepage_shift;
			count = 0;
		}

		batch->vaddrs[count] = vaddr;
		count++;
		batch->tlb_nr = count;

		/* Batch is full, flush it now. */
		if (count >= TLB_BATCH_NR)
			flush_tlb_pending();
	} else {
		/* Not batching: flush this single page right away. */
		flush_tsb_user_page(mm, vaddr, hugepage_shift);
		global_flush_tlb_page(mm, vaddr);
	}

	put_cpu_var(tlb_batch);
}
/*
 * Flush the D-cache for the folio behind a dirty pte when the kernel and
 * user virtual addresses differ in bit 13.
 *
 * Skipped entirely on hypervisor TLB types, for clean ptes, for invalid
 * or reserved pages, and for folios without a flush mapping.
 * NOTE(review): bit 13 is presumably the D-cache alias (colour) bit -
 * confirm against the sparc64 cache documentation.
 */
static void tlb_batch_flush_dcache_alias(struct mm_struct *mm,
					 unsigned long vaddr, pte_t orig)
{
	unsigned long pfn, kaddr;
	struct folio *folio;
	struct page *page;

	if (tlb_type == hypervisor || !pte_dirty(orig))
		return;

	pfn = pte_pfn(orig);
	if (!pfn_valid(pfn))
		return;

	page = pfn_to_page(pfn);
	if (PageReserved(page))
		return;

	/* A real file page? */
	folio = page_folio(page);
	if (!folio_flush_mapping(folio))
		return;

	kaddr = (unsigned long) page_address(page);
	if ((kaddr ^ vaddr) & (1 << 13))
		flush_dcache_folio_all(mm, folio);
}

/*
 * Handle the cache and TLB side effects of replacing the pte @orig that
 * mapped @vaddr in @mm.
 *
 * The D-cache is flushed first if needed; the TLB flush for @vaddr is
 * then queued through tlb_batch_add_one() unless @fullmm is set.
 * @ptep is not used here.
 */
void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
		   pte_t *ptep, pte_t orig, int fullmm,
		   unsigned int hugepage_shift)
{
	tlb_batch_flush_dcache_alias(mm, vaddr, orig);

	if (fullmm)
		return;

	tlb_batch_add_one(mm, vaddr, pte_exec(orig), hugepage_shift);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
 * Queue a TLB flush for every valid pte in the page table that @pmd
 * points to.
 *
 * @mm:    address space the page table belongs to
 * @vaddr: first virtual address covered by the table; HPAGE_SIZE worth
 *         of address space is scanned from here in PAGE_SIZE steps
 * @pmd:   the (old) pmd value referencing the pte table
 *
 * Returns silently if the pte table cannot be mapped.
 */
static void tlb_batch_pmd_scan(struct mm_struct *mm, unsigned long vaddr,
			       pmd_t pmd)
{
	unsigned long end;
	pte_t *start_pte, *pte;

	start_pte = pte_offset_map(&pmd, vaddr);
	if (!start_pte)
		return;

	end = vaddr + HPAGE_SIZE;
	for (pte = start_pte; vaddr < end; pte++, vaddr += PAGE_SIZE) {
		if (pte_val(*pte) & _PAGE_VALID) {
			bool exec = pte_exec(*pte);

			tlb_batch_add_one(mm, vaddr, exec, PAGE_SHIFT);
		}
	}

	/*
	 * Unmap with the pointer we got from pte_offset_map(): after the
	 * loop the cursor points one entry past the end of the table and
	 * must not be handed to pte_unmap().
	 */
	pte_unmap(start_pte);
}
/*
 * Bookkeeping for a pmd changing from @orig to @pmd in @mm.
 *
 * Updates the per-mm huge page counters when the _PAGE_PMD_HUGE bit
 * changes, then queues TLB flushes for whatever @orig used to map at
 * @addr.  Nothing is done for init_mm.
 */
static void __set_pmd_acct(struct mm_struct *mm, unsigned long addr,
			   pmd_t orig, pmd_t pmd)
{
	pte_t old_as_pte;
	bool exec;

	if (mm == &init_mm)
		return;

	if ((pmd_val(orig) ^ pmd_val(pmd)) & _PAGE_PMD_HUGE) {
		/*
		 * Only THP pmds are set through this routine; hugetlb
		 * pages are handled elsewhere.  The huge zero page is
		 * special: like a hugetlb page it carries no RSS but
		 * still needs TSB entries, so it is counted in
		 * hugetlb_pte_count rather than thp_pte_count.
		 */
		if (!(pmd_val(pmd) & _PAGE_PMD_HUGE)) {
			/* The huge bit was cleared: @orig was the huge one. */
			if (is_huge_zero_pmd(orig))
				mm->context.hugetlb_pte_count--;
			else
				mm->context.thp_pte_count--;
		} else if (is_huge_zero_pmd(pmd)) {
			mm->context.hugetlb_pte_count++;
		} else {
			mm->context.thp_pte_count++;
		}

		/*
		 * Deliberately no TSB hash table allocation here when
		 * none exists yet: various locks are held, so that would
		 * be a GFP_KERNEL allocation in atomic context.  The
		 * first TLB miss on a hugepage takes care of it instead.
		 */
	}

	/* Nothing was mapped before, so there is nothing to flush. */
	if (pmd_none(orig))
		return;

	addr &= HPAGE_MASK;

	if (!pmd_trans_huge(orig)) {
		/* Old pmd pointed to a pte table: flush each valid pte. */
		tlb_batch_pmd_scan(mm, addr, orig);
		return;
	}

	/* Old pmd was a huge mapping: flush both REAL_HPAGE_SIZE halves. */
	old_as_pte = __pte(pmd_val(orig));
	exec = pte_exec(old_as_pte);
	tlb_batch_add_one(mm, addr, exec, REAL_HPAGE_SHIFT);
	tlb_batch_add_one(mm, addr + REAL_HPAGE_SIZE, exec,
			  REAL_HPAGE_SHIFT);
}
/*
 * Store @pmd into *@pmdp and run the accounting / TLB flush queueing for
 * the transition from the previous value.
 */
void set_pmd_at(struct mm_struct *mm, unsigned long addr,
		pmd_t *pmdp, pmd_t pmd)
{
	pmd_t prev = *pmdp;

	*pmdp = pmd;
	__set_pmd_acct(mm, addr, prev, pmd);
}
/*
 * Replace *@pmdp with @pmd using a cmpxchg64() retry loop, then run the
 * accounting / TLB flush queueing for the transition.
 *
 * Returns the pmd value that was replaced.
 */
static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
		unsigned long address, pmd_t *pmdp, pmd_t pmd)
{
	pmd_t prev;

	/* Retry until the value we sampled is the one we swapped out. */
	for (;;) {
		prev = *pmdp;
		if (cmpxchg64(&pmdp->pmd, prev.pmd, pmd.pmd) == prev.pmd)
			break;
	}

	__set_pmd_acct(vma->vm_mm, address, prev, pmd);

	return prev;
}
/*
 * This routine is only called when splitting a THP.
 *
 * Clears _PAGE_VALID in *@pmdp, flushes the TLB for the HPAGE_PMD_SIZE
 * range starting at @address, and returns the pmd value that was in
 * place before the update.
 */
pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
		     pmd_t *pmdp)
{
	pmd_t invalid, prev;

	VM_WARN_ON_ONCE(!pmd_present(*pmdp));

	invalid = __pmd(pmd_val(*pmdp) & ~_PAGE_VALID);
	prev = pmdp_establish(vma, address, pmdp, invalid);
	flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);

	/*
	 * When a THP is split, set_pmd_at() is not called in a way that
	 * would decrement thp_pte_count, so the decrement happens here.
	 * The pmd is sanity checked first: it must be a huge pmd and must
	 * not be the huge zero page.
	 */
	if (pmd_val(invalid) & _PAGE_PMD_HUGE) {
		if (!is_huge_zero_pmd(invalid))
			vma->vm_mm->context.thp_pte_count--;
	}

	return prev;
}
/*
 * Deposit a preallocated page table for the huge pmd at @pmdp.
 *
 * The page table memory itself is used as a list node.  The new table is
 * linked to the one currently recorded in pmd_huge_pte() (if any) and
 * then becomes the recorded one.  Caller must hold mm->page_table_lock.
 */
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				pgtable_t pgtable)
{
	struct list_head *node = (struct list_head *) pgtable;

	assert_spin_locked(&mm->page_table_lock);

	/* FIFO */
	if (pmd_huge_pte(mm, pmdp))
		list_add(node, (struct list_head *) pmd_huge_pte(mm, pmdp));
	else
		INIT_LIST_HEAD(node);

	pmd_huge_pte(mm, pmdp) = pgtable;
}
/*
 * Withdraw the page table currently recorded in pmd_huge_pte() for the
 * huge pmd at @pmdp and return it.
 *
 * If other deposited tables remain on the list, the next one becomes the
 * recorded table; otherwise the record is cleared.  Caller must hold
 * mm->page_table_lock.
 */
pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
	pgtable_t pgtable;
	struct list_head *node;

	assert_spin_locked(&mm->page_table_lock);

	/* FIFO */
	pgtable = pmd_huge_pte(mm, pmdp);
	node = (struct list_head *) pgtable;

	if (!list_empty(node)) {
		pmd_huge_pte(mm, pmdp) = (pgtable_t) node->next;
		list_del(node);
	} else {
		pmd_huge_pte(mm, pmdp) = NULL;
	}

	/*
	 * Zero the first two entries of the table, where the list node was
	 * overlaid on the page table memory.
	 */
	pte_val(pgtable[0]) = 0;
	pte_val(pgtable[1]) = 0;

	return pgtable;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */