mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-09-18 22:14:16 +00:00

Rearrange 64K PTE bits to free up bits 3, 4, 5 and 6, in the 4K backed HPTE pages. These bits continue to be used for 64K backed HPTE pages in this patch, but will be freed up in the next patch. The bit numbers are big-endian as defined in the ISA3.0.

The patch does the following change to the 4k HPTE backed 64K PTE's format.

H_PAGE_BUSY moves from bit 3 to bit 9 (B bit in the figure below)
V0 which occupied bit 4 is not used anymore.
V1 which occupied bit 5 is not used anymore.
V2 which occupied bit 6 is not used anymore.
V3 which occupied bit 7 is not used anymore.

Before the patch, the 4k backed 64k PTE format was as follows

 0 1 2 3 4  5  6  7  8 9 10...........................63
 : : : : :  :  :  :  : : :                            :
 v v v v v  v  v  v  v v v                            v

,-,-,-,-,--,--,--,--,-,-,-,-,-,------------------,-,-,-,
|x|x|x|B|V0|V1|V2|V3|x| | |x|x|................|x|x|x|x| <- primary pte
'_'_'_'_'__'__'__'__'_'_'_'_'_'________________'_'_'_'_'
|S|G|I|X|S |G |I |X |S|G|I|X|..................|S|G|I|X| <- secondary pte
'_'_'_'_'__'__'__'__'_'_'_'_'__________________'_'_'_'_'

After the patch, the 4k backed 64k PTE format is as follows

 0 1 2 3 4  5  6  7  8 9 10...........................63
 : : : : :  :  :  :  : : :                            :
 v v v v v  v  v  v  v v v                            v

,-,-,-,-,--,--,--,--,-,-,-,-,-,------------------,-,-,-,
|x|x|x| |  |  |  |  |x|B| |x|x|................|.|.|.|.| <- primary pte
'_'_'_'_'__'__'__'__'_'_'_'_'_'________________'_'_'_'_'
|S|G|I|X|S |G |I |X |S|G|I|X|..................|S|G|I|X| <- secondary pte
'_'_'_'_'__'__'__'__'_'_'_'_'__________________'_'_'_'_'

The four bits S,G,I,X (one quadruplet per 4k HPTE) that cache the hash-bucket slot value are initialized to 1,1,1,1, indicating an invalid slot. If a HPTE gets cached in a 1111 slot (i.e. the 7th slot of the secondary hash bucket), it is released immediately. In other words, even though 1111 is a valid slot value in the hash bucket, we consider it invalid and release the slot and the HPTE.
This gives us the opportunity to determine the validity of the S,G,I,X bits based on their contents and not on any of the bits V0,V1,V2 or V3 in the primary PTE.

When we release a HPTE cached in the 1111 slot we also release a legitimate slot in the primary hash bucket and unmap its corresponding HPTE. This is to ensure that we do get a HPTE cached in a slot of the primary hash bucket the next time we retry.

Though treating the 1111 slot as invalid reduces the number of available slots in the hash bucket and may have an effect on performance, the probability of hitting a 1111 slot is extremely low.

Compared to the current scheme, the above scheme reduces the number of false hash table updates significantly and has the added advantage of releasing four valuable PTE bits for other purposes.

NOTE: even though bits 3, 4, 5, 6, 7 are not used when the 64K PTE is backed by 4k HPTE, they continue to be used if the PTE gets backed by 64k HPTE. The next patch will decouple that as well, and truly release the bits.

This idea was jointly developed by Paul Mackerras, Aneesh, Michael Ellerman and myself.

4K PTE format remains unchanged currently.

The patch does the following code changes
a) PTE flags are split between 64k and 4k header files.
b) __hash_page_4K() is reimplemented to reflect the above logic.

Acked-by: Balbir Singh <bsingharora@gmail.com>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
213 lines
6.2 KiB
C
213 lines
6.2 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _ASM_POWERPC_BOOK3S_64_HASH_H
|
|
#define _ASM_POWERPC_BOOK3S_64_HASH_H
|
|
#ifdef __KERNEL__
|
|
|
|
/*
 * Common bits between 4K and 64K pages in a linux-style PTE.
 * Additional, page-size specific bits may be defined in the hash-4k.h /
 * hash-64k.h header that this file includes.
 *
 * H_PTE_NONE_MASK  - bits ignored by hash__pte_none() when testing for an
 *                    empty PTE.
 * H_PAGE_F_GIX     - three-bit field made of three _RPAGE_* software bits;
 *                    H_PAGE_F_GIX_SHIFT is the shift that goes with it.
 */
#define H_PTE_NONE_MASK		_PAGE_HPTEFLAGS
#define H_PAGE_F_GIX_SHIFT	56
#define H_PAGE_F_SECOND		_RPAGE_RSV2	/* HPTE is in 2ndary HPTEG */
#define H_PAGE_F_GIX		(_RPAGE_RSV3 | _RPAGE_RSV4 | _RPAGE_RPN44)
#define H_PAGE_HASHPTE		_RPAGE_RPN43	/* PTE has associated HPTE */
|
|
|
|
#ifdef CONFIG_PPC_64K_PAGES
|
|
#include <asm/book3s/64/hash-64k.h>
|
|
#else
|
|
#include <asm/book3s/64/hash-4k.h>
|
|
#endif
|
|
|
|
/*
 * Size of EA range mapped by our pagetables.
 *
 * H_PGTABLE_EADDR_SIZE is a bit count: the index widths of the four
 * page-table levels plus PAGE_SHIFT.  H_PGTABLE_RANGE is the matching
 * size in bytes (1 << H_PGTABLE_EADDR_SIZE).
 */
#define H_PGTABLE_EADDR_SIZE	(H_PTE_INDEX_SIZE + H_PMD_INDEX_SIZE + \
				 H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + PAGE_SHIFT)
#define H_PGTABLE_RANGE		(ASM_CONST(1) << H_PGTABLE_EADDR_SIZE)
|
|
|
|
/*
 * H_PMD_CACHE_INDEX: index size used for the pmd table.  It is one bit
 * larger than H_PMD_INDEX_SIZE when both THP and 64K pages are enabled,
 * and equal to H_PMD_INDEX_SIZE otherwise.
 */
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_PPC_64K_PAGES)
/*
 * only with hash 64k we need to use the second half of pmd page table
 * to store pointer to deposited pgtable_t
 */
#define H_PMD_CACHE_INDEX	(H_PMD_INDEX_SIZE + 1)
#else
#define H_PMD_CACHE_INDEX	H_PMD_INDEX_SIZE
#endif
|
|
/*
 * Define the address range of the kernel non-linear virtual area:
 * it starts at H_KERN_VIRT_START and is H_KERN_VIRT_SIZE bytes long.
 */
#define H_KERN_VIRT_START	ASM_CONST(0xD000000000000000)
#define H_KERN_VIRT_SIZE	ASM_CONST(0x0000400000000000) /* 64T */
|
|
|
|
/*
 * The vmalloc space starts at H_KERN_VIRT_START and is H_VMALLOC_SIZE
 * (56T) long.  The kernel I/O area begins immediately after it, at
 * H_VMALLOC_END.
 *
 * NOTE(review): an earlier version of this comment said vmalloc occupies
 * "half" of the kernel virtual area; that no longer matches the 56T
 * constant below.
 */
#define H_VMALLOC_START	H_KERN_VIRT_START
#define H_VMALLOC_SIZE	ASM_CONST(0x380000000000) /* 56T */
#define H_VMALLOC_END	(H_VMALLOC_START + H_VMALLOC_SIZE)

#define H_KERN_IO_START	H_VMALLOC_END
|
|
|
|
/*
 * Region IDs
 *
 * REGION_ID(ea) is the effective address shifted right by REGION_SHIFT,
 * i.e. the top four bits of a 64-bit address; REGION_MASK selects those
 * same bits in place.
 */
#define REGION_SHIFT		60UL
#define REGION_MASK		(0xfUL << REGION_SHIFT)
#define REGION_ID(ea)		(((unsigned long)(ea)) >> REGION_SHIFT)

#define VMALLOC_REGION_ID	(REGION_ID(H_VMALLOC_START))
#define KERNEL_REGION_ID	(REGION_ID(PAGE_OFFSET))
#define VMEMMAP_REGION_ID	(0xfUL)	/* Server only */
#define USER_REGION_ID		(0UL)
|
|
|
|
/*
 * Defines the address of the vmemmap area, in its own region on
 * hash table CPUs: the base is the first address of region
 * VMEMMAP_REGION_ID.
 */
#define H_VMEMMAP_BASE		(VMEMMAP_REGION_ID << REGION_SHIFT)
|
|
|
|
/*
 * With CONFIG_PPC_MM_SLICES, advertise that the architecture provides its
 * own unmapped-area lookup (both the bottom-up and the top-down variant).
 */
#ifdef CONFIG_PPC_MM_SLICES
#define HAVE_ARCH_UNMAPPED_AREA
#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
#endif /* CONFIG_PPC_MM_SLICES */
|
|
|
|
|
|
/*
 * PTEIDX nibble: a 4-bit value whose top bit is _PTEIDX_SECONDARY and
 * whose low three bits are the _PTEIDX_GROUP_IX field.
 */
#define _PTEIDX_SECONDARY	0x8
#define _PTEIDX_GROUP_IX	0x7
|
|
|
|
/*
 * Bits that must be clear in a valid pmd / pud entry: everything below the
 * size of the table the entry points to (used by hash__pmd_bad() and
 * hash__pud_bad()).
 */
#define H_PMD_BAD_BITS		(PTE_TABLE_SIZE-1)
#define H_PUD_BAD_BITS		(PMD_TABLE_SIZE-1)
|
|
|
|
#ifndef __ASSEMBLY__
|
|
/*
 * A pmd / pud entry is bad when any of the H_PMD_BAD_BITS / H_PUD_BAD_BITS
 * are set in its value.  Both macros evaluate to the offending bits
 * (non-zero means bad).
 */
#define hash__pmd_bad(pmd)	(pmd_val(pmd) & H_PMD_BAD_BITS)
#define hash__pud_bad(pud)	(pud_val(pud) & H_PUD_BAD_BITS)
|
|
/*
 * Report whether a pgd entry is bad.  Unlike the pmd/pud checks, the only
 * test made here is for an all-zero value.
 *
 * Returns 1 when pgd_val(pgd) is zero, 0 otherwise.
 */
static inline int hash__pgd_bad(pgd_t pgd)
{
	return (pgd_val(pgd) == 0);
}
|
|
/*
 * Declared only when CONFIG_STRICT_KERNEL_RWX is enabled; the definitions
 * live outside this header.
 */
#ifdef CONFIG_STRICT_KERNEL_RWX
extern void hash__mark_rodata_ro(void);
extern void hash__mark_initmem_nx(void);
#endif
|
|
|
|
extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
|
|
pte_t *ptep, unsigned long pte, int huge);
|
|
extern unsigned long htab_convert_pte_flags(unsigned long pteflags);
|
|
/* Atomic PTE updates */
|
|
static inline unsigned long hash__pte_update(struct mm_struct *mm,
|
|
unsigned long addr,
|
|
pte_t *ptep, unsigned long clr,
|
|
unsigned long set,
|
|
int huge)
|
|
{
|
|
__be64 old_be, tmp_be;
|
|
unsigned long old;
|
|
|
|
__asm__ __volatile__(
|
|
"1: ldarx %0,0,%3 # pte_update\n\
|
|
and. %1,%0,%6\n\
|
|
bne- 1b \n\
|
|
andc %1,%0,%4 \n\
|
|
or %1,%1,%7\n\
|
|
stdcx. %1,0,%3 \n\
|
|
bne- 1b"
|
|
: "=&r" (old_be), "=&r" (tmp_be), "=m" (*ptep)
|
|
: "r" (ptep), "r" (cpu_to_be64(clr)), "m" (*ptep),
|
|
"r" (cpu_to_be64(H_PAGE_BUSY)), "r" (cpu_to_be64(set))
|
|
: "cc" );
|
|
/* huge pages use the old page table lock */
|
|
if (!huge)
|
|
assert_pte_locked(mm, addr);
|
|
|
|
old = be64_to_cpu(old_be);
|
|
if (old & H_PAGE_HASHPTE)
|
|
hpte_need_flush(mm, addr, ptep, old, huge);
|
|
|
|
return old;
|
|
}
|
|
|
|
/* Set the dirty and/or accessed bits atomically in a linux PTE, this
|
|
* function doesn't need to flush the hash entry
|
|
*/
|
|
static inline void hash__ptep_set_access_flags(pte_t *ptep, pte_t entry)
|
|
{
|
|
__be64 old, tmp, val, mask;
|
|
|
|
mask = cpu_to_be64(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_READ | _PAGE_WRITE |
|
|
_PAGE_EXEC | _PAGE_SOFT_DIRTY);
|
|
|
|
val = pte_raw(entry) & mask;
|
|
|
|
__asm__ __volatile__(
|
|
"1: ldarx %0,0,%4\n\
|
|
and. %1,%0,%6\n\
|
|
bne- 1b \n\
|
|
or %0,%3,%0\n\
|
|
stdcx. %0,0,%4\n\
|
|
bne- 1b"
|
|
:"=&r" (old), "=&r" (tmp), "=m" (*ptep)
|
|
:"r" (val), "r" (ptep), "m" (*ptep), "r" (cpu_to_be64(H_PAGE_BUSY))
|
|
:"cc");
|
|
}
|
|
|
|
/*
 * Compare two PTEs while ignoring the _PAGE_HPTEFLAGS bits.
 *
 * The comparison is done on the raw PTE values, which is why the mask is
 * converted with cpu_to_be64() first.
 *
 * Returns 1 when the PTEs match in every other bit, 0 otherwise.
 */
static inline int hash__pte_same(pte_t pte_a, pte_t pte_b)
{
	return (((pte_raw(pte_a) ^ pte_raw(pte_b)) & ~cpu_to_be64(_PAGE_HPTEFLAGS)) == 0);
}
|
|
|
|
/*
 * A PTE counts as "none" when no bit outside H_PTE_NONE_MASK is set, so
 * bits inside that mask do not make an otherwise empty PTE look populated.
 *
 * Returns 1 for an empty PTE, 0 otherwise.
 */
static inline int hash__pte_none(pte_t pte)
{
	return (pte_val(pte) & ~H_PTE_NONE_MASK) == 0;
}
|
|
|
|
/*
 * Declaration only; the definition lives outside this header.
 * NOTE(review): going by the name and parameters this looks up the hash
 * slot recorded in @rpte for sub-page @subpg_index - confirm against the
 * definition.
 */
unsigned long pte_get_hash_gslot(unsigned long vpn, unsigned long shift,
		int ssize, real_pte_t rpte, unsigned int subpg_index);
|
|
|
|
/* This low level function performs the actual PTE insertion
|
|
* Setting the PTE depends on the MMU type and other factors. It's
|
|
* an horrible mess that I'm not going to try to clean up now but
|
|
* I'm keeping it in one place rather than spread around
|
|
*/
|
|
static inline void hash__set_pte_at(struct mm_struct *mm, unsigned long addr,
|
|
pte_t *ptep, pte_t pte, int percpu)
|
|
{
|
|
/*
|
|
* Anything else just stores the PTE normally. That covers all 64-bit
|
|
* cases, and 32-bit non-hash with 32-bit PTEs.
|
|
*/
|
|
*ptep = pte;
|
|
}
|
|
|
|
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
|
extern void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
|
|
pmd_t *pmdp, unsigned long old_pmd);
|
|
#else
|
|
static inline void hpte_do_hugepage_flush(struct mm_struct *mm,
|
|
unsigned long addr, pmd_t *pmdp,
|
|
unsigned long old_pmd)
|
|
{
|
|
WARN(1, "%s called with THP disabled\n", __func__);
|
|
}
|
|
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
|
|
|
|
|
|
extern int hash__map_kernel_page(unsigned long ea, unsigned long pa,
|
|
unsigned long flags);
|
|
extern int __meminit hash__vmemmap_create_mapping(unsigned long start,
|
|
unsigned long page_size,
|
|
unsigned long phys);
|
|
extern void hash__vmemmap_remove_mapping(unsigned long start,
|
|
unsigned long page_size);
|
|
|
|
int hash__create_section_mapping(unsigned long start, unsigned long end);
|
|
int hash__remove_section_mapping(unsigned long start, unsigned long end);
|
|
|
|
#endif /* !__ASSEMBLY__ */
|
|
#endif /* __KERNEL__ */
|
|
#endif /* _ASM_POWERPC_BOOK3S_64_HASH_H */
|