2019-06-04 10:11:33 +02:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0-only */
|
2005-04-16 15:20:36 -07:00
|
|
|
/*
|
2008-08-02 10:55:55 +01:00
|
|
|
* arch/arm/include/asm/tlbflush.h
|
2005-04-16 15:20:36 -07:00
|
|
|
*
|
|
|
|
* Copyright (C) 1999-2003 Russell King
|
|
|
|
*/
|
|
|
|
#ifndef _ASMARM_TLBFLUSH_H
|
|
|
|
#define _ASMARM_TLBFLUSH_H
|
|
|
|
|
2017-02-04 00:16:44 +01:00
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
# include <linux/mm_types.h>
|
|
|
|
#endif
|
|
|
|
|
2011-02-20 12:27:49 +00:00
|
|
|
#ifdef CONFIG_MMU
|
2006-02-24 21:41:25 +00:00
|
|
|
|
2005-04-16 15:20:36 -07:00
|
|
|
#include <asm/glue.h>
|
|
|
|
|
|
|
|
/*
 * TLB capability flags.  Each TLB model below ORs together the flags
 * describing the operations it needs; the inline helpers further down
 * test them.  U/D/I in the names denote the unified, data and
 * instruction TLB variants of an operation.
 */

/* Invalidate a single page entry. */
#define TLB_V4_U_PAGE	(1 << 1)
#define TLB_V4_D_PAGE	(1 << 2)
#define TLB_V4_I_PAGE	(1 << 3)
#define TLB_V6_U_PAGE	(1 << 4)
#define TLB_V6_D_PAGE	(1 << 5)
#define TLB_V6_I_PAGE	(1 << 6)

/* Invalidate the whole TLB. */
#define TLB_V4_U_FULL	(1 << 9)
#define TLB_V4_D_FULL	(1 << 10)
#define TLB_V4_I_FULL	(1 << 11)
#define TLB_V6_U_FULL	(1 << 12)
#define TLB_V6_D_FULL	(1 << 13)
#define TLB_V6_I_FULL	(1 << 14)

/* Invalidate by ASID. */
#define TLB_V6_U_ASID	(1 << 16)
#define TLB_V6_D_ASID	(1 << 17)
#define TLB_V6_I_ASID	(1 << 18)

/* Branch predictor invalidate. */
#define TLB_V6_BP	(1 << 19)

/* Unified Inner Shareable TLB operations (ARMv7 MP extensions) */
#define TLB_V7_UIS_PAGE	(1 << 20)
#define TLB_V7_UIS_FULL	(1 << 21)
#define TLB_V7_UIS_ASID	(1 << 22)
#define TLB_V7_UIS_BP	(1 << 23)

/* Barrier (dsb, and isb where used) required after the TLB operation. */
#define TLB_BARRIER	(1 << 28)
#define TLB_L2CLEAN_FR	(1 << 29)		/* Feroceon */
#define TLB_DCLEAN	(1 << 30)
/* dsb required before the TLB operation. */
#define TLB_WB		(1 << 31)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* MMU TLB Model
|
|
|
|
* =============
|
|
|
|
*
|
|
|
|
* We have the following to choose from:
|
|
|
|
* v4 - ARMv4 without write buffer
|
|
|
|
* v4wb - ARMv4 with write buffer without I TLB flush entry instruction
|
|
|
|
* v4wbi - ARMv4 with write buffer with I TLB flush entry instruction
|
2008-06-22 22:45:04 +02:00
|
|
|
* fr - Feroceon (v4wbi with non-outer-cacheable page table walks)
|
2011-07-05 09:01:13 +01:00
|
|
|
* fa - Faraday (v4 with write buffer with UTLB)
|
2005-04-16 15:20:36 -07:00
|
|
|
* v6wbi - ARMv6 with write buffer with I TLB flush entry instruction
|
2008-08-12 00:04:15 +01:00
|
|
|
* v7wbi - identical to v6wbi
|
2005-04-16 15:20:36 -07:00
|
|
|
*/
|
|
|
|
/*
 * Start from a clean slate: the per-model sections below define _TLB for
 * the first configured model and MULTI_TLB as soon as a second one shows up.
 */
#undef MULTI_TLB
#undef _TLB

/* SMP_ON_UP kernels always go through the cpu_tlb function table. */
#if defined(CONFIG_SMP_ON_UP)
#define MULTI_TLB 1
#endif
|
|
|
|
|
2005-04-16 15:20:36 -07:00
|
|
|
/* v4 - ARMv4 without write buffer: unified full and per-page invalidates. */
#define v4_tlb_flags	(TLB_V4_U_FULL | TLB_V4_U_PAGE)

#if !defined(CONFIG_CPU_TLB_V4WT)
/* Not configured: neutral element for the AND of "always" and the OR of "possible". */
# define v4_always_flags	(-1UL)
# define v4_possible_flags	0
#else
# define v4_always_flags	v4_tlb_flags
# define v4_possible_flags	v4_tlb_flags
# if defined(_TLB)
#  define MULTI_TLB 1
# else
#  define _TLB v4
# endif
#endif
|
|
|
|
|
2011-07-05 09:01:13 +01:00
|
|
|
/* fa - Faraday (v4 with write buffer with UTLB). */
#define fa_tlb_flags	(TLB_WB | TLB_DCLEAN | TLB_BARRIER | \
			 TLB_V4_U_FULL | TLB_V4_U_PAGE)

#if !defined(CONFIG_CPU_TLB_FA)
/* Not configured: neutral element for the AND of "always" and the OR of "possible". */
# define fa_always_flags	(-1UL)
# define fa_possible_flags	0
#else
# define fa_always_flags	fa_tlb_flags
# define fa_possible_flags	fa_tlb_flags
# if defined(_TLB)
#  define MULTI_TLB 1
# else
#  define _TLB fa
# endif
#endif
|
|
|
|
|
2005-04-16 15:20:36 -07:00
|
|
|
/* v4wbi - ARMv4 with write buffer with I TLB flush entry instruction. */
#define v4wbi_tlb_flags	(TLB_WB | TLB_DCLEAN | \
			 TLB_V4_I_FULL | TLB_V4_D_FULL | \
			 TLB_V4_I_PAGE | TLB_V4_D_PAGE)

#if !defined(CONFIG_CPU_TLB_V4WBI)
/* Not configured: neutral element for the AND of "always" and the OR of "possible". */
# define v4wbi_always_flags	(-1UL)
# define v4wbi_possible_flags	0
#else
# define v4wbi_always_flags	v4wbi_tlb_flags
# define v4wbi_possible_flags	v4wbi_tlb_flags
# if defined(_TLB)
#  define MULTI_TLB 1
# else
#  define _TLB v4wbi
# endif
#endif
|
|
|
|
|
2008-06-22 22:45:04 +02:00
|
|
|
/*
 * fr - Feroceon: the v4wbi flag set plus TLB_L2CLEAN_FR.  Note that the
 * single-model case below selects the v4wbi routines via _TLB.
 */
#define fr_tlb_flags	(TLB_WB | TLB_DCLEAN | TLB_L2CLEAN_FR | \
			 TLB_V4_I_FULL | TLB_V4_D_FULL | \
			 TLB_V4_I_PAGE | TLB_V4_D_PAGE)

#if !defined(CONFIG_CPU_TLB_FEROCEON)
/* Not configured: neutral element for the AND of "always" and the OR of "possible". */
# define fr_always_flags	(-1UL)
# define fr_possible_flags	0
#else
# define fr_always_flags	fr_tlb_flags
# define fr_possible_flags	fr_tlb_flags
# if defined(_TLB)
#  define MULTI_TLB 1
# else
#  define _TLB v4wbi
# endif
#endif
|
|
|
|
|
2005-04-16 15:20:36 -07:00
|
|
|
/*
 * v4wb - ARMv4 with write buffer without I TLB flush entry instruction,
 * hence no TLB_V4_I_PAGE in this set.
 */
#define v4wb_tlb_flags	(TLB_WB | TLB_DCLEAN | \
			 TLB_V4_I_FULL | TLB_V4_D_FULL | \
			 TLB_V4_D_PAGE)

#if !defined(CONFIG_CPU_TLB_V4WB)
/* Not configured: neutral element for the AND of "always" and the OR of "possible". */
# define v4wb_always_flags	(-1UL)
# define v4wb_possible_flags	0
#else
# define v4wb_always_flags	v4wb_tlb_flags
# define v4wb_possible_flags	v4wb_tlb_flags
# if defined(_TLB)
#  define MULTI_TLB 1
# else
#  define _TLB v4wb
# endif
#endif
|
|
|
|
|
2011-07-05 09:01:13 +01:00
|
|
|
/* v6wbi - ARMv6 with write buffer with I TLB flush entry instruction. */
#define v6wbi_tlb_flags	(TLB_WB | TLB_DCLEAN | TLB_BARRIER | \
			 TLB_V6_I_FULL | TLB_V6_D_FULL | \
			 TLB_V6_I_PAGE | TLB_V6_D_PAGE | \
			 TLB_V6_I_ASID | TLB_V6_D_ASID | \
			 TLB_V6_BP)

#if !defined(CONFIG_CPU_TLB_V6)
/* Not configured: neutral element for the AND of "always" and the OR of "possible". */
# define v6wbi_always_flags	(-1UL)
# define v6wbi_possible_flags	0
#else
# define v6wbi_always_flags	v6wbi_tlb_flags
# define v6wbi_possible_flags	v6wbi_tlb_flags
# if defined(_TLB)
#  define MULTI_TLB 1
# else
#  define _TLB v6wbi
# endif
#endif
|
|
|
|
|
2013-04-03 17:16:57 +01:00
|
|
|
/*
 * v7wbi comes in two flavours: the SMP set uses the Unified Inner
 * Shareable operations, the UP set uses the V6 unified operations.
 */
#define v7wbi_tlb_flags_smp	(TLB_WB | TLB_BARRIER | \
				 TLB_V7_UIS_FULL | TLB_V7_UIS_PAGE | \
				 TLB_V7_UIS_ASID | TLB_V7_UIS_BP)
#define v7wbi_tlb_flags_up	(TLB_WB | TLB_DCLEAN | TLB_BARRIER | \
				 TLB_V6_U_FULL | TLB_V6_U_PAGE | \
				 TLB_V6_U_ASID | TLB_V6_BP)

#if !defined(CONFIG_CPU_TLB_V7)
/* Not configured: neutral element for the AND of "always" and the OR of "possible". */
# define v7wbi_always_flags	(-1UL)
# define v7wbi_possible_flags	0
#else
# if defined(CONFIG_SMP_ON_UP)
/* Either flavour may be in effect: possible = union, always = intersection. */
#  define v7wbi_always_flags	(v7wbi_tlb_flags_smp & v7wbi_tlb_flags_up)
#  define v7wbi_possible_flags	(v7wbi_tlb_flags_smp | v7wbi_tlb_flags_up)
# elif defined(CONFIG_SMP)
#  define v7wbi_always_flags	v7wbi_tlb_flags_smp
#  define v7wbi_possible_flags	v7wbi_tlb_flags_smp
# else
#  define v7wbi_always_flags	v7wbi_tlb_flags_up
#  define v7wbi_possible_flags	v7wbi_tlb_flags_up
# endif
# if defined(_TLB)
#  define MULTI_TLB 1
# else
#  define _TLB v7wbi
# endif
#endif
|
|
|
|
|
2005-04-16 15:20:36 -07:00
|
|
|
/* None of the model sections above was enabled by the configuration. */
#if !defined(_TLB)
#error Unknown TLB model
#endif
|
|
|
|
|
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
|
Detach sched.h from mm.h
First thing mm.h does is including sched.h solely for can_do_mlock() inline
function which has "current" dereference inside. By dealing with can_do_mlock()
mm.h can be detached from sched.h which is good. See below, why.
This patch
a) removes unconditional inclusion of sched.h from mm.h
b) makes can_do_mlock() normal function in mm/mlock.c
c) exports can_do_mlock() to not break compilation
d) adds sched.h inclusions back to files that were getting it indirectly.
e) adds less bloated headers to some files (asm/signal.h, jiffies.h) that were
getting them indirectly
Net result is:
a) mm.h users would get less code to open, read, preprocess, parse, ... if
they don't need sched.h
b) sched.h stops being dependency for significant number of files:
on x86_64 allmodconfig touching sched.h results in recompile of 4083 files,
after patch it's only 3744 (-8.3%).
Cross-compile tested on
all arm defconfigs, all mips defconfigs, all powerpc defconfigs,
alpha alpha-up
arm
i386 i386-up i386-defconfig i386-allnoconfig
ia64 ia64-up
m68k
mips
parisc parisc-up
powerpc powerpc-up
s390 s390-up
sparc sparc-up
sparc64 sparc64-up
um-x86_64
x86_64 x86_64-up x86_64-defconfig x86_64-allnoconfig
as well as my two usual configs.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-21 01:22:52 +04:00
|
|
|
#include <linux/sched.h>
|
|
|
|
|
2005-04-16 15:20:36 -07:00
|
|
|
/*
 * TLB operations table.  The single instance, cpu_tlb, is declared below;
 * its function pointers are what the __cpu_flush_*_tlb_range names resolve
 * to when MULTI_TLB is defined, and its tlb_flags member is read through
 * __cpu_tlb_flags.
 */
struct cpu_tlb_fns {
|
|
|
|
/* Flush a user range: (start, end, vma) - argument order per the TLB Management comment; TODO confirm. */
void (*flush_user_range)(unsigned long, unsigned long, struct vm_area_struct *);
|
|
|
|
/* Flush a kernel range: presumably (start, end) - TODO confirm. */
void (*flush_kern_range)(unsigned long, unsigned long);
|
|
|
|
/* Bitmask of TLB_* flags tested by tlb_flag() / tlb_op(). */
unsigned long tlb_flags;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Select the calling method
|
|
|
|
*/
|
|
|
|
#ifdef MULTI_TLB
|
|
|
|
|
|
|
|
#define __cpu_flush_user_tlb_range cpu_tlb.flush_user_range
|
|
|
|
#define __cpu_flush_kern_tlb_range cpu_tlb.flush_kern_range
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
|
|
|
#define __cpu_flush_user_tlb_range __glue(_TLB,_flush_user_tlb_range)
|
|
|
|
#define __cpu_flush_kern_tlb_range __glue(_TLB,_flush_kern_tlb_range)
|
|
|
|
|
|
|
|
extern void __cpu_flush_user_tlb_range(unsigned long, unsigned long, struct vm_area_struct *);
|
|
|
|
extern void __cpu_flush_kern_tlb_range(unsigned long, unsigned long);
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* The TLB operations table; declared regardless of MULTI_TLB because tlb_flags is always read from it. */
extern struct cpu_tlb_fns cpu_tlb;
|
|
|
|
|
|
|
|
/* TLB_* flags from cpu_tlb; each helper below copies this once into a local __tlb_flag. */
#define __cpu_tlb_flags cpu_tlb.tlb_flags
|
|
|
|
|
|
|
|
/*
|
|
|
|
* TLB Management
|
|
|
|
* ==============
|
|
|
|
*
|
|
|
|
* The arch/arm/mm/tlb-*.S files implement these methods.
|
|
|
|
*
|
|
|
|
* The TLB specific code is expected to perform whatever tests it
|
|
|
|
* needs to determine if it should invalidate the TLB for each
|
|
|
|
* call. Start addresses are inclusive and end addresses are
|
|
|
|
* exclusive; it is safe to round these addresses down.
|
|
|
|
*
|
|
|
|
* flush_tlb_all()
|
|
|
|
*
|
|
|
|
* Invalidate the entire TLB.
|
|
|
|
*
|
|
|
|
* flush_tlb_mm(mm)
|
|
|
|
*
|
|
|
|
* Invalidate all TLB entries in a particular address
|
|
|
|
* space.
|
|
|
|
* - mm - mm_struct describing address space
|
|
|
|
*
|
2021-06-28 19:39:56 -07:00
|
|
|
* flush_tlb_range(vma,start,end)
|
2005-04-16 15:20:36 -07:00
|
|
|
*
|
|
|
|
* Invalidate a range of TLB entries in the specified
|
|
|
|
* address space.
|
|
|
|
* - vma - vm_area_struct describing address range
|
|
|
|
* - start - start address (may not be aligned)
|
|
|
|
* - end - end address (exclusive, may not be aligned)
|
|
|
|
*
|
2021-06-28 19:39:56 -07:00
|
|
|
* flush_tlb_page(vma, uaddr)
|
2005-04-16 15:20:36 -07:00
|
|
|
*
|
|
|
|
* Invalidate the specified page in the specified address range.
|
2021-06-28 19:39:56 -07:00
|
|
|
* - vma - vm_area_struct describing address range
|
2005-04-16 15:20:36 -07:00
|
|
|
* - uaddr - virtual address (may not be aligned)
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We optimise the code below by:
|
|
|
|
* - building a set of TLB flags that might be set in __cpu_tlb_flags
|
|
|
|
* - building a set of TLB flags that will always be set in __cpu_tlb_flags
|
|
|
|
* - if we're going to need __cpu_tlb_flags, access it once and only once
|
|
|
|
*
|
|
|
|
* This allows us to build optimal assembly for the single-CPU type case,
|
|
|
|
* and as close to optimal given the compiler constraints for multi-CPU
|
|
|
|
* case. We could do better for the multi-CPU case if the compiler
|
|
|
|
* implemented the "%?" method, but this has been discontinued due to too
|
|
|
|
* many people getting it wrong.
|
|
|
|
*/
|
2012-05-04 12:04:26 +01:00
|
|
|
/*
 * Union of the flags of every configured TLB model: a flag outside this
 * set can never be set in __cpu_tlb_flags.
 */
#define possible_tlb_flags	(v4_possible_flags    | \
				 v4wb_possible_flags  | \
				 v4wbi_possible_flags | \
				 fr_possible_flags    | \
				 fa_possible_flags    | \
				 v6wbi_possible_flags | \
				 v7wbi_possible_flags)
|
2005-04-16 15:20:36 -07:00
|
|
|
|
2012-05-04 12:04:26 +01:00
|
|
|
/*
 * Intersection of the flags of every configured TLB model: a flag in
 * this set is always set in __cpu_tlb_flags, so no run-time test is needed.
 */
#define always_tlb_flags	(v4_always_flags    & \
				 v4wb_always_flags  & \
				 v4wbi_always_flags & \
				 fr_always_flags    & \
				 fa_always_flags    & \
				 v6wbi_always_flags & \
				 v7wbi_always_flags)
|
2005-04-16 15:20:36 -07:00
|
|
|
|
|
|
|
/*
 * True when any flag in @f applies: either it is set for every configured
 * model (first operand), or it is possible for some model and set in the
 * caller's local __tlb_flag.  Requires a variable named __tlb_flag in the
 * calling scope.
 */
#define tlb_flag(f) ((always_tlb_flags & (f)) || (__tlb_flag & possible_tlb_flags & (f)))
|
|
|
|
|
ARM: Optimize multi-CPU tlb flushing a little more
The compiler does not conditionalize the assembly instructions for
the tlb operations, which leads to sub-optimal code being generated
when building a kernel for multiple CPUs.
We can tweak things fairly simply as the code fragment below shows:
17f8: e3120001 tst r2, #1 ; 0x1
...
1800: 0a000000 beq 1808 <handle_pte_fault+0x194>
1804: ee061f10 mcr 15, 0, r1, cr6, cr0, {0}
1808: e3120004 tst r2, #4 ; 0x4
180c: 0a000000 beq 1814 <handle_pte_fault+0x1a0>
1810: ee081f36 mcr 15, 0, r1, cr8, cr6, {1}
becomes:
17f0: e3120001 tst r2, #1 ; 0x1
17f4: 1e063f10 mcrne 15, 0, r3, cr6, cr0, {0}
17f8: e3120004 tst r2, #4 ; 0x4
17fc: 1e083f36 mcrne 15, 0, r3, cr8, cr6, {1}
Overall, for Realview with V6 and V7 CPUs configured:
text data bss dec hex filename
4153998 207340 5371036 9732374 948116 ../build/realview/vmlinux.before
4153366 207332 5371036 9731734 947e96 ../build/realview/vmlinux.after
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2012-02-04 10:55:38 +00:00
|
|
|
/*
 * Emit one "mcr" operation gated on TLB flag(s) @f:
 *  - @f set for every configured model: plain "mcr", no test;
 *  - @f possible for only some models: "tst" the caller's __tlb_flag
 *    against @f and issue a conditional "mcrne";
 *  - otherwise: nothing is emitted.
 * @insnarg is the operand text of the mcr with %0 standing for @arg.
 * The expansion reads a local named __tlb_flag from the calling scope.
 */
#define __tlb_op(f, insnarg, arg)					\
	do {								\
		if (always_tlb_flags & (f)) {				\
			asm("mcr " insnarg				\
			    : : "r" (arg) : "cc");			\
		} else if (possible_tlb_flags & (f)) {			\
			asm("tst %1, %2\n\t"				\
			    "mcrne " insnarg				\
			    : : "r" (arg), "r" (__tlb_flag), "Ir" (f)	\
			    : "cc");					\
		}							\
	} while (0)
|
|
|
|
|
|
|
|
#define tlb_op(f, regs, arg) __tlb_op(f, "p15, 0, %0, " regs, arg)
|
|
|
|
#define tlb_l2_op(f, regs, arg) __tlb_op(f, "p15, 1, %0, " regs, arg)
|
|
|
|
|
2013-02-11 13:47:48 +00:00
|
|
|
static inline void __local_flush_tlb_all(void)
|
|
|
|
{
|
|
|
|
const int zero = 0;
|
|
|
|
const unsigned int __tlb_flag = __cpu_tlb_flags;
|
|
|
|
|
|
|
|
tlb_op(TLB_V4_U_FULL | TLB_V6_U_FULL, "c8, c7, 0", zero);
|
|
|
|
tlb_op(TLB_V4_D_FULL | TLB_V6_D_FULL, "c8, c6, 0", zero);
|
|
|
|
tlb_op(TLB_V4_I_FULL | TLB_V6_I_FULL, "c8, c5, 0", zero);
|
|
|
|
}
|
|
|
|
|
2005-06-28 13:40:39 +01:00
|
|
|
static inline void local_flush_tlb_all(void)
|
2005-04-16 15:20:36 -07:00
|
|
|
{
|
|
|
|
const int zero = 0;
|
|
|
|
const unsigned int __tlb_flag = __cpu_tlb_flags;
|
|
|
|
|
|
|
|
if (tlb_flag(TLB_WB))
|
2013-05-23 18:43:58 +01:00
|
|
|
dsb(nshst);
|
2005-04-16 15:20:36 -07:00
|
|
|
|
2013-02-11 13:47:48 +00:00
|
|
|
__local_flush_tlb_all();
|
|
|
|
tlb_op(TLB_V7_UIS_FULL, "c8, c7, 0", zero);
|
2007-02-05 14:47:51 +01:00
|
|
|
|
2011-07-05 09:01:13 +01:00
|
|
|
if (tlb_flag(TLB_BARRIER)) {
|
2013-05-23 18:43:58 +01:00
|
|
|
dsb(nsh);
|
2010-05-07 18:03:05 +01:00
|
|
|
isb();
|
|
|
|
}
|
2005-04-16 15:20:36 -07:00
|
|
|
}
|
|
|
|
|
2013-02-11 13:47:48 +00:00
|
|
|
static inline void __flush_tlb_all(void)
|
2005-04-16 15:20:36 -07:00
|
|
|
{
|
|
|
|
const int zero = 0;
|
|
|
|
const unsigned int __tlb_flag = __cpu_tlb_flags;
|
|
|
|
|
|
|
|
if (tlb_flag(TLB_WB))
|
2013-05-23 18:43:58 +01:00
|
|
|
dsb(ishst);
|
2005-04-16 15:20:36 -07:00
|
|
|
|
2013-02-11 13:47:48 +00:00
|
|
|
__local_flush_tlb_all();
|
|
|
|
tlb_op(TLB_V7_UIS_FULL, "c8, c3, 0", zero);
|
|
|
|
|
|
|
|
if (tlb_flag(TLB_BARRIER)) {
|
2013-05-23 18:43:58 +01:00
|
|
|
dsb(ish);
|
2013-02-11 13:47:48 +00:00
|
|
|
isb();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void __local_flush_tlb_mm(struct mm_struct *mm)
|
|
|
|
{
|
|
|
|
const int zero = 0;
|
|
|
|
const int asid = ASID(mm);
|
|
|
|
const unsigned int __tlb_flag = __cpu_tlb_flags;
|
|
|
|
|
2013-01-14 20:48:55 +00:00
|
|
|
if (possible_tlb_flags & (TLB_V4_U_FULL|TLB_V4_D_FULL|TLB_V4_I_FULL)) {
|
2013-02-11 13:47:48 +00:00
|
|
|
if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm))) {
|
ARM: Optimize multi-CPU tlb flushing a little more
The compiler does not conditionalize the assembly instructions for
the tlb operations, which leads to sub-optimal code being generated
when building a kernel for multiple CPUs.
We can tweak things fairly simply as the code fragment below shows:
17f8: e3120001 tst r2, #1 ; 0x1
...
1800: 0a000000 beq 1808 <handle_pte_fault+0x194>
1804: ee061f10 mcr 15, 0, r1, cr6, cr0, {0}
1808: e3120004 tst r2, #4 ; 0x4
180c: 0a000000 beq 1814 <handle_pte_fault+0x1a0>
1810: ee081f36 mcr 15, 0, r1, cr8, cr6, {1}
becomes:
17f0: e3120001 tst r2, #1 ; 0x1
17f4: 1e063f10 mcrne 15, 0, r3, cr6, cr0, {0}
17f8: e3120004 tst r2, #4 ; 0x4
17fc: 1e083f36 mcrne 15, 0, r3, cr8, cr6, {1}
Overall, for Realview with V6 and V7 CPUs configured:
text data bss dec hex filename
4153998 207340 5371036 9732374 948116 ../build/realview/vmlinux.before
4153366 207332 5371036 9731734 947e96 ../build/realview/vmlinux.after
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2012-02-04 10:55:38 +00:00
|
|
|
tlb_op(TLB_V4_U_FULL, "c8, c7, 0", zero);
|
|
|
|
tlb_op(TLB_V4_D_FULL, "c8, c6, 0", zero);
|
|
|
|
tlb_op(TLB_V4_I_FULL, "c8, c5, 0", zero);
|
|
|
|
}
|
2005-04-16 15:20:36 -07:00
|
|
|
}
|
ARM: Optimize multi-CPU tlb flushing a little more
The compiler does not conditionalize the assembly instructions for
the tlb operations, which leads to sub-optimal code being generated
when building a kernel for multiple CPUs.
We can tweak things fairly simply as the code fragment below shows:
17f8: e3120001 tst r2, #1 ; 0x1
...
1800: 0a000000 beq 1808 <handle_pte_fault+0x194>
1804: ee061f10 mcr 15, 0, r1, cr6, cr0, {0}
1808: e3120004 tst r2, #4 ; 0x4
180c: 0a000000 beq 1814 <handle_pte_fault+0x1a0>
1810: ee081f36 mcr 15, 0, r1, cr8, cr6, {1}
becomes:
17f0: e3120001 tst r2, #1 ; 0x1
17f4: 1e063f10 mcrne 15, 0, r3, cr6, cr0, {0}
17f8: e3120004 tst r2, #4 ; 0x4
17fc: 1e083f36 mcrne 15, 0, r3, cr8, cr6, {1}
Overall, for Realview with V6 and V7 CPUs configured:
text data bss dec hex filename
4153998 207340 5371036 9732374 948116 ../build/realview/vmlinux.before
4153366 207332 5371036 9731734 947e96 ../build/realview/vmlinux.after
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2012-02-04 10:55:38 +00:00
|
|
|
|
|
|
|
tlb_op(TLB_V6_U_ASID, "c8, c7, 2", asid);
|
|
|
|
tlb_op(TLB_V6_D_ASID, "c8, c6, 2", asid);
|
|
|
|
tlb_op(TLB_V6_I_ASID, "c8, c5, 2", asid);
|
2013-02-11 13:47:48 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline void local_flush_tlb_mm(struct mm_struct *mm)
|
|
|
|
{
|
|
|
|
const int asid = ASID(mm);
|
|
|
|
const unsigned int __tlb_flag = __cpu_tlb_flags;
|
|
|
|
|
|
|
|
if (tlb_flag(TLB_WB))
|
2013-05-23 18:43:58 +01:00
|
|
|
dsb(nshst);
|
2013-02-11 13:47:48 +00:00
|
|
|
|
|
|
|
__local_flush_tlb_mm(mm);
|
|
|
|
tlb_op(TLB_V7_UIS_ASID, "c8, c7, 2", asid);
|
|
|
|
|
|
|
|
if (tlb_flag(TLB_BARRIER))
|
2013-05-23 18:43:58 +01:00
|
|
|
dsb(nsh);
|
2013-02-11 13:47:48 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline void __flush_tlb_mm(struct mm_struct *mm)
|
|
|
|
{
|
|
|
|
const unsigned int __tlb_flag = __cpu_tlb_flags;
|
|
|
|
|
|
|
|
if (tlb_flag(TLB_WB))
|
2013-05-23 18:43:58 +01:00
|
|
|
dsb(ishst);
|
2013-02-11 13:47:48 +00:00
|
|
|
|
|
|
|
__local_flush_tlb_mm(mm);
|
2010-08-05 11:20:51 +01:00
|
|
|
#ifdef CONFIG_ARM_ERRATA_720789
|
2013-02-11 13:47:48 +00:00
|
|
|
tlb_op(TLB_V7_UIS_ASID, "c8, c3, 0", 0);
|
2010-08-05 11:20:51 +01:00
|
|
|
#else
|
2013-02-11 13:47:48 +00:00
|
|
|
tlb_op(TLB_V7_UIS_ASID, "c8, c3, 2", ASID(mm));
|
2010-08-05 11:20:51 +01:00
|
|
|
#endif
|
2007-02-05 14:47:51 +01:00
|
|
|
|
2011-07-05 09:01:13 +01:00
|
|
|
if (tlb_flag(TLB_BARRIER))
|
2013-05-23 18:43:58 +01:00
|
|
|
dsb(ish);
|
2005-04-16 15:20:36 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline void
|
2013-02-11 13:47:48 +00:00
|
|
|
__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
|
2005-04-16 15:20:36 -07:00
|
|
|
{
|
|
|
|
const int zero = 0;
|
|
|
|
const unsigned int __tlb_flag = __cpu_tlb_flags;
|
|
|
|
|
|
|
|
uaddr = (uaddr & PAGE_MASK) | ASID(vma->vm_mm);
|
|
|
|
|
2013-01-14 20:48:55 +00:00
|
|
|
if (possible_tlb_flags & (TLB_V4_U_PAGE|TLB_V4_D_PAGE|TLB_V4_I_PAGE|TLB_V4_I_FULL) &&
|
ARM: Optimize multi-CPU tlb flushing a little more
The compiler does not conditionalize the assembly instructions for
the tlb operations, which leads to sub-optimal code being generated
when building a kernel for multiple CPUs.
We can tweak things fairly simply as the code fragment below shows:
17f8: e3120001 tst r2, #1 ; 0x1
...
1800: 0a000000 beq 1808 <handle_pte_fault+0x194>
1804: ee061f10 mcr 15, 0, r1, cr6, cr0, {0}
1808: e3120004 tst r2, #4 ; 0x4
180c: 0a000000 beq 1814 <handle_pte_fault+0x1a0>
1810: ee081f36 mcr 15, 0, r1, cr8, cr6, {1}
becomes:
17f0: e3120001 tst r2, #1 ; 0x1
17f4: 1e063f10 mcrne 15, 0, r3, cr6, cr0, {0}
17f8: e3120004 tst r2, #4 ; 0x4
17fc: 1e083f36 mcrne 15, 0, r3, cr8, cr6, {1}
Overall, for Realview with V6 and V7 CPUs configured:
text data bss dec hex filename
4153998 207340 5371036 9732374 948116 ../build/realview/vmlinux.before
4153366 207332 5371036 9731734 947e96 ../build/realview/vmlinux.after
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2012-02-04 10:55:38 +00:00
|
|
|
cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) {
|
|
|
|
tlb_op(TLB_V4_U_PAGE, "c8, c7, 1", uaddr);
|
|
|
|
tlb_op(TLB_V4_D_PAGE, "c8, c6, 1", uaddr);
|
|
|
|
tlb_op(TLB_V4_I_PAGE, "c8, c5, 1", uaddr);
|
2005-04-16 15:20:36 -07:00
|
|
|
if (!tlb_flag(TLB_V4_I_PAGE) && tlb_flag(TLB_V4_I_FULL))
|
2006-08-30 15:02:08 +01:00
|
|
|
asm("mcr p15, 0, %0, c8, c5, 0" : : "r" (zero) : "cc");
|
2005-04-16 15:20:36 -07:00
|
|
|
}
|
|
|
|
|
ARM: Optimize multi-CPU tlb flushing a little more
The compiler does not conditionalize the assembly instructions for
the tlb operations, which leads to sub-optimal code being generated
when building a kernel for multiple CPUs.
We can tweak things fairly simply as the code fragment below shows:
17f8: e3120001 tst r2, #1 ; 0x1
...
1800: 0a000000 beq 1808 <handle_pte_fault+0x194>
1804: ee061f10 mcr 15, 0, r1, cr6, cr0, {0}
1808: e3120004 tst r2, #4 ; 0x4
180c: 0a000000 beq 1814 <handle_pte_fault+0x1a0>
1810: ee081f36 mcr 15, 0, r1, cr8, cr6, {1}
becomes:
17f0: e3120001 tst r2, #1 ; 0x1
17f4: 1e063f10 mcrne 15, 0, r3, cr6, cr0, {0}
17f8: e3120004 tst r2, #4 ; 0x4
17fc: 1e083f36 mcrne 15, 0, r3, cr8, cr6, {1}
Overall, for Realview with V6 and V7 CPUs configured:
text data bss dec hex filename
4153998 207340 5371036 9732374 948116 ../build/realview/vmlinux.before
4153366 207332 5371036 9731734 947e96 ../build/realview/vmlinux.after
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2012-02-04 10:55:38 +00:00
|
|
|
tlb_op(TLB_V6_U_PAGE, "c8, c7, 1", uaddr);
|
|
|
|
tlb_op(TLB_V6_D_PAGE, "c8, c6, 1", uaddr);
|
|
|
|
tlb_op(TLB_V6_I_PAGE, "c8, c5, 1", uaddr);
|
2013-02-11 13:47:48 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline void
|
|
|
|
local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
|
|
|
|
{
|
|
|
|
const unsigned int __tlb_flag = __cpu_tlb_flags;
|
|
|
|
|
|
|
|
uaddr = (uaddr & PAGE_MASK) | ASID(vma->vm_mm);
|
|
|
|
|
|
|
|
if (tlb_flag(TLB_WB))
|
2013-05-23 18:43:58 +01:00
|
|
|
dsb(nshst);
|
2013-02-11 13:47:48 +00:00
|
|
|
|
|
|
|
__local_flush_tlb_page(vma, uaddr);
|
|
|
|
tlb_op(TLB_V7_UIS_PAGE, "c8, c7, 1", uaddr);
|
|
|
|
|
|
|
|
if (tlb_flag(TLB_BARRIER))
|
2013-05-23 18:43:58 +01:00
|
|
|
dsb(nsh);
|
2013-02-11 13:47:48 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline void
|
|
|
|
__flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
|
|
|
|
{
|
|
|
|
const unsigned int __tlb_flag = __cpu_tlb_flags;
|
|
|
|
|
|
|
|
uaddr = (uaddr & PAGE_MASK) | ASID(vma->vm_mm);
|
|
|
|
|
|
|
|
if (tlb_flag(TLB_WB))
|
2013-05-23 18:43:58 +01:00
|
|
|
dsb(ishst);
|
2013-02-11 13:47:48 +00:00
|
|
|
|
|
|
|
__local_flush_tlb_page(vma, uaddr);
|
2010-08-05 11:20:51 +01:00
|
|
|
#ifdef CONFIG_ARM_ERRATA_720789
|
ARM: Optimize multi-CPU tlb flushing a little more
The compiler does not conditionalize the assembly instructions for
the tlb operations, which leads to sub-optimal code being generated
when building a kernel for multiple CPUs.
We can tweak things fairly simply as the code fragment below shows:
17f8: e3120001 tst r2, #1 ; 0x1
...
1800: 0a000000 beq 1808 <handle_pte_fault+0x194>
1804: ee061f10 mcr 15, 0, r1, cr6, cr0, {0}
1808: e3120004 tst r2, #4 ; 0x4
180c: 0a000000 beq 1814 <handle_pte_fault+0x1a0>
1810: ee081f36 mcr 15, 0, r1, cr8, cr6, {1}
becomes:
17f0: e3120001 tst r2, #1 ; 0x1
17f4: 1e063f10 mcrne 15, 0, r3, cr6, cr0, {0}
17f8: e3120004 tst r2, #4 ; 0x4
17fc: 1e083f36 mcrne 15, 0, r3, cr8, cr6, {1}
Overall, for Realview with V6 and V7 CPUs configured:
text data bss dec hex filename
4153998 207340 5371036 9732374 948116 ../build/realview/vmlinux.before
4153366 207332 5371036 9731734 947e96 ../build/realview/vmlinux.after
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2012-02-04 10:55:38 +00:00
|
|
|
tlb_op(TLB_V7_UIS_PAGE, "c8, c3, 3", uaddr & PAGE_MASK);
|
2010-08-05 11:20:51 +01:00
|
|
|
#else
|
ARM: Optimize multi-CPU tlb flushing a little more
The compiler does not conditionalize the assembly instructions for
the tlb operations, which leads to sub-optimal code being generated
when building a kernel for multiple CPUs.
We can tweak things fairly simply as the code fragment below shows:
17f8: e3120001 tst r2, #1 ; 0x1
...
1800: 0a000000 beq 1808 <handle_pte_fault+0x194>
1804: ee061f10 mcr 15, 0, r1, cr6, cr0, {0}
1808: e3120004 tst r2, #4 ; 0x4
180c: 0a000000 beq 1814 <handle_pte_fault+0x1a0>
1810: ee081f36 mcr 15, 0, r1, cr8, cr6, {1}
becomes:
17f0: e3120001 tst r2, #1 ; 0x1
17f4: 1e063f10 mcrne 15, 0, r3, cr6, cr0, {0}
17f8: e3120004 tst r2, #4 ; 0x4
17fc: 1e083f36 mcrne 15, 0, r3, cr8, cr6, {1}
Overall, for Realview with V6 and V7 CPUs configured:
text data bss dec hex filename
4153998 207340 5371036 9732374 948116 ../build/realview/vmlinux.before
4153366 207332 5371036 9731734 947e96 ../build/realview/vmlinux.after
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2012-02-04 10:55:38 +00:00
|
|
|
tlb_op(TLB_V7_UIS_PAGE, "c8, c3, 1", uaddr);
|
2010-08-05 11:20:51 +01:00
|
|
|
#endif
|
2007-02-05 14:47:51 +01:00
|
|
|
|
2011-07-05 09:01:13 +01:00
|
|
|
if (tlb_flag(TLB_BARRIER))
|
2013-05-23 18:43:58 +01:00
|
|
|
dsb(ish);
|
2005-04-16 15:20:36 -07:00
|
|
|
}
|
|
|
|
|
2013-02-11 13:47:48 +00:00
|
|
|
static inline void __local_flush_tlb_kernel_page(unsigned long kaddr)
|
2005-04-16 15:20:36 -07:00
|
|
|
{
|
|
|
|
const int zero = 0;
|
|
|
|
const unsigned int __tlb_flag = __cpu_tlb_flags;
|
|
|
|
|
ARM: Optimize multi-CPU tlb flushing a little more
The compiler does not conditionalize the assembly instructions for
the tlb operations, which leads to sub-optimal code being generated
when building a kernel for multiple CPUs.
We can tweak things fairly simply as the code fragment below shows:
17f8: e3120001 tst r2, #1 ; 0x1
...
1800: 0a000000 beq 1808 <handle_pte_fault+0x194>
1804: ee061f10 mcr 15, 0, r1, cr6, cr0, {0}
1808: e3120004 tst r2, #4 ; 0x4
180c: 0a000000 beq 1814 <handle_pte_fault+0x1a0>
1810: ee081f36 mcr 15, 0, r1, cr8, cr6, {1}
becomes:
17f0: e3120001 tst r2, #1 ; 0x1
17f4: 1e063f10 mcrne 15, 0, r3, cr6, cr0, {0}
17f8: e3120004 tst r2, #4 ; 0x4
17fc: 1e083f36 mcrne 15, 0, r3, cr8, cr6, {1}
Overall, for Realview with V6 and V7 CPUs configured:
text data bss dec hex filename
4153998 207340 5371036 9732374 948116 ../build/realview/vmlinux.before
4153366 207332 5371036 9731734 947e96 ../build/realview/vmlinux.after
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2012-02-04 10:55:38 +00:00
|
|
|
tlb_op(TLB_V4_U_PAGE, "c8, c7, 1", kaddr);
|
|
|
|
tlb_op(TLB_V4_D_PAGE, "c8, c6, 1", kaddr);
|
|
|
|
tlb_op(TLB_V4_I_PAGE, "c8, c5, 1", kaddr);
|
2005-04-16 15:20:36 -07:00
|
|
|
if (!tlb_flag(TLB_V4_I_PAGE) && tlb_flag(TLB_V4_I_FULL))
|
2006-08-30 15:02:08 +01:00
|
|
|
asm("mcr p15, 0, %0, c8, c5, 0" : : "r" (zero) : "cc");
|
2005-04-16 15:20:36 -07:00
|
|
|
|
ARM: Optimize multi-CPU tlb flushing a little more
The compiler does not conditionalize the assembly instructions for
the tlb operations, which leads to sub-optimal code being generated
when building a kernel for multiple CPUs.
We can tweak things fairly simply as the code fragment below shows:
17f8: e3120001 tst r2, #1 ; 0x1
...
1800: 0a000000 beq 1808 <handle_pte_fault+0x194>
1804: ee061f10 mcr 15, 0, r1, cr6, cr0, {0}
1808: e3120004 tst r2, #4 ; 0x4
180c: 0a000000 beq 1814 <handle_pte_fault+0x1a0>
1810: ee081f36 mcr 15, 0, r1, cr8, cr6, {1}
becomes:
17f0: e3120001 tst r2, #1 ; 0x1
17f4: 1e063f10 mcrne 15, 0, r3, cr6, cr0, {0}
17f8: e3120004 tst r2, #4 ; 0x4
17fc: 1e083f36 mcrne 15, 0, r3, cr8, cr6, {1}
Overall, for Realview with V6 and V7 CPUs configured:
text data bss dec hex filename
4153998 207340 5371036 9732374 948116 ../build/realview/vmlinux.before
4153366 207332 5371036 9731734 947e96 ../build/realview/vmlinux.after
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2012-02-04 10:55:38 +00:00
|
|
|
tlb_op(TLB_V6_U_PAGE, "c8, c7, 1", kaddr);
|
|
|
|
tlb_op(TLB_V6_D_PAGE, "c8, c6, 1", kaddr);
|
|
|
|
tlb_op(TLB_V6_I_PAGE, "c8, c5, 1", kaddr);
|
2013-02-11 13:47:48 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline void local_flush_tlb_kernel_page(unsigned long kaddr)
|
|
|
|
{
|
|
|
|
const unsigned int __tlb_flag = __cpu_tlb_flags;
|
|
|
|
|
|
|
|
kaddr &= PAGE_MASK;
|
|
|
|
|
|
|
|
if (tlb_flag(TLB_WB))
|
2013-05-23 18:43:58 +01:00
|
|
|
dsb(nshst);
|
2013-02-11 13:47:48 +00:00
|
|
|
|
|
|
|
__local_flush_tlb_kernel_page(kaddr);
|
|
|
|
tlb_op(TLB_V7_UIS_PAGE, "c8, c7, 1", kaddr);
|
|
|
|
|
|
|
|
if (tlb_flag(TLB_BARRIER)) {
|
2013-05-23 18:43:58 +01:00
|
|
|
dsb(nsh);
|
2013-02-11 13:47:48 +00:00
|
|
|
isb();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void __flush_tlb_kernel_page(unsigned long kaddr)
|
|
|
|
{
|
|
|
|
const unsigned int __tlb_flag = __cpu_tlb_flags;
|
|
|
|
|
|
|
|
kaddr &= PAGE_MASK;
|
|
|
|
|
|
|
|
if (tlb_flag(TLB_WB))
|
2013-05-23 18:43:58 +01:00
|
|
|
dsb(ishst);
|
2013-02-11 13:47:48 +00:00
|
|
|
|
|
|
|
__local_flush_tlb_kernel_page(kaddr);
|
ARM: Optimize multi-CPU tlb flushing a little more
The compiler does not conditionalize the assembly instructions for
the tlb operations, which leads to sub-optimal code being generated
when building a kernel for multiple CPUs.
We can tweak things fairly simply as the code fragment below shows:
17f8: e3120001 tst r2, #1 ; 0x1
...
1800: 0a000000 beq 1808 <handle_pte_fault+0x194>
1804: ee061f10 mcr 15, 0, r1, cr6, cr0, {0}
1808: e3120004 tst r2, #4 ; 0x4
180c: 0a000000 beq 1814 <handle_pte_fault+0x1a0>
1810: ee081f36 mcr 15, 0, r1, cr8, cr6, {1}
becomes:
17f0: e3120001 tst r2, #1 ; 0x1
17f4: 1e063f10 mcrne 15, 0, r3, cr6, cr0, {0}
17f8: e3120004 tst r2, #4 ; 0x4
17fc: 1e083f36 mcrne 15, 0, r3, cr8, cr6, {1}
Overall, for Realview with V6 and V7 CPUs configured:
text data bss dec hex filename
4153998 207340 5371036 9732374 948116 ../build/realview/vmlinux.before
4153366 207332 5371036 9731734 947e96 ../build/realview/vmlinux.after
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2012-02-04 10:55:38 +00:00
|
|
|
tlb_op(TLB_V7_UIS_PAGE, "c8, c3, 1", kaddr);
|
2006-03-07 14:42:27 +00:00
|
|
|
|
2011-07-05 09:01:13 +01:00
|
|
|
if (tlb_flag(TLB_BARRIER)) {
|
2013-05-23 18:43:58 +01:00
|
|
|
dsb(ish);
|
2010-05-07 18:03:05 +01:00
|
|
|
isb();
|
|
|
|
}
|
2005-04-16 15:20:36 -07:00
|
|
|
}
|
|
|
|
|
2013-05-23 18:29:18 +01:00
|
|
|
/*
|
|
|
|
* Branch predictor maintenance is paired with full TLB invalidation, so
|
|
|
|
* there is no need for any barriers here.
|
|
|
|
*/
|
2013-02-18 22:07:47 +00:00
|
|
|
static inline void __local_flush_bp_all(void)
|
|
|
|
{
|
|
|
|
const int zero = 0;
|
|
|
|
const unsigned int __tlb_flag = __cpu_tlb_flags;
|
|
|
|
|
|
|
|
if (tlb_flag(TLB_V6_BP))
|
|
|
|
asm("mcr p15, 0, %0, c7, c5, 6" : : "r" (zero));
|
|
|
|
}
|
|
|
|
|
2013-02-28 17:48:11 +01:00
|
|
|
static inline void local_flush_bp_all(void)
|
|
|
|
{
|
|
|
|
const int zero = 0;
|
|
|
|
const unsigned int __tlb_flag = __cpu_tlb_flags;
|
|
|
|
|
2013-02-18 22:07:47 +00:00
|
|
|
__local_flush_bp_all();
|
2013-02-28 17:48:11 +01:00
|
|
|
if (tlb_flag(TLB_V7_UIS_BP))
|
|
|
|
asm("mcr p15, 0, %0, c7, c5, 6" : : "r" (zero));
|
|
|
|
}
|
|
|
|
|
2013-02-18 22:07:47 +00:00
|
|
|
static inline void __flush_bp_all(void)
|
|
|
|
{
|
|
|
|
const int zero = 0;
|
|
|
|
const unsigned int __tlb_flag = __cpu_tlb_flags;
|
|
|
|
|
|
|
|
__local_flush_bp_all();
|
|
|
|
if (tlb_flag(TLB_V7_UIS_BP))
|
|
|
|
asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero));
|
|
|
|
}
|
|
|
|
|
2005-04-16 15:20:36 -07:00
|
|
|
/*
|
|
|
|
* flush_pmd_entry
|
|
|
|
*
|
|
|
|
* Flush a PMD entry (word aligned, or double-word aligned) to
|
|
|
|
* RAM if the TLB for the CPU we are running on requires this.
|
|
|
|
* This is typically used when we are creating PMD entries.
|
|
|
|
*
|
|
|
|
* clean_pmd_entry
|
|
|
|
*
|
|
|
|
* Clean (but don't drain the write buffer) if the CPU requires
|
|
|
|
* these operations. This is typically used when we are removing
|
|
|
|
* PMD entries.
|
|
|
|
*/
|
2011-09-05 17:51:56 +01:00
|
|
|
static inline void flush_pmd_entry(void *pmd)
|
2005-04-16 15:20:36 -07:00
|
|
|
{
|
|
|
|
const unsigned int __tlb_flag = __cpu_tlb_flags;
|
|
|
|
|
ARM: Optimize multi-CPU tlb flushing a little more
The compiler does not conditionalize the assembly instructions for
the tlb operations, which leads to sub-optimal code being generated
when building a kernel for multiple CPUs.
We can tweak things fairly simply as the code fragment below shows:
17f8: e3120001 tst r2, #1 ; 0x1
...
1800: 0a000000 beq 1808 <handle_pte_fault+0x194>
1804: ee061f10 mcr 15, 0, r1, cr6, cr0, {0}
1808: e3120004 tst r2, #4 ; 0x4
180c: 0a000000 beq 1814 <handle_pte_fault+0x1a0>
1810: ee081f36 mcr 15, 0, r1, cr8, cr6, {1}
becomes:
17f0: e3120001 tst r2, #1 ; 0x1
17f4: 1e063f10 mcrne 15, 0, r3, cr6, cr0, {0}
17f8: e3120004 tst r2, #4 ; 0x4
17fc: 1e083f36 mcrne 15, 0, r3, cr8, cr6, {1}
Overall, for Realview with V6 and V7 CPUs configured:
text data bss dec hex filename
4153998 207340 5371036 9732374 948116 ../build/realview/vmlinux.before
4153366 207332 5371036 9731734 947e96 ../build/realview/vmlinux.after
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2012-02-04 10:55:38 +00:00
|
|
|
tlb_op(TLB_DCLEAN, "c7, c10, 1 @ flush_pmd", pmd);
|
|
|
|
tlb_l2_op(TLB_L2CLEAN_FR, "c15, c9, 1 @ L2 flush_pmd", pmd);
|
2008-06-22 22:45:04 +02:00
|
|
|
|
2005-04-16 15:20:36 -07:00
|
|
|
if (tlb_flag(TLB_WB))
|
2013-05-23 18:43:58 +01:00
|
|
|
dsb(ishst);
|
2005-04-16 15:20:36 -07:00
|
|
|
}
|
|
|
|
|
2011-09-05 17:51:56 +01:00
|
|
|
static inline void clean_pmd_entry(void *pmd)
|
2005-04-16 15:20:36 -07:00
|
|
|
{
|
|
|
|
const unsigned int __tlb_flag = __cpu_tlb_flags;
|
|
|
|
|
ARM: Optimize multi-CPU tlb flushing a little more
The compiler does not conditionalize the assembly instructions for
the tlb operations, which leads to sub-optimal code being generated
when building a kernel for multiple CPUs.
We can tweak things fairly simply as the code fragment below shows:
17f8: e3120001 tst r2, #1 ; 0x1
...
1800: 0a000000 beq 1808 <handle_pte_fault+0x194>
1804: ee061f10 mcr 15, 0, r1, cr6, cr0, {0}
1808: e3120004 tst r2, #4 ; 0x4
180c: 0a000000 beq 1814 <handle_pte_fault+0x1a0>
1810: ee081f36 mcr 15, 0, r1, cr8, cr6, {1}
becomes:
17f0: e3120001 tst r2, #1 ; 0x1
17f4: 1e063f10 mcrne 15, 0, r3, cr6, cr0, {0}
17f8: e3120004 tst r2, #4 ; 0x4
17fc: 1e083f36 mcrne 15, 0, r3, cr8, cr6, {1}
Overall, for Realview with V6 and V7 CPUs configured:
text data bss dec hex filename
4153998 207340 5371036 9732374 948116 ../build/realview/vmlinux.before
4153366 207332 5371036 9731734 947e96 ../build/realview/vmlinux.after
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2012-02-04 10:55:38 +00:00
|
|
|
tlb_op(TLB_DCLEAN, "c7, c10, 1 @ flush_pmd", pmd);
|
|
|
|
tlb_l2_op(TLB_L2CLEAN_FR, "c15, c9, 1 @ L2 flush_pmd", pmd);
|
2005-04-16 15:20:36 -07:00
|
|
|
}
|
|
|
|
|
ARM: Optimize multi-CPU tlb flushing a little more
The compiler does not conditionalize the assembly instructions for
the tlb operations, which leads to sub-optimal code being generated
when building a kernel for multiple CPUs.
We can tweak things fairly simply as the code fragment below shows:
17f8: e3120001 tst r2, #1 ; 0x1
...
1800: 0a000000 beq 1808 <handle_pte_fault+0x194>
1804: ee061f10 mcr 15, 0, r1, cr6, cr0, {0}
1808: e3120004 tst r2, #4 ; 0x4
180c: 0a000000 beq 1814 <handle_pte_fault+0x1a0>
1810: ee081f36 mcr 15, 0, r1, cr8, cr6, {1}
becomes:
17f0: e3120001 tst r2, #1 ; 0x1
17f4: 1e063f10 mcrne 15, 0, r3, cr6, cr0, {0}
17f8: e3120004 tst r2, #4 ; 0x4
17fc: 1e083f36 mcrne 15, 0, r3, cr8, cr6, {1}
Overall, for Realview with V6 and V7 CPUs configured:
text data bss dec hex filename
4153998 207340 5371036 9732374 948116 ../build/realview/vmlinux.before
4153366 207332 5371036 9731734 947e96 ../build/realview/vmlinux.after
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2012-02-04 10:55:38 +00:00
|
|
|
/*
 * The helper macros used by the inline functions above are not needed
 * past this point, so drop them here.
 */
#undef possible_tlb_flags
#undef always_tlb_flags
#undef tlb_flag
#undef tlb_op
|
|
|
|
|
|
|
|
/*
 * Map the generic range-flush names onto the __cpu_flush_*_tlb_range()
 * hooks.  Note the argument order: the user variant takes the vma last.
 */
#define local_flush_tlb_range(vma, s, e)	__cpu_flush_user_tlb_range(s, e, vma)
#define local_flush_tlb_kernel_range(s, e)	__cpu_flush_kern_tlb_range(s, e)
|
|
|
|
|
|
|
|
/*
 * With CONFIG_SMP the flush_*() entry points are functions defined
 * elsewhere; without it they are plain aliases for the local_flush_*()
 * variants.
 */
#ifdef CONFIG_SMP
extern void flush_tlb_all(void);
extern void flush_tlb_mm(struct mm_struct *mm);
extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr);
extern void flush_tlb_kernel_page(unsigned long kaddr);
extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
extern void flush_bp_all(void);
#else
#define flush_tlb_all		local_flush_tlb_all
#define flush_tlb_mm		local_flush_tlb_mm
#define flush_tlb_page		local_flush_tlb_page
#define flush_tlb_kernel_page	local_flush_tlb_kernel_page
#define flush_tlb_range		local_flush_tlb_range
#define flush_tlb_kernel_range	local_flush_tlb_kernel_range
#define flush_bp_all		local_flush_bp_all
#endif
|
2005-04-16 15:20:36 -07:00
|
|
|
|
|
|
|
/*
|
2010-09-13 15:57:36 +01:00
|
|
|
* If PG_dcache_clean is not set for the page, we need to ensure that any
|
2005-04-16 15:20:36 -07:00
|
|
|
* cache entries for the kernels virtual memory range are written
|
2010-09-13 15:58:06 +01:00
|
|
|
* back to the page. On ARMv6 and later, the cache coherency is handled via
|
2023-08-02 16:13:37 +01:00
|
|
|
* the set_ptes() function.
|
2005-04-16 15:20:36 -07:00
|
|
|
*/
|
2010-09-13 15:58:06 +01:00
|
|
|
#if __LINUX_ARM_ARCH__ < 6
|
2023-08-02 16:13:37 +01:00
|
|
|
void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma,
|
|
|
|
unsigned long addr, pte_t *ptep, unsigned int nr);
|
2010-09-13 15:58:06 +01:00
|
|
|
#else
|
2023-08-02 16:13:37 +01:00
|
|
|
static inline void update_mmu_cache_range(struct vm_fault *vmf,
|
|
|
|
struct vm_area_struct *vma, unsigned long addr, pte_t *ptep,
|
|
|
|
unsigned int nr)
|
2010-09-13 15:58:06 +01:00
|
|
|
{
|
|
|
|
}
|
|
|
|
#endif
|
2005-04-16 15:20:36 -07:00
|
|
|
|
2023-08-02 16:13:37 +01:00
|
|
|
/* Single-PTE form: update_mmu_cache_range() with no vm_fault and nr == 1. */
#define update_mmu_cache(vma, address, pte)				\
	update_mmu_cache_range(NULL, vma, address, pte, 1)

/* PMD-level variant: expands to an empty statement. */
#define update_mmu_cache_pmd(vma, address, pmd)	do { } while (0)
|
|
|
|
|
2005-04-16 15:20:36 -07:00
|
|
|
#endif
|
|
|
|
|
2012-02-28 12:56:06 +00:00
|
|
|
#elif defined(CONFIG_SMP) /* !CONFIG_MMU */
|
|
|
|
|
|
|
|
#ifndef __ASSEMBLY__
/*
 * !CONFIG_MMU with CONFIG_SMP: every local_flush_*() operation is an empty
 * stub.
 */
static inline void local_flush_tlb_all(void)
{
}

static inline void local_flush_tlb_mm(struct mm_struct *mm)
{
}

static inline void local_flush_tlb_page(struct vm_area_struct *vma,
					unsigned long uaddr)
{
}

static inline void local_flush_tlb_kernel_page(unsigned long kaddr)
{
}

static inline void local_flush_tlb_range(struct vm_area_struct *vma,
					 unsigned long start,
					 unsigned long end)
{
}

static inline void local_flush_tlb_kernel_range(unsigned long start,
						unsigned long end)
{
}

static inline void local_flush_bp_all(void)
{
}

/* The flush_*() entry points are still declared for SMP callers. */
extern void flush_tlb_all(void);
extern void flush_tlb_mm(struct mm_struct *mm);
extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr);
extern void flush_tlb_kernel_page(unsigned long kaddr);
extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
extern void flush_bp_all(void);
#endif /* __ASSEMBLY__ */
|
|
|
|
|
|
|
|
#endif
|
2006-02-24 21:41:25 +00:00
|
|
|
|
2013-10-09 17:26:44 +01:00
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
#ifdef CONFIG_ARM_ERRATA_798181
|
|
|
|
extern void erratum_a15_798181_init(void);
|
|
|
|
#else
|
|
|
|
static inline void erratum_a15_798181_init(void) {}
|
|
|
|
#endif
|
|
|
|
extern bool (*erratum_a15_798181_handler)(void);
|
|
|
|
|
|
|
|
static inline bool erratum_a15_798181(void)
|
|
|
|
{
|
|
|
|
if (unlikely(IS_ENABLED(CONFIG_ARM_ERRATA_798181) &&
|
|
|
|
erratum_a15_798181_handler))
|
|
|
|
return erratum_a15_798181_handler();
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2005-04-16 15:20:36 -07:00
|
|
|
#endif
|