2019-06-03 07:44:50 +02:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0-only */
|
2015-02-03 11:26:53 +00:00
|
|
|
/*
|
|
|
|
* Based on arch/arm/include/asm/atomic.h
|
|
|
|
*
|
|
|
|
* Copyright (C) 1996 Russell King.
|
|
|
|
* Copyright (C) 2002 Deep Blue Solutions Ltd.
|
|
|
|
* Copyright (C) 2012 ARM Ltd.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef __ASM_ATOMIC_LL_SC_H
|
|
|
|
#define __ASM_ATOMIC_LL_SC_H
|
|
|
|
|
2019-08-29 14:34:42 +01:00
|
|
|
#include <linux/stringify.h>
|
|
|
|
|
|
|
|
#ifndef CONFIG_CC_HAS_K_CONSTRAINT
|
|
|
|
#define K
|
|
|
|
#endif
|
|
|
|
|
2015-02-03 11:26:53 +00:00
|
|
|
/*
|
|
|
|
* AArch64 UP and SMP safe atomic ops. We use load exclusive and
|
|
|
|
* store exclusive to ensure that these are atomic. We may loop
|
|
|
|
* to ensure that the update happens.
|
|
|
|
*/
|
|
|
|
|
2019-08-28 18:50:06 +01:00
|
|
|
#define ATOMIC_OP(op, asm_op, constraint) \
|
2022-08-17 16:59:14 +01:00
|
|
|
static __always_inline void \
|
2019-08-28 18:50:07 +01:00
|
|
|
__ll_sc_atomic_##op(int i, atomic_t *v) \
|
2015-02-03 11:26:53 +00:00
|
|
|
{ \
|
|
|
|
unsigned long tmp; \
|
|
|
|
int result; \
|
|
|
|
\
|
|
|
|
asm volatile("// atomic_" #op "\n" \
|
arm64: atomics: format whitespace consistently
The code for the atomic ops is formatted inconsistently, and while this
is not a functional problem it is rather distracting when working on
them.
Some have ops have consistent indentation, e.g.
| #define ATOMIC_OP_ADD_RETURN(name, mb, cl...) \
| static inline int __lse_atomic_add_return##name(int i, atomic_t *v) \
| { \
| u32 tmp; \
| \
| asm volatile( \
| __LSE_PREAMBLE \
| " ldadd" #mb " %w[i], %w[tmp], %[v]\n" \
| " add %w[i], %w[i], %w[tmp]" \
| : [i] "+r" (i), [v] "+Q" (v->counter), [tmp] "=&r" (tmp) \
| : "r" (v) \
| : cl); \
| \
| return i; \
| }
While others have negative indentation for some lines, and/or have
misaligned trailing backslashes, e.g.
| static inline void __lse_atomic_##op(int i, atomic_t *v) \
| { \
| asm volatile( \
| __LSE_PREAMBLE \
| " " #asm_op " %w[i], %[v]\n" \
| : [i] "+r" (i), [v] "+Q" (v->counter) \
| : "r" (v)); \
| }
This patch makes the indentation consistent and also aligns the trailing
backslashes. This makes the code easier to read for those (like myself)
who are easily distracted by these inconsistencies.
This is intended as a cleanup.
There should be no functional change as a result of this patch.
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Acked-by: Will Deacon <will@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20211210151410.2782645-2-mark.rutland@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2021-12-10 15:14:06 +00:00
|
|
|
" prfm pstl1strm, %2\n" \
|
|
|
|
"1: ldxr %w0, %2\n" \
|
|
|
|
" " #asm_op " %w0, %w0, %w3\n" \
|
|
|
|
" stxr %w1, %w0, %2\n" \
|
arm64: atomics: remove LL/SC trampolines
When CONFIG_ARM64_LSE_ATOMICS=y, each use of an LL/SC atomic results in
a fragment of code being generated in a subsection without a clear
association with its caller. A trampoline in the caller branches to the
LL/SC atomic with with a direct branch, and the atomic directly branches
back into its trampoline.
This breaks backtracing, as any PC within the out-of-line fragment will
be symbolized as an offset from the nearest prior symbol (which may not
be the function using the atomic), and since the atomic returns with a
direct branch, the caller's PC may be missing from the backtrace.
For example, with secondary_start_kernel() hacked to contain
atomic_inc(NULL), the resulting exception can be reported as being taken
from cpus_are_stuck_in_kernel():
| Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
| Mem abort info:
| ESR = 0x0000000096000004
| EC = 0x25: DABT (current EL), IL = 32 bits
| SET = 0, FnV = 0
| EA = 0, S1PTW = 0
| FSC = 0x04: level 0 translation fault
| Data abort info:
| ISV = 0, ISS = 0x00000004
| CM = 0, WnR = 0
| [0000000000000000] user address but active_mm is swapper
| Internal error: Oops: 96000004 [#1] PREEMPT SMP
| Modules linked in:
| CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.19.0-11219-geb555cb5b794-dirty #3
| Hardware name: linux,dummy-virt (DT)
| pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
| pc : cpus_are_stuck_in_kernel+0xa4/0x120
| lr : secondary_start_kernel+0x164/0x170
| sp : ffff80000a4cbe90
| x29: ffff80000a4cbe90 x28: 0000000000000000 x27: 0000000000000000
| x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000
| x23: 0000000000000000 x22: 0000000000000000 x21: 0000000000000000
| x20: 0000000000000001 x19: 0000000000000001 x18: 0000000000000008
| x17: 3030383832343030 x16: 3030303030307830 x15: ffff80000a4cbab0
| x14: 0000000000000001 x13: 5d31666130663133 x12: 3478305b20313030
| x11: 3030303030303078 x10: 3020726f73736563 x9 : 726f737365636f72
| x8 : ffff800009ff2ef0 x7 : 0000000000000003 x6 : 0000000000000000
| x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000100
| x2 : 0000000000000000 x1 : ffff0000029bd880 x0 : 0000000000000000
| Call trace:
| cpus_are_stuck_in_kernel+0xa4/0x120
| __secondary_switched+0xb0/0xb4
| Code: 35ffffa3 17fffc6c d53cd040 f9800011 (885f7c01)
| ---[ end trace 0000000000000000 ]---
This is confusing and hinders debugging, and will be problematic for
CONFIG_LIVEPATCH as these cases cannot be unwound reliably.
This is very similar to recent issues with out-of-line exception fixups,
which were removed in commits:
35d67794b8828333 ("arm64: lib: __arch_clear_user(): fold fixups into body")
4012e0e22739eef9 ("arm64: lib: __arch_copy_from_user(): fold fixups into body")
139f9ab73d60cf76 ("arm64: lib: __arch_copy_to_user(): fold fixups into body")
When the trampolines were introduced in commit:
addfc38672c73efd ("arm64: atomics: avoid out-of-line ll/sc atomics")
The rationale was to improve icache performance by grouping the LL/SC
atomics together. This has never been measured, and this theoretical
benefit is outweighed by other factors:
* As the subsections are collapsed into sections at object file
granularity, these are spread out throughout the kernel and can share
cachelines with unrelated code regardless.
* GCC 12.1.0 has been observed to place the trampoline out-of-line in
specialised __ll_sc_*() functions, introducing more branching than was
intended.
* Removing the trampolines has been observed to shrink a defconfig
kernel Image by 64KiB when building with GCC 12.1.0.
This patch removes the LL/SC trampolines, meaning that the LL/SC atomics
will be inlined into their callers (or placed in out-of line functions
using regular BL/RET pairs). When CONFIG_ARM64_LSE_ATOMICS=y, the LL/SC
atomics are always called in an unlikely branch, and will be placed in a
cold portion of the function, so this should have minimal impact to the
hot paths.
Other than the improved backtracing, there should be no functional
change as a result of this patch.
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20220817155914.3975112-2-mark.rutland@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2022-08-17 16:59:13 +01:00
|
|
|
" cbnz %w1, 1b\n" \
|
2015-02-03 11:26:53 +00:00
|
|
|
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
|
2019-08-29 14:34:42 +01:00
|
|
|
: __stringify(constraint) "r" (i)); \
|
2019-08-28 18:50:07 +01:00
|
|
|
}
|
2015-02-03 11:26:53 +00:00
|
|
|
|
2019-08-28 18:50:06 +01:00
|
|
|
#define ATOMIC_OP_RETURN(name, mb, acq, rel, cl, op, asm_op, constraint)\
|
2022-08-17 16:59:14 +01:00
|
|
|
static __always_inline int \
|
2019-08-28 18:50:07 +01:00
|
|
|
__ll_sc_atomic_##op##_return##name(int i, atomic_t *v) \
|
2015-02-03 11:26:53 +00:00
|
|
|
{ \
|
|
|
|
unsigned long tmp; \
|
|
|
|
int result; \
|
|
|
|
\
|
2015-10-08 20:15:18 +01:00
|
|
|
asm volatile("// atomic_" #op "_return" #name "\n" \
|
arm64: atomics: format whitespace consistently
The code for the atomic ops is formatted inconsistently, and while this
is not a functional problem it is rather distracting when working on
them.
Some have ops have consistent indentation, e.g.
| #define ATOMIC_OP_ADD_RETURN(name, mb, cl...) \
| static inline int __lse_atomic_add_return##name(int i, atomic_t *v) \
| { \
| u32 tmp; \
| \
| asm volatile( \
| __LSE_PREAMBLE \
| " ldadd" #mb " %w[i], %w[tmp], %[v]\n" \
| " add %w[i], %w[i], %w[tmp]" \
| : [i] "+r" (i), [v] "+Q" (v->counter), [tmp] "=&r" (tmp) \
| : "r" (v) \
| : cl); \
| \
| return i; \
| }
While others have negative indentation for some lines, and/or have
misaligned trailing backslashes, e.g.
| static inline void __lse_atomic_##op(int i, atomic_t *v) \
| { \
| asm volatile( \
| __LSE_PREAMBLE \
| " " #asm_op " %w[i], %[v]\n" \
| : [i] "+r" (i), [v] "+Q" (v->counter) \
| : "r" (v)); \
| }
This patch makes the indentation consistent and also aligns the trailing
backslashes. This makes the code easier to read for those (like myself)
who are easily distracted by these inconsistencies.
This is intended as a cleanup.
There should be no functional change as a result of this patch.
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Acked-by: Will Deacon <will@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20211210151410.2782645-2-mark.rutland@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2021-12-10 15:14:06 +00:00
|
|
|
" prfm pstl1strm, %2\n" \
|
|
|
|
"1: ld" #acq "xr %w0, %2\n" \
|
|
|
|
" " #asm_op " %w0, %w0, %w3\n" \
|
|
|
|
" st" #rel "xr %w1, %w0, %2\n" \
|
|
|
|
" cbnz %w1, 1b\n" \
|
arm64: atomics: remove LL/SC trampolines
When CONFIG_ARM64_LSE_ATOMICS=y, each use of an LL/SC atomic results in
a fragment of code being generated in a subsection without a clear
association with its caller. A trampoline in the caller branches to the
LL/SC atomic with with a direct branch, and the atomic directly branches
back into its trampoline.
This breaks backtracing, as any PC within the out-of-line fragment will
be symbolized as an offset from the nearest prior symbol (which may not
be the function using the atomic), and since the atomic returns with a
direct branch, the caller's PC may be missing from the backtrace.
For example, with secondary_start_kernel() hacked to contain
atomic_inc(NULL), the resulting exception can be reported as being taken
from cpus_are_stuck_in_kernel():
| Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
| Mem abort info:
| ESR = 0x0000000096000004
| EC = 0x25: DABT (current EL), IL = 32 bits
| SET = 0, FnV = 0
| EA = 0, S1PTW = 0
| FSC = 0x04: level 0 translation fault
| Data abort info:
| ISV = 0, ISS = 0x00000004
| CM = 0, WnR = 0
| [0000000000000000] user address but active_mm is swapper
| Internal error: Oops: 96000004 [#1] PREEMPT SMP
| Modules linked in:
| CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.19.0-11219-geb555cb5b794-dirty #3
| Hardware name: linux,dummy-virt (DT)
| pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
| pc : cpus_are_stuck_in_kernel+0xa4/0x120
| lr : secondary_start_kernel+0x164/0x170
| sp : ffff80000a4cbe90
| x29: ffff80000a4cbe90 x28: 0000000000000000 x27: 0000000000000000
| x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000
| x23: 0000000000000000 x22: 0000000000000000 x21: 0000000000000000
| x20: 0000000000000001 x19: 0000000000000001 x18: 0000000000000008
| x17: 3030383832343030 x16: 3030303030307830 x15: ffff80000a4cbab0
| x14: 0000000000000001 x13: 5d31666130663133 x12: 3478305b20313030
| x11: 3030303030303078 x10: 3020726f73736563 x9 : 726f737365636f72
| x8 : ffff800009ff2ef0 x7 : 0000000000000003 x6 : 0000000000000000
| x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000100
| x2 : 0000000000000000 x1 : ffff0000029bd880 x0 : 0000000000000000
| Call trace:
| cpus_are_stuck_in_kernel+0xa4/0x120
| __secondary_switched+0xb0/0xb4
| Code: 35ffffa3 17fffc6c d53cd040 f9800011 (885f7c01)
| ---[ end trace 0000000000000000 ]---
This is confusing and hinders debugging, and will be problematic for
CONFIG_LIVEPATCH as these cases cannot be unwound reliably.
This is very similar to recent issues with out-of-line exception fixups,
which were removed in commits:
35d67794b8828333 ("arm64: lib: __arch_clear_user(): fold fixups into body")
4012e0e22739eef9 ("arm64: lib: __arch_copy_from_user(): fold fixups into body")
139f9ab73d60cf76 ("arm64: lib: __arch_copy_to_user(): fold fixups into body")
When the trampolines were introduced in commit:
addfc38672c73efd ("arm64: atomics: avoid out-of-line ll/sc atomics")
The rationale was to improve icache performance by grouping the LL/SC
atomics together. This has never been measured, and this theoretical
benefit is outweighed by other factors:
* As the subsections are collapsed into sections at object file
granularity, these are spread out throughout the kernel and can share
cachelines with unrelated code regardless.
* GCC 12.1.0 has been observed to place the trampoline out-of-line in
specialised __ll_sc_*() functions, introducing more branching than was
intended.
* Removing the trampolines has been observed to shrink a defconfig
kernel Image by 64KiB when building with GCC 12.1.0.
This patch removes the LL/SC trampolines, meaning that the LL/SC atomics
will be inlined into their callers (or placed in out-of line functions
using regular BL/RET pairs). When CONFIG_ARM64_LSE_ATOMICS=y, the LL/SC
atomics are always called in an unlikely branch, and will be placed in a
cold portion of the function, so this should have minimal impact to the
hot paths.
Other than the improved backtracing, there should be no functional
change as a result of this patch.
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20220817155914.3975112-2-mark.rutland@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2022-08-17 16:59:13 +01:00
|
|
|
" " #mb \
|
2015-02-03 11:26:53 +00:00
|
|
|
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
|
2019-08-29 14:34:42 +01:00
|
|
|
: __stringify(constraint) "r" (i) \
|
2015-10-08 20:15:18 +01:00
|
|
|
: cl); \
|
2015-02-03 11:26:53 +00:00
|
|
|
\
|
|
|
|
return result; \
|
2019-08-28 18:50:07 +01:00
|
|
|
}
|
2015-10-08 20:15:18 +01:00
|
|
|
|
2019-08-28 18:50:07 +01:00
|
|
|
#define ATOMIC_FETCH_OP(name, mb, acq, rel, cl, op, asm_op, constraint) \
|
2022-08-17 16:59:14 +01:00
|
|
|
static __always_inline int \
|
2019-08-28 18:50:07 +01:00
|
|
|
__ll_sc_atomic_fetch_##op##name(int i, atomic_t *v) \
|
locking/atomic, arch/arm64: Implement atomic{,64}_fetch_{add,sub,and,andnot,or,xor}{,_relaxed,_acquire,_release}()
Implement FETCH-OP atomic primitives, these are very similar to the
existing OP-RETURN primitives we already have, except they return the
value of the atomic variable _before_ modification.
This is especially useful for irreversible operations -- such as
bitops (because it becomes impossible to reconstruct the state prior
to modification).
[wildea01: compile fixes for ll/sc]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steve Capper <steve.capper@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-arch@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2016-04-18 01:16:09 +02:00
|
|
|
{ \
|
|
|
|
unsigned long tmp; \
|
|
|
|
int val, result; \
|
|
|
|
\
|
|
|
|
asm volatile("// atomic_fetch_" #op #name "\n" \
|
arm64: atomics: format whitespace consistently
The code for the atomic ops is formatted inconsistently, and while this
is not a functional problem it is rather distracting when working on
them.
Some have ops have consistent indentation, e.g.
| #define ATOMIC_OP_ADD_RETURN(name, mb, cl...) \
| static inline int __lse_atomic_add_return##name(int i, atomic_t *v) \
| { \
| u32 tmp; \
| \
| asm volatile( \
| __LSE_PREAMBLE \
| " ldadd" #mb " %w[i], %w[tmp], %[v]\n" \
| " add %w[i], %w[i], %w[tmp]" \
| : [i] "+r" (i), [v] "+Q" (v->counter), [tmp] "=&r" (tmp) \
| : "r" (v) \
| : cl); \
| \
| return i; \
| }
While others have negative indentation for some lines, and/or have
misaligned trailing backslashes, e.g.
| static inline void __lse_atomic_##op(int i, atomic_t *v) \
| { \
| asm volatile( \
| __LSE_PREAMBLE \
| " " #asm_op " %w[i], %[v]\n" \
| : [i] "+r" (i), [v] "+Q" (v->counter) \
| : "r" (v)); \
| }
This patch makes the indentation consistent and also aligns the trailing
backslashes. This makes the code easier to read for those (like myself)
who are easily distracted by these inconsistencies.
This is intended as a cleanup.
There should be no functional change as a result of this patch.
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Acked-by: Will Deacon <will@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20211210151410.2782645-2-mark.rutland@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2021-12-10 15:14:06 +00:00
|
|
|
" prfm pstl1strm, %3\n" \
|
|
|
|
"1: ld" #acq "xr %w0, %3\n" \
|
|
|
|
" " #asm_op " %w1, %w0, %w4\n" \
|
|
|
|
" st" #rel "xr %w2, %w1, %3\n" \
|
|
|
|
" cbnz %w2, 1b\n" \
|
arm64: atomics: remove LL/SC trampolines
When CONFIG_ARM64_LSE_ATOMICS=y, each use of an LL/SC atomic results in
a fragment of code being generated in a subsection without a clear
association with its caller. A trampoline in the caller branches to the
LL/SC atomic with with a direct branch, and the atomic directly branches
back into its trampoline.
This breaks backtracing, as any PC within the out-of-line fragment will
be symbolized as an offset from the nearest prior symbol (which may not
be the function using the atomic), and since the atomic returns with a
direct branch, the caller's PC may be missing from the backtrace.
For example, with secondary_start_kernel() hacked to contain
atomic_inc(NULL), the resulting exception can be reported as being taken
from cpus_are_stuck_in_kernel():
| Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
| Mem abort info:
| ESR = 0x0000000096000004
| EC = 0x25: DABT (current EL), IL = 32 bits
| SET = 0, FnV = 0
| EA = 0, S1PTW = 0
| FSC = 0x04: level 0 translation fault
| Data abort info:
| ISV = 0, ISS = 0x00000004
| CM = 0, WnR = 0
| [0000000000000000] user address but active_mm is swapper
| Internal error: Oops: 96000004 [#1] PREEMPT SMP
| Modules linked in:
| CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.19.0-11219-geb555cb5b794-dirty #3
| Hardware name: linux,dummy-virt (DT)
| pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
| pc : cpus_are_stuck_in_kernel+0xa4/0x120
| lr : secondary_start_kernel+0x164/0x170
| sp : ffff80000a4cbe90
| x29: ffff80000a4cbe90 x28: 0000000000000000 x27: 0000000000000000
| x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000
| x23: 0000000000000000 x22: 0000000000000000 x21: 0000000000000000
| x20: 0000000000000001 x19: 0000000000000001 x18: 0000000000000008
| x17: 3030383832343030 x16: 3030303030307830 x15: ffff80000a4cbab0
| x14: 0000000000000001 x13: 5d31666130663133 x12: 3478305b20313030
| x11: 3030303030303078 x10: 3020726f73736563 x9 : 726f737365636f72
| x8 : ffff800009ff2ef0 x7 : 0000000000000003 x6 : 0000000000000000
| x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000100
| x2 : 0000000000000000 x1 : ffff0000029bd880 x0 : 0000000000000000
| Call trace:
| cpus_are_stuck_in_kernel+0xa4/0x120
| __secondary_switched+0xb0/0xb4
| Code: 35ffffa3 17fffc6c d53cd040 f9800011 (885f7c01)
| ---[ end trace 0000000000000000 ]---
This is confusing and hinders debugging, and will be problematic for
CONFIG_LIVEPATCH as these cases cannot be unwound reliably.
This is very similar to recent issues with out-of-line exception fixups,
which were removed in commits:
35d67794b8828333 ("arm64: lib: __arch_clear_user(): fold fixups into body")
4012e0e22739eef9 ("arm64: lib: __arch_copy_from_user(): fold fixups into body")
139f9ab73d60cf76 ("arm64: lib: __arch_copy_to_user(): fold fixups into body")
When the trampolines were introduced in commit:
addfc38672c73efd ("arm64: atomics: avoid out-of-line ll/sc atomics")
The rationale was to improve icache performance by grouping the LL/SC
atomics together. This has never been measured, and this theoretical
benefit is outweighed by other factors:
* As the subsections are collapsed into sections at object file
granularity, these are spread out throughout the kernel and can share
cachelines with unrelated code regardless.
* GCC 12.1.0 has been observed to place the trampoline out-of-line in
specialised __ll_sc_*() functions, introducing more branching than was
intended.
* Removing the trampolines has been observed to shrink a defconfig
kernel Image by 64KiB when building with GCC 12.1.0.
This patch removes the LL/SC trampolines, meaning that the LL/SC atomics
will be inlined into their callers (or placed in out-of line functions
using regular BL/RET pairs). When CONFIG_ARM64_LSE_ATOMICS=y, the LL/SC
atomics are always called in an unlikely branch, and will be placed in a
cold portion of the function, so this should have minimal impact to the
hot paths.
Other than the improved backtracing, there should be no functional
change as a result of this patch.
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20220817155914.3975112-2-mark.rutland@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2022-08-17 16:59:13 +01:00
|
|
|
" " #mb \
|
locking/atomic, arch/arm64: Implement atomic{,64}_fetch_{add,sub,and,andnot,or,xor}{,_relaxed,_acquire,_release}()
Implement FETCH-OP atomic primitives, these are very similar to the
existing OP-RETURN primitives we already have, except they return the
value of the atomic variable _before_ modification.
This is especially useful for irreversible operations -- such as
bitops (because it becomes impossible to reconstruct the state prior
to modification).
[wildea01: compile fixes for ll/sc]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steve Capper <steve.capper@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-arch@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2016-04-18 01:16:09 +02:00
|
|
|
: "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \
|
2019-08-29 14:34:42 +01:00
|
|
|
: __stringify(constraint) "r" (i) \
|
locking/atomic, arch/arm64: Implement atomic{,64}_fetch_{add,sub,and,andnot,or,xor}{,_relaxed,_acquire,_release}()
Implement FETCH-OP atomic primitives, these are very similar to the
existing OP-RETURN primitives we already have, except they return the
value of the atomic variable _before_ modification.
This is especially useful for irreversible operations -- such as
bitops (because it becomes impossible to reconstruct the state prior
to modification).
[wildea01: compile fixes for ll/sc]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steve Capper <steve.capper@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-arch@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2016-04-18 01:16:09 +02:00
|
|
|
: cl); \
|
|
|
|
\
|
|
|
|
return result; \
|
2019-08-28 18:50:07 +01:00
|
|
|
}
|
locking/atomic, arch/arm64: Implement atomic{,64}_fetch_{add,sub,and,andnot,or,xor}{,_relaxed,_acquire,_release}()
Implement FETCH-OP atomic primitives, these are very similar to the
existing OP-RETURN primitives we already have, except they return the
value of the atomic variable _before_ modification.
This is especially useful for irreversible operations -- such as
bitops (because it becomes impossible to reconstruct the state prior
to modification).
[wildea01: compile fixes for ll/sc]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steve Capper <steve.capper@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-arch@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2016-04-18 01:16:09 +02:00
|
|
|
|
2015-10-08 20:15:18 +01:00
|
|
|
#define ATOMIC_OPS(...) \
|
|
|
|
ATOMIC_OP(__VA_ARGS__) \
|
locking/atomic, arch/arm64: Implement atomic{,64}_fetch_{add,sub,and,andnot,or,xor}{,_relaxed,_acquire,_release}()
Implement FETCH-OP atomic primitives, these are very similar to the
existing OP-RETURN primitives we already have, except they return the
value of the atomic variable _before_ modification.
This is especially useful for irreversible operations -- such as
bitops (because it becomes impossible to reconstruct the state prior
to modification).
[wildea01: compile fixes for ll/sc]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steve Capper <steve.capper@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-arch@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2016-04-18 01:16:09 +02:00
|
|
|
ATOMIC_OP_RETURN( , dmb ish, , l, "memory", __VA_ARGS__)\
|
2015-10-08 20:15:18 +01:00
|
|
|
ATOMIC_OP_RETURN(_relaxed, , , , , __VA_ARGS__)\
|
|
|
|
ATOMIC_OP_RETURN(_acquire, , a, , "memory", __VA_ARGS__)\
|
locking/atomic, arch/arm64: Implement atomic{,64}_fetch_{add,sub,and,andnot,or,xor}{,_relaxed,_acquire,_release}()
Implement FETCH-OP atomic primitives, these are very similar to the
existing OP-RETURN primitives we already have, except they return the
value of the atomic variable _before_ modification.
This is especially useful for irreversible operations -- such as
bitops (because it becomes impossible to reconstruct the state prior
to modification).
[wildea01: compile fixes for ll/sc]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steve Capper <steve.capper@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-arch@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2016-04-18 01:16:09 +02:00
|
|
|
ATOMIC_OP_RETURN(_release, , , l, "memory", __VA_ARGS__)\
|
|
|
|
ATOMIC_FETCH_OP ( , dmb ish, , l, "memory", __VA_ARGS__)\
|
|
|
|
ATOMIC_FETCH_OP (_relaxed, , , , , __VA_ARGS__)\
|
|
|
|
ATOMIC_FETCH_OP (_acquire, , a, , "memory", __VA_ARGS__)\
|
|
|
|
ATOMIC_FETCH_OP (_release, , , l, "memory", __VA_ARGS__)
|
2015-02-03 11:26:53 +00:00
|
|
|
|
2019-08-28 18:50:06 +01:00
|
|
|
ATOMIC_OPS(add, add, I)
|
|
|
|
ATOMIC_OPS(sub, sub, J)
|
locking/atomic, arch/arm64: Implement atomic{,64}_fetch_{add,sub,and,andnot,or,xor}{,_relaxed,_acquire,_release}()
Implement FETCH-OP atomic primitives, these are very similar to the
existing OP-RETURN primitives we already have, except they return the
value of the atomic variable _before_ modification.
This is especially useful for irreversible operations -- such as
bitops (because it becomes impossible to reconstruct the state prior
to modification).
[wildea01: compile fixes for ll/sc]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steve Capper <steve.capper@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-arch@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2016-04-18 01:16:09 +02:00
|
|
|
|
|
|
|
#undef ATOMIC_OPS
|
|
|
|
#define ATOMIC_OPS(...) \
|
|
|
|
ATOMIC_OP(__VA_ARGS__) \
|
|
|
|
ATOMIC_FETCH_OP ( , dmb ish, , l, "memory", __VA_ARGS__)\
|
|
|
|
ATOMIC_FETCH_OP (_relaxed, , , , , __VA_ARGS__)\
|
|
|
|
ATOMIC_FETCH_OP (_acquire, , a, , "memory", __VA_ARGS__)\
|
|
|
|
ATOMIC_FETCH_OP (_release, , , l, "memory", __VA_ARGS__)
|
2015-02-03 11:26:53 +00:00
|
|
|
|
2019-08-29 14:34:42 +01:00
|
|
|
ATOMIC_OPS(and, and, K)
|
|
|
|
ATOMIC_OPS(or, orr, K)
|
|
|
|
ATOMIC_OPS(xor, eor, K)
|
|
|
|
/*
|
|
|
|
* GAS converts the mysterious and undocumented BIC (immediate) alias to
|
|
|
|
* an AND (immediate) instruction with the immediate inverted. We don't
|
|
|
|
* have a constraint for this, so fall back to register.
|
|
|
|
*/
|
2019-08-28 18:50:06 +01:00
|
|
|
ATOMIC_OPS(andnot, bic, )
|
2015-02-03 11:26:53 +00:00
|
|
|
|
|
|
|
#undef ATOMIC_OPS
|
locking/atomic, arch/arm64: Implement atomic{,64}_fetch_{add,sub,and,andnot,or,xor}{,_relaxed,_acquire,_release}()
Implement FETCH-OP atomic primitives, these are very similar to the
existing OP-RETURN primitives we already have, except they return the
value of the atomic variable _before_ modification.
This is especially useful for irreversible operations -- such as
bitops (because it becomes impossible to reconstruct the state prior
to modification).
[wildea01: compile fixes for ll/sc]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steve Capper <steve.capper@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-arch@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2016-04-18 01:16:09 +02:00
|
|
|
#undef ATOMIC_FETCH_OP
|
2015-02-03 11:26:53 +00:00
|
|
|
#undef ATOMIC_OP_RETURN
|
|
|
|
#undef ATOMIC_OP
|
|
|
|
|
2019-08-28 18:50:06 +01:00
|
|
|
#define ATOMIC64_OP(op, asm_op, constraint) \
|
2022-08-17 16:59:14 +01:00
|
|
|
static __always_inline void \
|
2019-08-28 18:50:07 +01:00
|
|
|
__ll_sc_atomic64_##op(s64 i, atomic64_t *v) \
|
2015-02-03 11:26:53 +00:00
|
|
|
{ \
|
2019-05-22 14:22:39 +01:00
|
|
|
s64 result; \
|
2015-02-03 11:26:53 +00:00
|
|
|
unsigned long tmp; \
|
|
|
|
\
|
|
|
|
asm volatile("// atomic64_" #op "\n" \
|
arm64: atomics: format whitespace consistently
The code for the atomic ops is formatted inconsistently, and while this
is not a functional problem it is rather distracting when working on
them.
Some have ops have consistent indentation, e.g.
| #define ATOMIC_OP_ADD_RETURN(name, mb, cl...) \
| static inline int __lse_atomic_add_return##name(int i, atomic_t *v) \
| { \
| u32 tmp; \
| \
| asm volatile( \
| __LSE_PREAMBLE \
| " ldadd" #mb " %w[i], %w[tmp], %[v]\n" \
| " add %w[i], %w[i], %w[tmp]" \
| : [i] "+r" (i), [v] "+Q" (v->counter), [tmp] "=&r" (tmp) \
| : "r" (v) \
| : cl); \
| \
| return i; \
| }
While others have negative indentation for some lines, and/or have
misaligned trailing backslashes, e.g.
| static inline void __lse_atomic_##op(int i, atomic_t *v) \
| { \
| asm volatile( \
| __LSE_PREAMBLE \
| " " #asm_op " %w[i], %[v]\n" \
| : [i] "+r" (i), [v] "+Q" (v->counter) \
| : "r" (v)); \
| }
This patch makes the indentation consistent and also aligns the trailing
backslashes. This makes the code easier to read for those (like myself)
who are easily distracted by these inconsistencies.
This is intended as a cleanup.
There should be no functional change as a result of this patch.
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Acked-by: Will Deacon <will@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20211210151410.2782645-2-mark.rutland@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2021-12-10 15:14:06 +00:00
|
|
|
" prfm pstl1strm, %2\n" \
|
|
|
|
"1: ldxr %0, %2\n" \
|
|
|
|
" " #asm_op " %0, %0, %3\n" \
|
|
|
|
" stxr %w1, %0, %2\n" \
|
arm64: atomics: remove LL/SC trampolines
When CONFIG_ARM64_LSE_ATOMICS=y, each use of an LL/SC atomic results in
a fragment of code being generated in a subsection without a clear
association with its caller. A trampoline in the caller branches to the
LL/SC atomic with with a direct branch, and the atomic directly branches
back into its trampoline.
This breaks backtracing, as any PC within the out-of-line fragment will
be symbolized as an offset from the nearest prior symbol (which may not
be the function using the atomic), and since the atomic returns with a
direct branch, the caller's PC may be missing from the backtrace.
For example, with secondary_start_kernel() hacked to contain
atomic_inc(NULL), the resulting exception can be reported as being taken
from cpus_are_stuck_in_kernel():
| Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
| Mem abort info:
| ESR = 0x0000000096000004
| EC = 0x25: DABT (current EL), IL = 32 bits
| SET = 0, FnV = 0
| EA = 0, S1PTW = 0
| FSC = 0x04: level 0 translation fault
| Data abort info:
| ISV = 0, ISS = 0x00000004
| CM = 0, WnR = 0
| [0000000000000000] user address but active_mm is swapper
| Internal error: Oops: 96000004 [#1] PREEMPT SMP
| Modules linked in:
| CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.19.0-11219-geb555cb5b794-dirty #3
| Hardware name: linux,dummy-virt (DT)
| pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
| pc : cpus_are_stuck_in_kernel+0xa4/0x120
| lr : secondary_start_kernel+0x164/0x170
| sp : ffff80000a4cbe90
| x29: ffff80000a4cbe90 x28: 0000000000000000 x27: 0000000000000000
| x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000
| x23: 0000000000000000 x22: 0000000000000000 x21: 0000000000000000
| x20: 0000000000000001 x19: 0000000000000001 x18: 0000000000000008
| x17: 3030383832343030 x16: 3030303030307830 x15: ffff80000a4cbab0
| x14: 0000000000000001 x13: 5d31666130663133 x12: 3478305b20313030
| x11: 3030303030303078 x10: 3020726f73736563 x9 : 726f737365636f72
| x8 : ffff800009ff2ef0 x7 : 0000000000000003 x6 : 0000000000000000
| x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000100
| x2 : 0000000000000000 x1 : ffff0000029bd880 x0 : 0000000000000000
| Call trace:
| cpus_are_stuck_in_kernel+0xa4/0x120
| __secondary_switched+0xb0/0xb4
| Code: 35ffffa3 17fffc6c d53cd040 f9800011 (885f7c01)
| ---[ end trace 0000000000000000 ]---
This is confusing and hinders debugging, and will be problematic for
CONFIG_LIVEPATCH as these cases cannot be unwound reliably.
This is very similar to recent issues with out-of-line exception fixups,
which were removed in commits:
35d67794b8828333 ("arm64: lib: __arch_clear_user(): fold fixups into body")
4012e0e22739eef9 ("arm64: lib: __arch_copy_from_user(): fold fixups into body")
139f9ab73d60cf76 ("arm64: lib: __arch_copy_to_user(): fold fixups into body")
When the trampolines were introduced in commit:
addfc38672c73efd ("arm64: atomics: avoid out-of-line ll/sc atomics")
The rationale was to improve icache performance by grouping the LL/SC
atomics together. This has never been measured, and this theoretical
benefit is outweighed by other factors:
* As the subsections are collapsed into sections at object file
granularity, these are spread out throughout the kernel and can share
cachelines with unrelated code regardless.
* GCC 12.1.0 has been observed to place the trampoline out-of-line in
specialised __ll_sc_*() functions, introducing more branching than was
intended.
* Removing the trampolines has been observed to shrink a defconfig
kernel Image by 64KiB when building with GCC 12.1.0.
This patch removes the LL/SC trampolines, meaning that the LL/SC atomics
will be inlined into their callers (or placed in out-of line functions
using regular BL/RET pairs). When CONFIG_ARM64_LSE_ATOMICS=y, the LL/SC
atomics are always called in an unlikely branch, and will be placed in a
cold portion of the function, so this should have minimal impact to the
hot paths.
Other than the improved backtracing, there should be no functional
change as a result of this patch.
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20220817155914.3975112-2-mark.rutland@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2022-08-17 16:59:13 +01:00
|
|
|
" cbnz %w1, 1b" \
|
2015-02-03 11:26:53 +00:00
|
|
|
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
|
2019-08-29 14:34:42 +01:00
|
|
|
: __stringify(constraint) "r" (i)); \
|
2019-08-28 18:50:07 +01:00
|
|
|
}
|
2015-02-03 11:26:53 +00:00
|
|
|
|
2019-08-28 18:50:06 +01:00
|
|
|
#define ATOMIC64_OP_RETURN(name, mb, acq, rel, cl, op, asm_op, constraint)\
|
2022-08-17 16:59:14 +01:00
|
|
|
static __always_inline long \
|
2019-08-28 18:50:07 +01:00
|
|
|
__ll_sc_atomic64_##op##_return##name(s64 i, atomic64_t *v) \
|
2015-02-03 11:26:53 +00:00
|
|
|
{ \
|
2019-05-22 14:22:39 +01:00
|
|
|
s64 result; \
|
2015-02-03 11:26:53 +00:00
|
|
|
unsigned long tmp; \
|
|
|
|
\
|
2015-10-08 20:15:18 +01:00
|
|
|
asm volatile("// atomic64_" #op "_return" #name "\n" \
|
arm64: atomics: format whitespace consistently
The code for the atomic ops is formatted inconsistently, and while this
is not a functional problem it is rather distracting when working on
them.
Some have ops have consistent indentation, e.g.
| #define ATOMIC_OP_ADD_RETURN(name, mb, cl...) \
| static inline int __lse_atomic_add_return##name(int i, atomic_t *v) \
| { \
| u32 tmp; \
| \
| asm volatile( \
| __LSE_PREAMBLE \
| " ldadd" #mb " %w[i], %w[tmp], %[v]\n" \
| " add %w[i], %w[i], %w[tmp]" \
| : [i] "+r" (i), [v] "+Q" (v->counter), [tmp] "=&r" (tmp) \
| : "r" (v) \
| : cl); \
| \
| return i; \
| }
While others have negative indentation for some lines, and/or have
misaligned trailing backslashes, e.g.
| static inline void __lse_atomic_##op(int i, atomic_t *v) \
| { \
| asm volatile( \
| __LSE_PREAMBLE \
| " " #asm_op " %w[i], %[v]\n" \
| : [i] "+r" (i), [v] "+Q" (v->counter) \
| : "r" (v)); \
| }
This patch makes the indentation consistent and also aligns the trailing
backslashes. This makes the code easier to read for those (like myself)
who are easily distracted by these inconsistencies.
This is intended as a cleanup.
There should be no functional change as a result of this patch.
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Acked-by: Will Deacon <will@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20211210151410.2782645-2-mark.rutland@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2021-12-10 15:14:06 +00:00
|
|
|
" prfm pstl1strm, %2\n" \
|
|
|
|
"1: ld" #acq "xr %0, %2\n" \
|
|
|
|
" " #asm_op " %0, %0, %3\n" \
|
|
|
|
" st" #rel "xr %w1, %0, %2\n" \
|
|
|
|
" cbnz %w1, 1b\n" \
|
arm64: atomics: remove LL/SC trampolines
When CONFIG_ARM64_LSE_ATOMICS=y, each use of an LL/SC atomic results in
a fragment of code being generated in a subsection without a clear
association with its caller. A trampoline in the caller branches to the
LL/SC atomic with with a direct branch, and the atomic directly branches
back into its trampoline.
This breaks backtracing, as any PC within the out-of-line fragment will
be symbolized as an offset from the nearest prior symbol (which may not
be the function using the atomic), and since the atomic returns with a
direct branch, the caller's PC may be missing from the backtrace.
For example, with secondary_start_kernel() hacked to contain
atomic_inc(NULL), the resulting exception can be reported as being taken
from cpus_are_stuck_in_kernel():
| Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
| Mem abort info:
| ESR = 0x0000000096000004
| EC = 0x25: DABT (current EL), IL = 32 bits
| SET = 0, FnV = 0
| EA = 0, S1PTW = 0
| FSC = 0x04: level 0 translation fault
| Data abort info:
| ISV = 0, ISS = 0x00000004
| CM = 0, WnR = 0
| [0000000000000000] user address but active_mm is swapper
| Internal error: Oops: 96000004 [#1] PREEMPT SMP
| Modules linked in:
| CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.19.0-11219-geb555cb5b794-dirty #3
| Hardware name: linux,dummy-virt (DT)
| pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
| pc : cpus_are_stuck_in_kernel+0xa4/0x120
| lr : secondary_start_kernel+0x164/0x170
| sp : ffff80000a4cbe90
| x29: ffff80000a4cbe90 x28: 0000000000000000 x27: 0000000000000000
| x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000
| x23: 0000000000000000 x22: 0000000000000000 x21: 0000000000000000
| x20: 0000000000000001 x19: 0000000000000001 x18: 0000000000000008
| x17: 3030383832343030 x16: 3030303030307830 x15: ffff80000a4cbab0
| x14: 0000000000000001 x13: 5d31666130663133 x12: 3478305b20313030
| x11: 3030303030303078 x10: 3020726f73736563 x9 : 726f737365636f72
| x8 : ffff800009ff2ef0 x7 : 0000000000000003 x6 : 0000000000000000
| x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000100
| x2 : 0000000000000000 x1 : ffff0000029bd880 x0 : 0000000000000000
| Call trace:
| cpus_are_stuck_in_kernel+0xa4/0x120
| __secondary_switched+0xb0/0xb4
| Code: 35ffffa3 17fffc6c d53cd040 f9800011 (885f7c01)
| ---[ end trace 0000000000000000 ]---
This is confusing and hinders debugging, and will be problematic for
CONFIG_LIVEPATCH as these cases cannot be unwound reliably.
This is very similar to recent issues with out-of-line exception fixups,
which were removed in commits:
35d67794b8828333 ("arm64: lib: __arch_clear_user(): fold fixups into body")
4012e0e22739eef9 ("arm64: lib: __arch_copy_from_user(): fold fixups into body")
139f9ab73d60cf76 ("arm64: lib: __arch_copy_to_user(): fold fixups into body")
When the trampolines were introduced in commit:
addfc38672c73efd ("arm64: atomics: avoid out-of-line ll/sc atomics")
The rationale was to improve icache performance by grouping the LL/SC
atomics together. This has never been measured, and this theoretical
benefit is outweighed by other factors:
* As the subsections are collapsed into sections at object file
granularity, these are spread out throughout the kernel and can share
cachelines with unrelated code regardless.
* GCC 12.1.0 has been observed to place the trampoline out-of-line in
specialised __ll_sc_*() functions, introducing more branching than was
intended.
* Removing the trampolines has been observed to shrink a defconfig
kernel Image by 64KiB when building with GCC 12.1.0.
This patch removes the LL/SC trampolines, meaning that the LL/SC atomics
will be inlined into their callers (or placed in out-of line functions
using regular BL/RET pairs). When CONFIG_ARM64_LSE_ATOMICS=y, the LL/SC
atomics are always called in an unlikely branch, and will be placed in a
cold portion of the function, so this should have minimal impact to the
hot paths.
Other than the improved backtracing, there should be no functional
change as a result of this patch.
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20220817155914.3975112-2-mark.rutland@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2022-08-17 16:59:13 +01:00
|
|
|
" " #mb \
|
2015-02-03 11:26:53 +00:00
|
|
|
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
|
2019-08-29 14:34:42 +01:00
|
|
|
: __stringify(constraint) "r" (i) \
|
2015-10-08 20:15:18 +01:00
|
|
|
: cl); \
|
2015-02-03 11:26:53 +00:00
|
|
|
\
|
|
|
|
return result; \
|
2019-08-28 18:50:07 +01:00
|
|
|
}
|
2015-10-08 20:15:18 +01:00
|
|
|
|
2019-08-28 18:50:06 +01:00
|
|
|
#define ATOMIC64_FETCH_OP(name, mb, acq, rel, cl, op, asm_op, constraint)\
|
2022-08-17 16:59:14 +01:00
|
|
|
static __always_inline long \
|
arm64: atomics: format whitespace consistently
The code for the atomic ops is formatted inconsistently, and while this
is not a functional problem it is rather distracting when working on
them.
Some have ops have consistent indentation, e.g.
| #define ATOMIC_OP_ADD_RETURN(name, mb, cl...) \
| static inline int __lse_atomic_add_return##name(int i, atomic_t *v) \
| { \
| u32 tmp; \
| \
| asm volatile( \
| __LSE_PREAMBLE \
| " ldadd" #mb " %w[i], %w[tmp], %[v]\n" \
| " add %w[i], %w[i], %w[tmp]" \
| : [i] "+r" (i), [v] "+Q" (v->counter), [tmp] "=&r" (tmp) \
| : "r" (v) \
| : cl); \
| \
| return i; \
| }
While others have negative indentation for some lines, and/or have
misaligned trailing backslashes, e.g.
| static inline void __lse_atomic_##op(int i, atomic_t *v) \
| { \
| asm volatile( \
| __LSE_PREAMBLE \
| " " #asm_op " %w[i], %[v]\n" \
| : [i] "+r" (i), [v] "+Q" (v->counter) \
| : "r" (v)); \
| }
This patch makes the indentation consistent and also aligns the trailing
backslashes. This makes the code easier to read for those (like myself)
who are easily distracted by these inconsistencies.
This is intended as a cleanup.
There should be no functional change as a result of this patch.
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Acked-by: Will Deacon <will@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20211210151410.2782645-2-mark.rutland@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2021-12-10 15:14:06 +00:00
|
|
|
__ll_sc_atomic64_fetch_##op##name(s64 i, atomic64_t *v) \
|
locking/atomic, arch/arm64: Implement atomic{,64}_fetch_{add,sub,and,andnot,or,xor}{,_relaxed,_acquire,_release}()
Implement FETCH-OP atomic primitives, these are very similar to the
existing OP-RETURN primitives we already have, except they return the
value of the atomic variable _before_ modification.
This is especially useful for irreversible operations -- such as
bitops (because it becomes impossible to reconstruct the state prior
to modification).
[wildea01: compile fixes for ll/sc]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steve Capper <steve.capper@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-arch@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2016-04-18 01:16:09 +02:00
|
|
|
{ \
|
2019-05-22 14:22:39 +01:00
|
|
|
s64 result, val; \
|
locking/atomic, arch/arm64: Implement atomic{,64}_fetch_{add,sub,and,andnot,or,xor}{,_relaxed,_acquire,_release}()
Implement FETCH-OP atomic primitives, these are very similar to the
existing OP-RETURN primitives we already have, except they return the
value of the atomic variable _before_ modification.
This is especially useful for irreversible operations -- such as
bitops (because it becomes impossible to reconstruct the state prior
to modification).
[wildea01: compile fixes for ll/sc]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steve Capper <steve.capper@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-arch@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2016-04-18 01:16:09 +02:00
|
|
|
unsigned long tmp; \
|
|
|
|
\
|
|
|
|
asm volatile("// atomic64_fetch_" #op #name "\n" \
|
arm64: atomics: format whitespace consistently
The code for the atomic ops is formatted inconsistently, and while this
is not a functional problem it is rather distracting when working on
them.
Some have ops have consistent indentation, e.g.
| #define ATOMIC_OP_ADD_RETURN(name, mb, cl...) \
| static inline int __lse_atomic_add_return##name(int i, atomic_t *v) \
| { \
| u32 tmp; \
| \
| asm volatile( \
| __LSE_PREAMBLE \
| " ldadd" #mb " %w[i], %w[tmp], %[v]\n" \
| " add %w[i], %w[i], %w[tmp]" \
| : [i] "+r" (i), [v] "+Q" (v->counter), [tmp] "=&r" (tmp) \
| : "r" (v) \
| : cl); \
| \
| return i; \
| }
While others have negative indentation for some lines, and/or have
misaligned trailing backslashes, e.g.
| static inline void __lse_atomic_##op(int i, atomic_t *v) \
| { \
| asm volatile( \
| __LSE_PREAMBLE \
| " " #asm_op " %w[i], %[v]\n" \
| : [i] "+r" (i), [v] "+Q" (v->counter) \
| : "r" (v)); \
| }
This patch makes the indentation consistent and also aligns the trailing
backslashes. This makes the code easier to read for those (like myself)
who are easily distracted by these inconsistencies.
This is intended as a cleanup.
There should be no functional change as a result of this patch.
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Acked-by: Will Deacon <will@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20211210151410.2782645-2-mark.rutland@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2021-12-10 15:14:06 +00:00
|
|
|
" prfm pstl1strm, %3\n" \
|
|
|
|
"1: ld" #acq "xr %0, %3\n" \
|
|
|
|
" " #asm_op " %1, %0, %4\n" \
|
|
|
|
" st" #rel "xr %w2, %1, %3\n" \
|
|
|
|
" cbnz %w2, 1b\n" \
|
arm64: atomics: remove LL/SC trampolines
When CONFIG_ARM64_LSE_ATOMICS=y, each use of an LL/SC atomic results in
a fragment of code being generated in a subsection without a clear
association with its caller. A trampoline in the caller branches to the
LL/SC atomic with with a direct branch, and the atomic directly branches
back into its trampoline.
This breaks backtracing, as any PC within the out-of-line fragment will
be symbolized as an offset from the nearest prior symbol (which may not
be the function using the atomic), and since the atomic returns with a
direct branch, the caller's PC may be missing from the backtrace.
For example, with secondary_start_kernel() hacked to contain
atomic_inc(NULL), the resulting exception can be reported as being taken
from cpus_are_stuck_in_kernel():
| Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
| Mem abort info:
| ESR = 0x0000000096000004
| EC = 0x25: DABT (current EL), IL = 32 bits
| SET = 0, FnV = 0
| EA = 0, S1PTW = 0
| FSC = 0x04: level 0 translation fault
| Data abort info:
| ISV = 0, ISS = 0x00000004
| CM = 0, WnR = 0
| [0000000000000000] user address but active_mm is swapper
| Internal error: Oops: 96000004 [#1] PREEMPT SMP
| Modules linked in:
| CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.19.0-11219-geb555cb5b794-dirty #3
| Hardware name: linux,dummy-virt (DT)
| pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
| pc : cpus_are_stuck_in_kernel+0xa4/0x120
| lr : secondary_start_kernel+0x164/0x170
| sp : ffff80000a4cbe90
| x29: ffff80000a4cbe90 x28: 0000000000000000 x27: 0000000000000000
| x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000
| x23: 0000000000000000 x22: 0000000000000000 x21: 0000000000000000
| x20: 0000000000000001 x19: 0000000000000001 x18: 0000000000000008
| x17: 3030383832343030 x16: 3030303030307830 x15: ffff80000a4cbab0
| x14: 0000000000000001 x13: 5d31666130663133 x12: 3478305b20313030
| x11: 3030303030303078 x10: 3020726f73736563 x9 : 726f737365636f72
| x8 : ffff800009ff2ef0 x7 : 0000000000000003 x6 : 0000000000000000
| x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000100
| x2 : 0000000000000000 x1 : ffff0000029bd880 x0 : 0000000000000000
| Call trace:
| cpus_are_stuck_in_kernel+0xa4/0x120
| __secondary_switched+0xb0/0xb4
| Code: 35ffffa3 17fffc6c d53cd040 f9800011 (885f7c01)
| ---[ end trace 0000000000000000 ]---
This is confusing and hinders debugging, and will be problematic for
CONFIG_LIVEPATCH as these cases cannot be unwound reliably.
This is very similar to recent issues with out-of-line exception fixups,
which were removed in commits:
35d67794b8828333 ("arm64: lib: __arch_clear_user(): fold fixups into body")
4012e0e22739eef9 ("arm64: lib: __arch_copy_from_user(): fold fixups into body")
139f9ab73d60cf76 ("arm64: lib: __arch_copy_to_user(): fold fixups into body")
When the trampolines were introduced in commit:
addfc38672c73efd ("arm64: atomics: avoid out-of-line ll/sc atomics")
The rationale was to improve icache performance by grouping the LL/SC
atomics together. This has never been measured, and this theoretical
benefit is outweighed by other factors:
* As the subsections are collapsed into sections at object file
granularity, these are spread out throughout the kernel and can share
cachelines with unrelated code regardless.
* GCC 12.1.0 has been observed to place the trampoline out-of-line in
specialised __ll_sc_*() functions, introducing more branching than was
intended.
* Removing the trampolines has been observed to shrink a defconfig
kernel Image by 64KiB when building with GCC 12.1.0.
This patch removes the LL/SC trampolines, meaning that the LL/SC atomics
will be inlined into their callers (or placed in out-of line functions
using regular BL/RET pairs). When CONFIG_ARM64_LSE_ATOMICS=y, the LL/SC
atomics are always called in an unlikely branch, and will be placed in a
cold portion of the function, so this should have minimal impact to the
hot paths.
Other than the improved backtracing, there should be no functional
change as a result of this patch.
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20220817155914.3975112-2-mark.rutland@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2022-08-17 16:59:13 +01:00
|
|
|
" " #mb \
|
locking/atomic, arch/arm64: Implement atomic{,64}_fetch_{add,sub,and,andnot,or,xor}{,_relaxed,_acquire,_release}()
Implement FETCH-OP atomic primitives, these are very similar to the
existing OP-RETURN primitives we already have, except they return the
value of the atomic variable _before_ modification.
This is especially useful for irreversible operations -- such as
bitops (because it becomes impossible to reconstruct the state prior
to modification).
[wildea01: compile fixes for ll/sc]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steve Capper <steve.capper@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-arch@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2016-04-18 01:16:09 +02:00
|
|
|
: "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \
|
2019-08-29 14:34:42 +01:00
|
|
|
: __stringify(constraint) "r" (i) \
|
locking/atomic, arch/arm64: Implement atomic{,64}_fetch_{add,sub,and,andnot,or,xor}{,_relaxed,_acquire,_release}()
Implement FETCH-OP atomic primitives, these are very similar to the
existing OP-RETURN primitives we already have, except they return the
value of the atomic variable _before_ modification.
This is especially useful for irreversible operations -- such as
bitops (because it becomes impossible to reconstruct the state prior
to modification).
[wildea01: compile fixes for ll/sc]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steve Capper <steve.capper@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-arch@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2016-04-18 01:16:09 +02:00
|
|
|
: cl); \
|
|
|
|
\
|
|
|
|
return result; \
|
2019-08-28 18:50:07 +01:00
|
|
|
}
|
locking/atomic, arch/arm64: Implement atomic{,64}_fetch_{add,sub,and,andnot,or,xor}{,_relaxed,_acquire,_release}()
Implement FETCH-OP atomic primitives, these are very similar to the
existing OP-RETURN primitives we already have, except they return the
value of the atomic variable _before_ modification.
This is especially useful for irreversible operations -- such as
bitops (because it becomes impossible to reconstruct the state prior
to modification).
[wildea01: compile fixes for ll/sc]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steve Capper <steve.capper@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-arch@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2016-04-18 01:16:09 +02:00
|
|
|
|
2015-10-08 20:15:18 +01:00
|
|
|
#define ATOMIC64_OPS(...) \
|
|
|
|
ATOMIC64_OP(__VA_ARGS__) \
|
locking/atomic, arch/arm64: Implement atomic{,64}_fetch_{add,sub,and,andnot,or,xor}{,_relaxed,_acquire,_release}()
Implement FETCH-OP atomic primitives, these are very similar to the
existing OP-RETURN primitives we already have, except they return the
value of the atomic variable _before_ modification.
This is especially useful for irreversible operations -- such as
bitops (because it becomes impossible to reconstruct the state prior
to modification).
[wildea01: compile fixes for ll/sc]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steve Capper <steve.capper@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-arch@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2016-04-18 01:16:09 +02:00
|
|
|
ATOMIC64_OP_RETURN(, dmb ish, , l, "memory", __VA_ARGS__) \
|
2015-10-08 20:15:18 +01:00
|
|
|
ATOMIC64_OP_RETURN(_relaxed,, , , , __VA_ARGS__) \
|
|
|
|
ATOMIC64_OP_RETURN(_acquire,, a, , "memory", __VA_ARGS__) \
|
locking/atomic, arch/arm64: Implement atomic{,64}_fetch_{add,sub,and,andnot,or,xor}{,_relaxed,_acquire,_release}()
Implement FETCH-OP atomic primitives, these are very similar to the
existing OP-RETURN primitives we already have, except they return the
value of the atomic variable _before_ modification.
This is especially useful for irreversible operations -- such as
bitops (because it becomes impossible to reconstruct the state prior
to modification).
[wildea01: compile fixes for ll/sc]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steve Capper <steve.capper@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-arch@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2016-04-18 01:16:09 +02:00
|
|
|
ATOMIC64_OP_RETURN(_release,, , l, "memory", __VA_ARGS__) \
|
|
|
|
ATOMIC64_FETCH_OP (, dmb ish, , l, "memory", __VA_ARGS__) \
|
|
|
|
ATOMIC64_FETCH_OP (_relaxed,, , , , __VA_ARGS__) \
|
|
|
|
ATOMIC64_FETCH_OP (_acquire,, a, , "memory", __VA_ARGS__) \
|
|
|
|
ATOMIC64_FETCH_OP (_release,, , l, "memory", __VA_ARGS__)
|
2015-02-03 11:26:53 +00:00
|
|
|
|
2019-08-28 18:50:06 +01:00
|
|
|
ATOMIC64_OPS(add, add, I)
|
|
|
|
ATOMIC64_OPS(sub, sub, J)
|
locking/atomic, arch/arm64: Implement atomic{,64}_fetch_{add,sub,and,andnot,or,xor}{,_relaxed,_acquire,_release}()
Implement FETCH-OP atomic primitives, these are very similar to the
existing OP-RETURN primitives we already have, except they return the
value of the atomic variable _before_ modification.
This is especially useful for irreversible operations -- such as
bitops (because it becomes impossible to reconstruct the state prior
to modification).
[wildea01: compile fixes for ll/sc]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steve Capper <steve.capper@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-arch@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2016-04-18 01:16:09 +02:00
|
|
|
|
|
|
|
#undef ATOMIC64_OPS
|
|
|
|
#define ATOMIC64_OPS(...) \
|
|
|
|
ATOMIC64_OP(__VA_ARGS__) \
|
|
|
|
ATOMIC64_FETCH_OP (, dmb ish, , l, "memory", __VA_ARGS__) \
|
|
|
|
ATOMIC64_FETCH_OP (_relaxed,, , , , __VA_ARGS__) \
|
|
|
|
ATOMIC64_FETCH_OP (_acquire,, a, , "memory", __VA_ARGS__) \
|
|
|
|
ATOMIC64_FETCH_OP (_release,, , l, "memory", __VA_ARGS__)
|
2015-02-03 11:26:53 +00:00
|
|
|
|
2019-08-28 18:50:06 +01:00
|
|
|
ATOMIC64_OPS(and, and, L)
|
|
|
|
ATOMIC64_OPS(or, orr, L)
|
|
|
|
ATOMIC64_OPS(xor, eor, L)
|
2019-08-29 14:34:42 +01:00
|
|
|
/*
|
|
|
|
* GAS converts the mysterious and undocumented BIC (immediate) alias to
|
|
|
|
* an AND (immediate) instruction with the immediate inverted. We don't
|
|
|
|
* have a constraint for this, so fall back to register.
|
|
|
|
*/
|
|
|
|
ATOMIC64_OPS(andnot, bic, )
|
2015-02-03 11:26:53 +00:00
|
|
|
|
|
|
|
#undef ATOMIC64_OPS
|
locking/atomic, arch/arm64: Implement atomic{,64}_fetch_{add,sub,and,andnot,or,xor}{,_relaxed,_acquire,_release}()
Implement FETCH-OP atomic primitives, these are very similar to the
existing OP-RETURN primitives we already have, except they return the
value of the atomic variable _before_ modification.
This is especially useful for irreversible operations -- such as
bitops (because it becomes impossible to reconstruct the state prior
to modification).
[wildea01: compile fixes for ll/sc]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steve Capper <steve.capper@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-arch@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2016-04-18 01:16:09 +02:00
|
|
|
#undef ATOMIC64_FETCH_OP
|
2015-02-03 11:26:53 +00:00
|
|
|
#undef ATOMIC64_OP_RETURN
|
|
|
|
#undef ATOMIC64_OP
|
|
|
|
|
2022-08-17 16:59:14 +01:00
|
|
|
static __always_inline s64
|
2019-08-28 18:50:07 +01:00
|
|
|
__ll_sc_atomic64_dec_if_positive(atomic64_t *v)
|
2015-02-03 11:26:53 +00:00
|
|
|
{
|
2019-05-22 14:22:39 +01:00
|
|
|
s64 result;
|
2015-02-03 11:26:53 +00:00
|
|
|
unsigned long tmp;
|
|
|
|
|
|
|
|
asm volatile("// atomic64_dec_if_positive\n"
|
arm64: atomics: format whitespace consistently
The code for the atomic ops is formatted inconsistently, and while this
is not a functional problem it is rather distracting when working on
them.
Some have ops have consistent indentation, e.g.
| #define ATOMIC_OP_ADD_RETURN(name, mb, cl...) \
| static inline int __lse_atomic_add_return##name(int i, atomic_t *v) \
| { \
| u32 tmp; \
| \
| asm volatile( \
| __LSE_PREAMBLE \
| " ldadd" #mb " %w[i], %w[tmp], %[v]\n" \
| " add %w[i], %w[i], %w[tmp]" \
| : [i] "+r" (i), [v] "+Q" (v->counter), [tmp] "=&r" (tmp) \
| : "r" (v) \
| : cl); \
| \
| return i; \
| }
While others have negative indentation for some lines, and/or have
misaligned trailing backslashes, e.g.
| static inline void __lse_atomic_##op(int i, atomic_t *v) \
| { \
| asm volatile( \
| __LSE_PREAMBLE \
| " " #asm_op " %w[i], %[v]\n" \
| : [i] "+r" (i), [v] "+Q" (v->counter) \
| : "r" (v)); \
| }
This patch makes the indentation consistent and also aligns the trailing
backslashes. This makes the code easier to read for those (like myself)
who are easily distracted by these inconsistencies.
This is intended as a cleanup.
There should be no functional change as a result of this patch.
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Acked-by: Will Deacon <will@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20211210151410.2782645-2-mark.rutland@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2021-12-10 15:14:06 +00:00
|
|
|
" prfm pstl1strm, %2\n"
|
|
|
|
"1: ldxr %0, %2\n"
|
|
|
|
" subs %0, %0, #1\n"
|
|
|
|
" b.lt 2f\n"
|
|
|
|
" stlxr %w1, %0, %2\n"
|
|
|
|
" cbnz %w1, 1b\n"
|
|
|
|
" dmb ish\n"
|
arm64: atomics: remove LL/SC trampolines
When CONFIG_ARM64_LSE_ATOMICS=y, each use of an LL/SC atomic results in
a fragment of code being generated in a subsection without a clear
association with its caller. A trampoline in the caller branches to the
LL/SC atomic with with a direct branch, and the atomic directly branches
back into its trampoline.
This breaks backtracing, as any PC within the out-of-line fragment will
be symbolized as an offset from the nearest prior symbol (which may not
be the function using the atomic), and since the atomic returns with a
direct branch, the caller's PC may be missing from the backtrace.
For example, with secondary_start_kernel() hacked to contain
atomic_inc(NULL), the resulting exception can be reported as being taken
from cpus_are_stuck_in_kernel():
| Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
| Mem abort info:
| ESR = 0x0000000096000004
| EC = 0x25: DABT (current EL), IL = 32 bits
| SET = 0, FnV = 0
| EA = 0, S1PTW = 0
| FSC = 0x04: level 0 translation fault
| Data abort info:
| ISV = 0, ISS = 0x00000004
| CM = 0, WnR = 0
| [0000000000000000] user address but active_mm is swapper
| Internal error: Oops: 96000004 [#1] PREEMPT SMP
| Modules linked in:
| CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.19.0-11219-geb555cb5b794-dirty #3
| Hardware name: linux,dummy-virt (DT)
| pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
| pc : cpus_are_stuck_in_kernel+0xa4/0x120
| lr : secondary_start_kernel+0x164/0x170
| sp : ffff80000a4cbe90
| x29: ffff80000a4cbe90 x28: 0000000000000000 x27: 0000000000000000
| x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000
| x23: 0000000000000000 x22: 0000000000000000 x21: 0000000000000000
| x20: 0000000000000001 x19: 0000000000000001 x18: 0000000000000008
| x17: 3030383832343030 x16: 3030303030307830 x15: ffff80000a4cbab0
| x14: 0000000000000001 x13: 5d31666130663133 x12: 3478305b20313030
| x11: 3030303030303078 x10: 3020726f73736563 x9 : 726f737365636f72
| x8 : ffff800009ff2ef0 x7 : 0000000000000003 x6 : 0000000000000000
| x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000100
| x2 : 0000000000000000 x1 : ffff0000029bd880 x0 : 0000000000000000
| Call trace:
| cpus_are_stuck_in_kernel+0xa4/0x120
| __secondary_switched+0xb0/0xb4
| Code: 35ffffa3 17fffc6c d53cd040 f9800011 (885f7c01)
| ---[ end trace 0000000000000000 ]---
This is confusing and hinders debugging, and will be problematic for
CONFIG_LIVEPATCH as these cases cannot be unwound reliably.
This is very similar to recent issues with out-of-line exception fixups,
which were removed in commits:
35d67794b8828333 ("arm64: lib: __arch_clear_user(): fold fixups into body")
4012e0e22739eef9 ("arm64: lib: __arch_copy_from_user(): fold fixups into body")
139f9ab73d60cf76 ("arm64: lib: __arch_copy_to_user(): fold fixups into body")
When the trampolines were introduced in commit:
addfc38672c73efd ("arm64: atomics: avoid out-of-line ll/sc atomics")
The rationale was to improve icache performance by grouping the LL/SC
atomics together. This has never been measured, and this theoretical
benefit is outweighed by other factors:
* As the subsections are collapsed into sections at object file
granularity, these are spread out throughout the kernel and can share
cachelines with unrelated code regardless.
* GCC 12.1.0 has been observed to place the trampoline out-of-line in
specialised __ll_sc_*() functions, introducing more branching than was
intended.
* Removing the trampolines has been observed to shrink a defconfig
kernel Image by 64KiB when building with GCC 12.1.0.
This patch removes the LL/SC trampolines, meaning that the LL/SC atomics
will be inlined into their callers (or placed in out-of line functions
using regular BL/RET pairs). When CONFIG_ARM64_LSE_ATOMICS=y, the LL/SC
atomics are always called in an unlikely branch, and will be placed in a
cold portion of the function, so this should have minimal impact to the
hot paths.
Other than the improved backtracing, there should be no functional
change as a result of this patch.
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20220817155914.3975112-2-mark.rutland@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2022-08-17 16:59:13 +01:00
|
|
|
"2:"
|
2015-02-03 11:26:53 +00:00
|
|
|
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
|
|
|
|
:
|
|
|
|
: "cc", "memory");
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2019-08-28 18:50:06 +01:00
|
|
|
#define __CMPXCHG_CASE(w, sfx, name, sz, mb, acq, rel, cl, constraint) \
|
2022-08-17 16:59:14 +01:00
|
|
|
static __always_inline u##sz \
|
2019-08-28 18:50:07 +01:00
|
|
|
__ll_sc__cmpxchg_case_##name##sz(volatile void *ptr, \
|
2018-09-13 13:30:45 +01:00
|
|
|
unsigned long old, \
|
2019-08-28 18:50:07 +01:00
|
|
|
u##sz new) \
|
2015-04-23 20:08:49 +01:00
|
|
|
{ \
|
2018-09-13 13:30:45 +01:00
|
|
|
unsigned long tmp; \
|
|
|
|
u##sz oldval; \
|
2015-04-23 20:08:49 +01:00
|
|
|
\
|
2018-09-13 14:28:33 +01:00
|
|
|
/* \
|
|
|
|
* Sub-word sizes require explicit casting so that the compare \
|
|
|
|
* part of the cmpxchg doesn't end up interpreting non-zero \
|
|
|
|
* upper bits of the register containing "old". \
|
|
|
|
*/ \
|
|
|
|
if (sz < 32) \
|
|
|
|
old = (u##sz)old; \
|
|
|
|
\
|
2015-04-23 20:08:49 +01:00
|
|
|
asm volatile( \
|
2015-08-04 17:27:34 +01:00
|
|
|
" prfm pstl1strm, %[v]\n" \
|
2018-09-13 13:30:45 +01:00
|
|
|
"1: ld" #acq "xr" #sfx "\t%" #w "[oldval], %[v]\n" \
|
2015-04-23 20:08:49 +01:00
|
|
|
" eor %" #w "[tmp], %" #w "[oldval], %" #w "[old]\n" \
|
|
|
|
" cbnz %" #w "[tmp], 2f\n" \
|
2018-09-13 13:30:45 +01:00
|
|
|
" st" #rel "xr" #sfx "\t%w[tmp], %" #w "[new], %[v]\n" \
|
2015-04-23 20:08:49 +01:00
|
|
|
" cbnz %w[tmp], 1b\n" \
|
|
|
|
" " #mb "\n" \
|
arm64: atomics: remove LL/SC trampolines
When CONFIG_ARM64_LSE_ATOMICS=y, each use of an LL/SC atomic results in
a fragment of code being generated in a subsection without a clear
association with its caller. A trampoline in the caller branches to the
LL/SC atomic with with a direct branch, and the atomic directly branches
back into its trampoline.
This breaks backtracing, as any PC within the out-of-line fragment will
be symbolized as an offset from the nearest prior symbol (which may not
be the function using the atomic), and since the atomic returns with a
direct branch, the caller's PC may be missing from the backtrace.
For example, with secondary_start_kernel() hacked to contain
atomic_inc(NULL), the resulting exception can be reported as being taken
from cpus_are_stuck_in_kernel():
| Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
| Mem abort info:
| ESR = 0x0000000096000004
| EC = 0x25: DABT (current EL), IL = 32 bits
| SET = 0, FnV = 0
| EA = 0, S1PTW = 0
| FSC = 0x04: level 0 translation fault
| Data abort info:
| ISV = 0, ISS = 0x00000004
| CM = 0, WnR = 0
| [0000000000000000] user address but active_mm is swapper
| Internal error: Oops: 96000004 [#1] PREEMPT SMP
| Modules linked in:
| CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.19.0-11219-geb555cb5b794-dirty #3
| Hardware name: linux,dummy-virt (DT)
| pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
| pc : cpus_are_stuck_in_kernel+0xa4/0x120
| lr : secondary_start_kernel+0x164/0x170
| sp : ffff80000a4cbe90
| x29: ffff80000a4cbe90 x28: 0000000000000000 x27: 0000000000000000
| x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000
| x23: 0000000000000000 x22: 0000000000000000 x21: 0000000000000000
| x20: 0000000000000001 x19: 0000000000000001 x18: 0000000000000008
| x17: 3030383832343030 x16: 3030303030307830 x15: ffff80000a4cbab0
| x14: 0000000000000001 x13: 5d31666130663133 x12: 3478305b20313030
| x11: 3030303030303078 x10: 3020726f73736563 x9 : 726f737365636f72
| x8 : ffff800009ff2ef0 x7 : 0000000000000003 x6 : 0000000000000000
| x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000100
| x2 : 0000000000000000 x1 : ffff0000029bd880 x0 : 0000000000000000
| Call trace:
| cpus_are_stuck_in_kernel+0xa4/0x120
| __secondary_switched+0xb0/0xb4
| Code: 35ffffa3 17fffc6c d53cd040 f9800011 (885f7c01)
| ---[ end trace 0000000000000000 ]---
This is confusing and hinders debugging, and will be problematic for
CONFIG_LIVEPATCH as these cases cannot be unwound reliably.
This is very similar to recent issues with out-of-line exception fixups,
which were removed in commits:
35d67794b8828333 ("arm64: lib: __arch_clear_user(): fold fixups into body")
4012e0e22739eef9 ("arm64: lib: __arch_copy_from_user(): fold fixups into body")
139f9ab73d60cf76 ("arm64: lib: __arch_copy_to_user(): fold fixups into body")
When the trampolines were introduced in commit:
addfc38672c73efd ("arm64: atomics: avoid out-of-line ll/sc atomics")
The rationale was to improve icache performance by grouping the LL/SC
atomics together. This has never been measured, and this theoretical
benefit is outweighed by other factors:
* As the subsections are collapsed into sections at object file
granularity, these are spread out throughout the kernel and can share
cachelines with unrelated code regardless.
* GCC 12.1.0 has been observed to place the trampoline out-of-line in
specialised __ll_sc_*() functions, introducing more branching than was
intended.
* Removing the trampolines has been observed to shrink a defconfig
kernel Image by 64KiB when building with GCC 12.1.0.
This patch removes the LL/SC trampolines, meaning that the LL/SC atomics
will be inlined into their callers (or placed in out-of line functions
using regular BL/RET pairs). When CONFIG_ARM64_LSE_ATOMICS=y, the LL/SC
atomics are always called in an unlikely branch, and will be placed in a
cold portion of the function, so this should have minimal impact to the
hot paths.
Other than the improved backtracing, there should be no functional
change as a result of this patch.
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20220817155914.3975112-2-mark.rutland@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2022-08-17 16:59:13 +01:00
|
|
|
"2:" \
|
2015-04-23 20:08:49 +01:00
|
|
|
: [tmp] "=&r" (tmp), [oldval] "=&r" (oldval), \
|
2018-09-13 13:30:45 +01:00
|
|
|
[v] "+Q" (*(u##sz *)ptr) \
|
2019-08-29 14:34:42 +01:00
|
|
|
: [old] __stringify(constraint) "r" (old), [new] "r" (new) \
|
2015-04-23 20:08:49 +01:00
|
|
|
: cl); \
|
|
|
|
\
|
|
|
|
return oldval; \
|
2019-08-28 18:50:07 +01:00
|
|
|
}
|
2015-04-23 20:08:49 +01:00
|
|
|
|
2019-08-28 18:50:06 +01:00
|
|
|
/*
|
|
|
|
* Earlier versions of GCC (no later than 8.1.0) appear to incorrectly
|
|
|
|
* handle the 'K' constraint for the value 4294967295 - thus we use no
|
|
|
|
* constraint for 32 bit operations.
|
|
|
|
*/
|
2019-08-29 14:34:42 +01:00
|
|
|
__CMPXCHG_CASE(w, b, , 8, , , , , K)
|
|
|
|
__CMPXCHG_CASE(w, h, , 16, , , , , K)
|
|
|
|
__CMPXCHG_CASE(w, , , 32, , , , , K)
|
2019-08-28 18:50:06 +01:00
|
|
|
__CMPXCHG_CASE( , , , 64, , , , , L)
|
2019-08-29 14:34:42 +01:00
|
|
|
__CMPXCHG_CASE(w, b, acq_, 8, , a, , "memory", K)
|
|
|
|
__CMPXCHG_CASE(w, h, acq_, 16, , a, , "memory", K)
|
|
|
|
__CMPXCHG_CASE(w, , acq_, 32, , a, , "memory", K)
|
2019-08-28 18:50:06 +01:00
|
|
|
__CMPXCHG_CASE( , , acq_, 64, , a, , "memory", L)
|
2019-08-29 14:34:42 +01:00
|
|
|
__CMPXCHG_CASE(w, b, rel_, 8, , , l, "memory", K)
|
|
|
|
__CMPXCHG_CASE(w, h, rel_, 16, , , l, "memory", K)
|
|
|
|
__CMPXCHG_CASE(w, , rel_, 32, , , l, "memory", K)
|
2019-08-28 18:50:06 +01:00
|
|
|
__CMPXCHG_CASE( , , rel_, 64, , , l, "memory", L)
|
2019-08-29 14:34:42 +01:00
|
|
|
__CMPXCHG_CASE(w, b, mb_, 8, dmb ish, , l, "memory", K)
|
|
|
|
__CMPXCHG_CASE(w, h, mb_, 16, dmb ish, , l, "memory", K)
|
|
|
|
__CMPXCHG_CASE(w, , mb_, 32, dmb ish, , l, "memory", K)
|
2019-08-28 18:50:06 +01:00
|
|
|
__CMPXCHG_CASE( , , mb_, 64, dmb ish, , l, "memory", L)
|
2015-04-23 20:08:49 +01:00
|
|
|
|
|
|
|
#undef __CMPXCHG_CASE
|
|
|
|
|
2023-05-31 15:08:36 +02:00
|
|
|
union __u128_halves {
|
|
|
|
u128 full;
|
|
|
|
struct {
|
|
|
|
u64 low, high;
|
|
|
|
};
|
|
|
|
};
|
|
|
|
|
|
|
|
#define __CMPXCHG128(name, mb, rel, cl...) \
|
|
|
|
static __always_inline u128 \
|
|
|
|
__ll_sc__cmpxchg128##name(volatile u128 *ptr, u128 old, u128 new) \
|
|
|
|
{ \
|
|
|
|
union __u128_halves r, o = { .full = (old) }, \
|
|
|
|
n = { .full = (new) }; \
|
|
|
|
unsigned int tmp; \
|
|
|
|
\
|
|
|
|
asm volatile("// __cmpxchg128" #name "\n" \
|
|
|
|
" prfm pstl1strm, %[v]\n" \
|
|
|
|
"1: ldxp %[rl], %[rh], %[v]\n" \
|
|
|
|
" cmp %[rl], %[ol]\n" \
|
|
|
|
" ccmp %[rh], %[oh], 0, eq\n" \
|
|
|
|
" b.ne 2f\n" \
|
|
|
|
" st" #rel "xp %w[tmp], %[nl], %[nh], %[v]\n" \
|
|
|
|
" cbnz %w[tmp], 1b\n" \
|
|
|
|
" " #mb "\n" \
|
|
|
|
"2:" \
|
|
|
|
: [v] "+Q" (*(u128 *)ptr), \
|
|
|
|
[rl] "=&r" (r.low), [rh] "=&r" (r.high), \
|
|
|
|
[tmp] "=&r" (tmp) \
|
|
|
|
: [ol] "r" (o.low), [oh] "r" (o.high), \
|
|
|
|
[nl] "r" (n.low), [nh] "r" (n.high) \
|
|
|
|
: "cc", ##cl); \
|
|
|
|
\
|
|
|
|
return r.full; \
|
|
|
|
}
|
|
|
|
|
|
|
|
__CMPXCHG128( , , )
|
|
|
|
__CMPXCHG128(_mb, dmb ish, l, "memory")
|
|
|
|
|
|
|
|
#undef __CMPXCHG128
|
|
|
|
|
2019-08-29 14:34:42 +01:00
|
|
|
#undef K
|
2015-05-14 18:05:50 +01:00
|
|
|
|
2015-02-03 11:26:53 +00:00
|
|
|
#endif /* __ASM_ATOMIC_LL_SC_H */
|