linux/arch/s390/include/asm/word-at-a-time.h
Heiko Carstens 0dafe9968a s390: Use inline qualifier for all EX_TABLE and ALTERNATIVE inline assemblies
Use asm_inline for all inline assemblies which make use of the EX_TABLE or
ALTERNATIVE macros.

These macros expand to many lines and the compiler assumes the number of
lines within an inline assembly is the same as the number of instructions
within an inline assembly. This has an effect on inlining and loop
unrolling decisions.

In order to avoid incorrect assumptions use asm_inline, which tells the
compiler that an inline assembly has the smallest possible size.

To avoid confusion about when asm_inline should be used, given that a
couple of inline assemblies are quite large, the rule is: always use
asm_inline whenever the EX_TABLE or ALTERNATIVE macro is used. In specific
cases there may be reasons not to follow this guideline, but those should
be documented with the corresponding code.

Using the inline qualifier everywhere has only a small effect on the kernel
image size:

add/remove: 0/10 grow/shrink: 19/8 up/down: 1492/-1858 (-366)

The only location where this seems to matter is load_unaligned_zeropad()
from word-at-a-time.h where the compiler inlines more functions within the
dcache code, which is indeed code where performance matters.

Suggested-by: Juergen Christ <jchrist@linux.ibm.com>
Reviewed-by: Juergen Christ <jchrist@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
2025-03-18 17:13:51 +01:00

65 lines
1.5 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_WORD_AT_A_TIME_H
#define _ASM_WORD_AT_A_TIME_H
#include <linux/bitops.h>
#include <linux/wordpart.h>
#include <asm/asm-extable.h>
#include <asm/bitsperlong.h>
/*
 * Constants consumed by the word-at-a-time helpers in this header.
 *
 * @bits: per-byte mask that has_zero() combines with the input word.
 */
struct word_at_a_time {
	const unsigned long bits;
};

/*
 * Initializer for a struct word_at_a_time: sets @bits to REPEAT_BYTE(0x7f)
 * (REPEAT_BYTE() is provided by <linux/wordpart.h>; presumably 0x7f
 * replicated into every byte of the word - confirm there).
 */
#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x7f) }
/*
 * prep_zero_mask - prepare the has_zero() result for create_zero_mask()
 * @val:  the word that was examined (unused here)
 * @data: the mask produced by has_zero()
 * @c:    word-at-a-time constants (unused here)
 *
 * No preparation is needed in this implementation, so @data is handed
 * back unchanged.
 */
static inline unsigned long prep_zero_mask(unsigned long val,
					   unsigned long data,
					   const struct word_at_a_time *c)
{
	const unsigned long prepared = data;

	return prepared;
}
/*
 * create_zero_mask - turn a has_zero() mask into a bit position
 * @data: mask as returned by has_zero() / prep_zero_mask()
 *
 * Returns __fls(@data). __fls() comes from <linux/bitops.h>; presumably it
 * yields the index of the most significant set bit and expects a non-zero
 * argument - confirm against its definition.
 */
static inline unsigned long create_zero_mask(unsigned long data)
{
	const unsigned long top_bit = __fls(data);

	return top_bit;
}
/*
 * find_zero - convert a bit position into a byte index
 * @data: bit position as returned by create_zero_mask()
 *
 * XORs @data with (BITS_PER_LONG - 1) and divides by eight via a shift.
 * For @data in 0..BITS_PER_LONG-1 the XOR equals
 * (BITS_PER_LONG - 1) - @data, so the result counts whole bytes starting
 * from the most significant end of the word.
 */
static inline unsigned long find_zero(unsigned long data)
{
	const unsigned long from_msb = data ^ (BITS_PER_LONG - 1);

	return from_msb >> 3;
}
/*
 * has_zero - detect zero bytes in a word
 * @val:  the word to examine
 * @data: output; receives the computed mask
 * @c:    constants, see WORD_AT_A_TIME_CONSTANTS
 *
 * When @c->bits holds 0x7f in every byte, adding it to the low seven bits
 * of each byte sets that byte's top bit iff any of those low bits was set,
 * and the per-byte sum (at most 0xfe) never carries into the next byte.
 * OR-ing in @val and @c->bits and inverting leaves the top bit of a byte
 * set exactly when that byte of @val is zero.
 *
 * Returns the same mask that is stored in *@data; non-zero means at least
 * one zero byte was found.
 */
static inline unsigned long has_zero(unsigned long val, unsigned long *data,
				     const struct word_at_a_time *c)
{
	const unsigned long low_bits = c->bits;
	const unsigned long nonzero_low = (val & low_bits) + low_bits;
	const unsigned long zero_bytes = ~(nonzero_low | val | low_bits);

	*data = zero_bytes;
	return zero_bytes;
}
/*
 * zero_bytemask - build a mask of the bits above a given bit position
 * @data: bit position, as returned by create_zero_mask(); must be smaller
 *        than the width of unsigned long
 *
 * Returns a word in which bits 0..@data are clear and every higher bit
 * is set.
 */
static inline unsigned long zero_bytemask(unsigned long data)
{
	const unsigned long all_but_lowest = ~1UL;

	return all_but_lowest << data;
}
/*
 * Load an unaligned word from kernel space.
 *
 * In the (very unlikely) case of the word being a page-crosser
 * and the next page not being mapped, take the exception and
 * return zeroes in the non-existing part.
 *
 * @addr: address of the word to load; need not be aligned.
 *
 * Returns the word read by the "lg" instruction, or whatever the
 * EX_TABLE_ZEROPAD fixup leaves in the output register if the access
 * faults (see EX_TABLE_ZEROPAD in <asm/asm-extable.h> for the exact
 * fixup semantics).
 */
static inline unsigned long load_unaligned_zeropad(const void *addr)
{
unsigned long data;
/*
 * asm_inline is used because EX_TABLE_ZEROPAD expands to many lines and
 * the compiler would otherwise overestimate the size of this statement
 * when making inlining decisions.
 *
 * Label 0 is the load itself, label 1 a no-op that follows it. Two
 * exception table entries are emitted, one keyed on each label, and both
 * name label 1 as their target and pass the data and address registers
 * to the fixup.
 * NOTE(review): the entry for label 1 presumably covers the case where
 * the reported fault address points past the load - confirm.
 *
 * The "m" input operand tells the compiler that the asm reads the memory
 * at addr, in addition to using addr as a base address register ("a").
 */
asm_inline volatile(
"0: lg %[data],0(%[addr])\n"
"1: nopr %%r7\n"
EX_TABLE_ZEROPAD(0b, 1b, %[data], %[addr])
EX_TABLE_ZEROPAD(1b, 1b, %[data], %[addr])
: [data] "=d" (data)
: [addr] "a" (addr), "m" (*(unsigned long *)addr));
return data;
}
#endif /* _ASM_WORD_AT_A_TIME_H */