mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-08-05 16:54:27 +00:00

Use asm_inline for all inline assemblies which make use of the EX_TABLE or ALTERNATIVE macros.

These macros expand to many lines, and the compiler assumes that the number of lines within an inline assembly is the same as the number of instructions within that inline assembly. This has an effect on inlining and loop unrolling decisions.

In order to avoid incorrect assumptions use asm_inline, which tells the compiler that an inline assembly has the smallest possible size.

To avoid confusion about when asm_inline should be used, since a couple of inline assemblies are quite large, the rule is to always use asm_inline whenever the EX_TABLE or ALTERNATIVE macro is used. In specific cases there may be reasons to not follow this guideline, but that should be documented with the corresponding code.

Using the inline qualifier everywhere has only a small effect on the kernel image size:

add/remove: 0/10 grow/shrink: 19/8 up/down: 1492/-1858 (-366)

The only location where this seems to matter is load_unaligned_zeropad() from word-at-a-time.h where the compiler inlines more functions within the dcache code, which is indeed code where performance matters.

Suggested-by: Juergen Christ <jchrist@linux.ibm.com>
Reviewed-by: Juergen Christ <jchrist@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
65 lines
1.5 KiB
C
65 lines
1.5 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _ASM_WORD_AT_A_TIME_H
|
|
#define _ASM_WORD_AT_A_TIME_H
|
|
|
|
#include <linux/bitops.h>
|
|
#include <linux/wordpart.h>
|
|
#include <asm/asm-extable.h>
|
|
#include <asm/bitsperlong.h>
|
|
|
|
/*
 * Constants consumed by the word-at-a-time helpers in this header.
 * Initialise an instance with WORD_AT_A_TIME_CONSTANTS.
 */
struct word_at_a_time {
	/* Pattern that has_zero() masks with, and adds to, the tested word. */
	const unsigned long bits;
};
|
|
|
|
/*
 * Brace initializer for struct word_at_a_time: sets its single member, bits.
 * NOTE(review): REPEAT_BYTE() is defined outside this file (presumably in
 * <linux/wordpart.h>) and presumably replicates 0x7f into every byte of an
 * unsigned long - confirm there.
 */
#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x7f) }
|
|
|
|
/*
 * prep_zero_mask - pass the zero-byte marker word through unchanged.
 * @val:  the word that was tested (not used here)
 * @data: the marker word, e.g. as stored by has_zero()
 * @c:    word-at-a-time constants (not used here)
 *
 * Returns @data as is; no further preparation is done in this
 * implementation.
 */
static inline unsigned long prep_zero_mask(unsigned long val,
					   unsigned long data,
					   const struct word_at_a_time *c)
{
	return data;
}
|
|
|
|
/*
 * create_zero_mask - reduce a zero-byte marker word to a single bit index.
 * @data: marker word, e.g. as produced by has_zero()/prep_zero_mask()
 *
 * Returns __fls(@data).
 * NOTE(review): __fls() is defined outside this file (presumably via
 * <linux/bitops.h>); it presumably yields the index of the most significant
 * set bit and is undefined for 0, so callers would need a non-zero @data -
 * confirm against its definition.
 */
static inline unsigned long create_zero_mask(unsigned long data)
{
	return __fls(data);
}
|
|
|
|
static inline unsigned long find_zero(unsigned long data)
|
|
{
|
|
return (data ^ (BITS_PER_LONG - 1)) >> 3;
|
|
}
|
|
|
|
static inline unsigned long has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
|
|
{
|
|
unsigned long mask = (val & c->bits) + c->bits;
|
|
|
|
*data = ~(mask | val | c->bits);
|
|
return *data;
|
|
}
|
|
|
|
/*
 * zero_bytemask - build a mask of all bits above a given bit index.
 * @data: bit index, e.g. as returned by create_zero_mask(); must be smaller
 *        than the width of unsigned long, as a larger shift count is
 *        undefined behaviour in C
 *
 * Returns a word with bits 0..@data cleared and every higher bit set.
 */
static inline unsigned long zero_bytemask(unsigned long data)
{
	return ~1UL << data;
}
|
|
|
|
/*
|
|
* Load an unaligned word from kernel space.
|
|
*
|
|
* In the (very unlikely) case of the word being a page-crosser
|
|
* and the next page not being mapped, take the exception and
|
|
* return zeroes in the non-existing part.
|
|
*/
|
|
static inline unsigned long load_unaligned_zeropad(const void *addr)
|
|
{
|
|
unsigned long data;
|
|
|
|
asm_inline volatile(
|
|
"0: lg %[data],0(%[addr])\n"
|
|
"1: nopr %%r7\n"
|
|
EX_TABLE_ZEROPAD(0b, 1b, %[data], %[addr])
|
|
EX_TABLE_ZEROPAD(1b, 1b, %[data], %[addr])
|
|
: [data] "=d" (data)
|
|
: [addr] "a" (addr), "m" (*(unsigned long *)addr));
|
|
return data;
|
|
}
|
|
|
|
#endif /* _ASM_WORD_AT_A_TIME_H */
|