linux/arch/x86/kernel/static_call.c
Peter Zijlstra 452cddbff7 static_call: Add static_call_cond()
Extend the static_call infrastructure to optimize the following common
pattern:

	if (func_ptr)
		func_ptr(args...)

For the trampoline (which is in effect a tail-call), we patch the
JMP.d32 into a RET, which then directly consumes the trampoline call.

For the in-line sites we replace the CALL with a NOP5.

NOTE: this is 'obviously' limited to functions with a 'void' return type.

NOTE: DEFINE_STATIC_COND_CALL() only requires a typename, as opposed
      to a full function.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/20200818135805.042977182@infradead.org
2020-09-01 09:58:05 +02:00

56 lines
1.2 KiB
C

// SPDX-License-Identifier: GPL-2.0
#include <linux/static_call.h>
#include <linux/memory.h>
#include <linux/bug.h>
#include <asm/text-patching.h>
enum insn_type {
CALL = 0, /* site call */
NOP = 1, /* site cond-call */
JMP = 2, /* tramp / site tail-call */
RET = 3, /* tramp / site cond-tail-call */
};
static void __static_call_transform(void *insn, enum insn_type type, void *func)
{
int size = CALL_INSN_SIZE;
const void *code;
switch (type) {
case CALL:
code = text_gen_insn(CALL_INSN_OPCODE, insn, func);
break;
case NOP:
code = ideal_nops[NOP_ATOMIC5];
break;
case JMP:
code = text_gen_insn(JMP32_INSN_OPCODE, insn, func);
break;
case RET:
code = text_gen_insn(RET_INSN_OPCODE, insn, func);
size = RET_INSN_SIZE;
break;
}
if (memcmp(insn, code, size) == 0)
return;
text_poke_bp(insn, code, size, NULL);
}
void arch_static_call_transform(void *site, void *tramp, void *func)
{
mutex_lock(&text_mutex);
if (tramp)
__static_call_transform(tramp, func ? JMP : RET, func);
if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site)
__static_call_transform(site, func ? CALL : NOP, func);
mutex_unlock(&text_mutex);
}
EXPORT_SYMBOL_GPL(arch_static_call_transform);