mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-05-24 10:39:52 +00:00

Currently, the patch of an address is done in three steps: -- Pseudo-code #1 - Current implementation --- 1) add an int3 trap to the address that will be patched sync cores (send IPI to all other CPUs) 2) update all but the first byte of the patched range sync cores (send IPI to all other CPUs) 3) replace the first byte (int3) by the first byte of replacing opcode sync cores (send IPI to all other CPUs) -- Pseudo-code #1 --- When a static key has more than one entry, these steps are called once for each entry. The number of IPIs then is linear with regard to the number 'n' of entries of a key: O(n*3), which is O(n). This algorithm works fine for the update of a single key. But we think it is possible to optimize the case in which a static key has more than one entry. For instance, the sched_schedstats jump label has 56 entries in my (updated) fedora kernel, resulting in 168 IPIs for each CPU in which the thread that is enabling the key is _not_ running. With this patch, rather than receiving a single patch to be processed, a vector of patches is passed, enabling the rewrite of the pseudo-code #1 in this way: -- Pseudo-code #2 - This patch --- 1) for each patch in the vector: add an int3 trap to the address that will be patched sync cores (send IPI to all other CPUs) 2) for each patch in the vector: update all but the first byte of the patched range sync cores (send IPI to all other CPUs) 3) for each patch in the vector: replace the first byte (int3) by the first byte of replacing opcode sync cores (send IPI to all other CPUs) -- Pseudo-code #2 - This patch --- Doing the update in this way, the number of IPI becomes O(3) with regard to the number of keys, which is O(1). The batch mode is done with the function text_poke_bp_batch(), that receives two arguments: a vector of "struct text_to_poke", and the number of entries in the vector. The vector must be sorted by the addr field of the text_to_poke structure, enabling the binary search of a handler in the poke_int3_handler function (a fast path). Signed-off-by: Daniel Bristot de Oliveira <bristot@redhat.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org> Cc: Borislav Petkov <bp@alien8.de> Cc: Chris von Recklinghausen <crecklin@redhat.com> Cc: Clark Williams <williams@redhat.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Jason Baron <jbaron@akamai.com> Cc: Jiri Kosina <jkosina@suse.cz> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Marcelo Tosatti <mtosatti@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Scott Wood <swood@redhat.com> Cc: Steven Rostedt (VMware) <rostedt@goodmis.org> Cc: Thomas Gleixner <tglx@linutronix.de> Link: https://lkml.kernel.org/r/ca506ed52584c80f64de23f6f55ca288e5d079de.1560325897.git.bristot@redhat.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
90 lines
2.9 KiB
C
90 lines
2.9 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _ASM_X86_TEXT_PATCHING_H
|
|
#define _ASM_X86_TEXT_PATCHING_H
|
|
|
|
#include <linux/types.h>
|
|
#include <linux/stddef.h>
|
|
#include <asm/ptrace.h>
|
|
|
|
struct paravirt_patch_site;
|
|
#ifdef CONFIG_PARAVIRT
|
|
void apply_paravirt(struct paravirt_patch_site *start,
|
|
struct paravirt_patch_site *end);
|
|
#else
|
|
static inline void apply_paravirt(struct paravirt_patch_site *start,
|
|
struct paravirt_patch_site *end)
|
|
{}
|
|
#define __parainstructions NULL
|
|
#define __parainstructions_end NULL
|
|
#endif
|
|
|
|
/*
|
|
* Currently, the max observed size in the kernel code is
|
|
* JUMP_LABEL_NOP_SIZE/RELATIVEJUMP_SIZE, which are 5.
|
|
* Raise it if needed.
|
|
*/
|
|
#define POKE_MAX_OPCODE_SIZE 5
|
|
|
|
struct text_poke_loc {
|
|
void *detour;
|
|
void *addr;
|
|
size_t len;
|
|
const char opcode[POKE_MAX_OPCODE_SIZE];
|
|
};
|
|
|
|
extern void text_poke_early(void *addr, const void *opcode, size_t len);
|
|
|
|
/*
|
|
* Clear and restore the kernel write-protection flag on the local CPU.
|
|
* Allows the kernel to edit read-only pages.
|
|
* Side-effect: any interrupt handler running between save and restore will have
|
|
* the ability to write to read-only pages.
|
|
*
|
|
* Warning:
|
|
* Code patching in the UP case is safe if NMIs and MCE handlers are stopped and
|
|
* no thread can be preempted in the instructions being modified (no iret to an
|
|
* invalid instruction possible) or if the instructions are changed from a
|
|
* consistent state to another consistent state atomically.
|
|
* On the local CPU you need to be protected again NMI or MCE handlers seeing an
|
|
* inconsistent instruction while you patch.
|
|
*/
|
|
extern void *text_poke(void *addr, const void *opcode, size_t len);
|
|
extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
|
|
extern int poke_int3_handler(struct pt_regs *regs);
|
|
extern void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
|
|
extern void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries);
|
|
extern int after_bootmem;
|
|
extern __ro_after_init struct mm_struct *poking_mm;
|
|
extern __ro_after_init unsigned long poking_addr;
|
|
|
|
#ifndef CONFIG_UML_X86
|
|
static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip)
|
|
{
|
|
regs->ip = ip;
|
|
}
|
|
|
|
#define INT3_INSN_SIZE 1
|
|
#define CALL_INSN_SIZE 5
|
|
|
|
#ifdef CONFIG_X86_64
|
|
static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val)
|
|
{
|
|
/*
|
|
* The int3 handler in entry_64.S adds a gap between the
|
|
* stack where the break point happened, and the saving of
|
|
* pt_regs. We can extend the original stack because of
|
|
* this gap. See the idtentry macro's create_gap option.
|
|
*/
|
|
regs->sp -= sizeof(unsigned long);
|
|
*(unsigned long *)regs->sp = val;
|
|
}
|
|
|
|
static inline void int3_emulate_call(struct pt_regs *regs, unsigned long func)
|
|
{
|
|
int3_emulate_push(regs, regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE);
|
|
int3_emulate_jmp(regs, func);
|
|
}
|
|
#endif /* CONFIG_X86_64 */
|
|
#endif /* !CONFIG_UML_X86 */
|
|
|
|
#endif /* _ASM_X86_TEXT_PATCHING_H */
|