x86/percpu/64: Use relative percpu offsets

The percpu section is currently linked at absolute address 0, because
older compilers hard-coded the stack protector canary value at a fixed
offset from the start of the GS segment.  Now that the canary is a
normal percpu variable, the percpu section does not need to be linked
at a specific address.

x86-64 will now calculate the percpu offsets as the delta between the
initial percpu address and the dynamically allocated memory, like other
architectures.  Note that GSBASE is limited to the canonical address
width (48 or 57 bits, sign-extended).  As long as the kernel text,
modules, and the dynamically allocated percpu memory are all in the
negative address space, the delta will not overflow this limit.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Uros Bizjak <ubizjak@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/20250123190747.745588-9-brgerst@gmail.com
This commit is contained in:
Brian Gerst 2025-01-23 14:07:40 -05:00 committed by Ingo Molnar
parent 80d47defdd
commit 9d7de2aa8b
8 changed files with 27 additions and 65 deletions

View file

@ -431,7 +431,11 @@ DECLARE_INIT_PER_CPU(fixed_percpu_data);
static inline unsigned long cpu_kernelmode_gs_base(int cpu)
{
return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu);
#ifdef CONFIG_SMP
return per_cpu_offset(cpu);
#else
return 0;
#endif
}
extern asmlinkage void entry_SYSCALL32_ignore(void);

View file

@ -61,11 +61,14 @@ SYM_CODE_START_NOALIGN(startup_64)
/* Set up the stack for verify_cpu() */
leaq __top_init_kernel_stack(%rip), %rsp
/* Setup GSBASE to allow stack canary access for C code */
/*
* Set up GSBASE.
* Note that on SMP the boot CPU uses the init data section until
* the per-CPU areas are set up.
*/
movl $MSR_GS_BASE, %ecx
leaq INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
movl %edx, %eax
shrq $32, %rdx
xorl %eax, %eax
xorl %edx, %edx
wrmsr
call startup_64_setup_gdt_idt
@ -359,16 +362,12 @@ SYM_INNER_LABEL(common_startup_64, SYM_L_LOCAL)
movl %eax,%fs
movl %eax,%gs
/* Set up %gs.
*
* The base of %gs always points to fixed_percpu_data.
/*
* Set up GSBASE.
* Note that, on SMP, the boot CPU uses the init data section until
* the per-CPU areas are set up.
*/
movl $MSR_GS_BASE,%ecx
#ifndef CONFIG_SMP
leaq INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
#endif
movl %edx, %eax
shrq $32, %rdx
wrmsr

View file

@ -23,18 +23,10 @@
#include <asm/cpumask.h>
#include <asm/cpu.h>
#ifdef CONFIG_X86_64
#define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load)
#else
#define BOOT_PERCPU_OFFSET 0
#endif
DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET;
DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off);
EXPORT_PER_CPU_SYMBOL(this_cpu_off);
unsigned long __per_cpu_offset[NR_CPUS] __ro_after_init = {
[0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET,
};
unsigned long __per_cpu_offset[NR_CPUS] __ro_after_init;
EXPORT_SYMBOL(__per_cpu_offset);
/*

View file

@ -112,12 +112,6 @@ ASSERT(__relocate_kernel_end - __relocate_kernel_start <= KEXEC_CONTROL_CODE_MAX
PHDRS {
text PT_LOAD FLAGS(5); /* R_E */
data PT_LOAD FLAGS(6); /* RW_ */
#ifdef CONFIG_X86_64
#ifdef CONFIG_SMP
percpu PT_LOAD FLAGS(6); /* RW_ */
#endif
init PT_LOAD FLAGS(7); /* RWE */
#endif
note PT_NOTE FLAGS(0); /* ___ */
}
@ -216,21 +210,7 @@ SECTIONS
__init_begin = .; /* paired with __init_end */
}
#if defined(CONFIG_X86_64) && defined(CONFIG_SMP)
/*
* percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the
* output PHDR, so the next output section - .init.text - should
* start another segment - init.
*/
PERCPU_VADDR(INTERNODE_CACHE_BYTES, 0, :percpu)
ASSERT(SIZEOF(.data..percpu) < CONFIG_PHYSICAL_START,
"per-CPU data too large - increase CONFIG_PHYSICAL_START")
#endif
INIT_TEXT_SECTION(PAGE_SIZE)
#ifdef CONFIG_X86_64
:init
#endif
/*
* Section for code used exclusively before alternatives are run. All
@ -347,9 +327,7 @@ SECTIONS
EXIT_DATA
}
#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP)
PERCPU_SECTION(INTERNODE_CACHE_BYTES)
#endif
RUNTIME_CONST_VARIABLES
RUNTIME_CONST(ptr, USER_PTR_MAX)
@ -497,16 +475,11 @@ PROVIDE(__ref_stack_chk_guard = __stack_chk_guard);
* Per-cpu symbols which need to be offset from __per_cpu_load
* for the boot processor.
*/
#define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x) + __per_cpu_load
#define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x)
INIT_PER_CPU(gdt_page);
INIT_PER_CPU(fixed_percpu_data);
INIT_PER_CPU(irq_stack_backing_store);
#ifdef CONFIG_SMP
. = ASSERT((fixed_percpu_data == 0),
"fixed_percpu_data is not at start of per-cpu area");
#endif
#ifdef CONFIG_MITIGATION_UNRET_ENTRY
. = ASSERT((retbleed_return_thunk & 0x3f) == 0, "retbleed_return_thunk not cacheline-aligned");
#endif

View file

@ -179,9 +179,8 @@ SYM_CODE_START(pvh_start_xen)
* the per-CPU areas are set up.
*/
movl $MSR_GS_BASE,%ecx
leaq INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
movq %edx, %eax
shrq $32, %rdx
xorl %eax, %eax
xorl %edx, %edx
wrmsr
/* Call xen_prepare_pvh() via the kernel virtual mapping */

View file

@ -835,12 +835,7 @@ static void percpu_init(void)
*/
static int is_percpu_sym(ElfW(Sym) *sym, const char *symname)
{
int shndx = sym_index(sym);
return (shndx == per_cpu_shndx) &&
strcmp(symname, "__init_begin") &&
strcmp(symname, "__per_cpu_load") &&
strncmp(symname, "init_per_cpu_", 13);
return 0;
}
@ -1062,7 +1057,8 @@ static int cmp_relocs(const void *va, const void *vb)
static void sort_relocs(struct relocs *r)
{
qsort(r->offset, r->count, sizeof(r->offset[0]), cmp_relocs);
if (r->count)
qsort(r->offset, r->count, sizeof(r->offset[0]), cmp_relocs);
}
static int write32(uint32_t v, FILE *f)

View file

@ -31,15 +31,14 @@ SYM_CODE_START(startup_xen)
leaq __top_init_kernel_stack(%rip), %rsp
/* Set up %gs.
*
* The base of %gs always points to fixed_percpu_data.
/*
* Set up GSBASE.
* Note that, on SMP, the boot CPU uses the init data section until
* the per-CPU areas are set up.
*/
movl $MSR_GS_BASE,%ecx
movq $INIT_PER_CPU_VAR(fixed_percpu_data),%rax
cdq
xorl %eax, %eax
xorl %edx, %edx
wrmsr
mov %rsi, %rdi

View file

@ -1872,7 +1872,7 @@ config KALLSYMS_ALL
config KALLSYMS_ABSOLUTE_PERCPU
bool
depends on KALLSYMS
default X86_64 && SMP
default n
# end of the "standard kernel features (expert users)" menu