linux/arch/loongarch/include/asm/elf.h
Xi Ruoyao c239665130 LoongArch: Fix and simplify fcsr initialization on execve()
There has been a lingering bug in LoongArch Linux systems causing some
GCC tests to intermittently fail (see Closes link).  I've made a minimal
reproducer:

    zsh% cat measure.s
    .align 4
    .globl _start
    _start:
        movfcsr2gr  $a0, $fcsr0
        bstrpick.w  $a0, $a0, 16, 16
        beqz        $a0, .ok
        break       0
    .ok:
        li.w        $a7, 93
        syscall     0
    zsh% cc measure.s -o measure -nostdlib
    zsh% echo $((1.0/3))
    0.33333333333333331
    zsh% while ./measure; do ; done

This while loop should not stop as POSIX is clear that execve must set
fenv to the default, where FCSR should be zero.  But in fact it will
just stop after running for a while (normally less than 30 seconds).
Note that "$((1.0/3))" is needed to reproduce this issue because it
raises FE_INEXACT and makes fcsr0 non-zero.

The problem is we are currently relying on SET_PERSONALITY2() to reset
current->thread.fpu.fcsr.  But SET_PERSONALITY2() is executed before
start_thread(), which calls lose_fpu(0).  If kernel preemption is
enabled, we may switch to another thread after SET_PERSONALITY2() but
before lose_fpu(0).  Then a bad thing happens: during the thread switch
the value of the fcsr0 register is stored into current->thread.fpu.fcsr,
making it dirty again.

The issue can be fixed by setting current->thread.fpu.fcsr after
lose_fpu(0) because lose_fpu() clears TIF_USEDFPU, then the thread
switch won't touch current->thread.fpu.fcsr.

The only other architecture setting FCSR in SET_PERSONALITY2() is MIPS.
I've run a similar test on MIPS with the mainline kernel and it turns out
MIPS is buggy, too.  Anyway, MIPS does this to support different FP
flavors (NaN encodings, etc.) which do not exist on LoongArch.  So for
LoongArch, we can simply remove the current->thread.fpu.fcsr setting
from SET_PERSONALITY2() and do it in start_thread(), after lose_fpu(0).

The while loop, which fails with the mainline kernel, has survived for
one hour with this change applied on LoongArch.

Fixes: 803b0fc5c3 ("LoongArch: Add process management")
Closes: https://github.com/loongson-community/discussions/issues/7
Link: https://lore.kernel.org/linux-mips/7a6aa1bbdbbe2e63ae96ff163fab0349f58f1b9e.camel@xry111.site/
Cc: stable@vger.kernel.org
Signed-off-by: Xi Ruoyao <xry111@xry111.site>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2024-01-17 12:43:08 +08:00

340 lines
9.1 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2020-2022 Loongson Technology Corporation Limited
*/
#ifndef _ASM_ELF_H
#define _ASM_ELF_H
#include <linux/auxvec.h>
#include <linux/fs.h>
#include <uapi/linux/elf.h>
#include <asm/current.h>
#include <asm/vdso.h>
/*
 * The ABI of a file.
 *
 * Six values are defined: an LP64 group (0x1-0x3) and an ILP32 group
 * (0x5-0x7), each with a soft-float, single-float and double-float
 * variant.  The values 0x0 and 0x4 are not assigned here.
 *
 * NOTE(review): presumably these are compared against the e_flags field
 * of the ELF header -- confirm against the users of these constants.
 */
#define EF_LOONGARCH_ABI_LP64_SOFT_FLOAT 0x1
#define EF_LOONGARCH_ABI_LP64_SINGLE_FLOAT 0x2
#define EF_LOONGARCH_ABI_LP64_DOUBLE_FLOAT 0x3
#define EF_LOONGARCH_ABI_ILP32_SOFT_FLOAT 0x5
#define EF_LOONGARCH_ABI_ILP32_SINGLE_FLOAT 0x6
#define EF_LOONGARCH_ABI_ILP32_DOUBLE_FLOAT 0x7
/*
 * LoongArch relocation types used by the dynamic linker.
 *
 * The numbering below is not contiguous: 13-19 and 59-63 have no name in
 * this list.  Do not renumber; the values are ELF relocation type codes.
 */
#define R_LARCH_NONE 0
#define R_LARCH_32 1
#define R_LARCH_64 2
#define R_LARCH_RELATIVE 3
#define R_LARCH_COPY 4
#define R_LARCH_JUMP_SLOT 5
#define R_LARCH_TLS_DTPMOD32 6
#define R_LARCH_TLS_DTPMOD64 7
#define R_LARCH_TLS_DTPREL32 8
#define R_LARCH_TLS_DTPREL64 9
#define R_LARCH_TLS_TPREL32 10
#define R_LARCH_TLS_TPREL64 11
#define R_LARCH_IRELATIVE 12
/* 13-19: not assigned in this list. */
/* R_LARCH_MARK_* and the R_LARCH_SOP_* family (20-46). */
#define R_LARCH_MARK_LA 20
#define R_LARCH_MARK_PCREL 21
#define R_LARCH_SOP_PUSH_PCREL 22
#define R_LARCH_SOP_PUSH_ABSOLUTE 23
#define R_LARCH_SOP_PUSH_DUP 24
#define R_LARCH_SOP_PUSH_GPREL 25
#define R_LARCH_SOP_PUSH_TLS_TPREL 26
#define R_LARCH_SOP_PUSH_TLS_GOT 27
#define R_LARCH_SOP_PUSH_TLS_GD 28
#define R_LARCH_SOP_PUSH_PLT_PCREL 29
#define R_LARCH_SOP_ASSERT 30
#define R_LARCH_SOP_NOT 31
#define R_LARCH_SOP_SUB 32
#define R_LARCH_SOP_SL 33
#define R_LARCH_SOP_SR 34
#define R_LARCH_SOP_ADD 35
#define R_LARCH_SOP_AND 36
#define R_LARCH_SOP_IF_ELSE 37
#define R_LARCH_SOP_POP_32_S_10_5 38
#define R_LARCH_SOP_POP_32_U_10_12 39
#define R_LARCH_SOP_POP_32_S_10_12 40
#define R_LARCH_SOP_POP_32_S_10_16 41
#define R_LARCH_SOP_POP_32_S_10_16_S2 42
#define R_LARCH_SOP_POP_32_S_5_20 43
#define R_LARCH_SOP_POP_32_S_0_5_10_16_S2 44
#define R_LARCH_SOP_POP_32_S_0_10_10_16_S2 45
#define R_LARCH_SOP_POP_32_U 46
/* R_LARCH_ADD<n> / R_LARCH_SUB<n> family (47-56). */
#define R_LARCH_ADD8 47
#define R_LARCH_ADD16 48
#define R_LARCH_ADD24 49
#define R_LARCH_ADD32 50
#define R_LARCH_ADD64 51
#define R_LARCH_SUB8 52
#define R_LARCH_SUB16 53
#define R_LARCH_SUB24 54
#define R_LARCH_SUB32 55
#define R_LARCH_SUB64 56
/* R_LARCH_GNU_VT* pair (57-58). */
#define R_LARCH_GNU_VTINHERIT 57
#define R_LARCH_GNU_VTENTRY 58
/* 59-63: not assigned in this list. */
/* B<n>, ABS, PCALA, GOT and TLS HI/LO families (64-98). */
#define R_LARCH_B16 64
#define R_LARCH_B21 65
#define R_LARCH_B26 66
#define R_LARCH_ABS_HI20 67
#define R_LARCH_ABS_LO12 68
#define R_LARCH_ABS64_LO20 69
#define R_LARCH_ABS64_HI12 70
#define R_LARCH_PCALA_HI20 71
#define R_LARCH_PCALA_LO12 72
#define R_LARCH_PCALA64_LO20 73
#define R_LARCH_PCALA64_HI12 74
#define R_LARCH_GOT_PC_HI20 75
#define R_LARCH_GOT_PC_LO12 76
#define R_LARCH_GOT64_PC_LO20 77
#define R_LARCH_GOT64_PC_HI12 78
#define R_LARCH_GOT_HI20 79
#define R_LARCH_GOT_LO12 80
#define R_LARCH_GOT64_LO20 81
#define R_LARCH_GOT64_HI12 82
#define R_LARCH_TLS_LE_HI20 83
#define R_LARCH_TLS_LE_LO12 84
#define R_LARCH_TLS_LE64_LO20 85
#define R_LARCH_TLS_LE64_HI12 86
#define R_LARCH_TLS_IE_PC_HI20 87
#define R_LARCH_TLS_IE_PC_LO12 88
#define R_LARCH_TLS_IE64_PC_LO20 89
#define R_LARCH_TLS_IE64_PC_HI12 90
#define R_LARCH_TLS_IE_HI20 91
#define R_LARCH_TLS_IE_LO12 92
#define R_LARCH_TLS_IE64_LO20 93
#define R_LARCH_TLS_IE64_HI12 94
#define R_LARCH_TLS_LD_PC_HI20 95
#define R_LARCH_TLS_LD_HI20 96
#define R_LARCH_TLS_GD_PC_HI20 97
#define R_LARCH_TLS_GD_HI20 98
/* Remaining types (99-109); 109 is the highest value defined here. */
#define R_LARCH_32_PCREL 99
#define R_LARCH_RELAX 100
#define R_LARCH_DELETE 101
#define R_LARCH_ALIGN 102
#define R_LARCH_PCREL20_S2 103
#define R_LARCH_CFA 104
#define R_LARCH_ADD6 105
#define R_LARCH_SUB6 106
#define R_LARCH_ADD_ULEB128 107
#define R_LARCH_SUB_ULEB128 108
#define R_LARCH_64_PCREL 109
/*
 * Everything down to the matching #endif is only provided when ELF_ARCH has
 * not been defined already; the section ends by defining ELF_ARCH itself.
 */
#ifndef ELF_ARCH
/* ELF register definitions */
/*
 * The general-purpose register set consists of the following registers:
 *
 *   Register   Number
 *   GPRs       32
 *   ORIG_A0    1
 *   ERA        1
 *   BADVADDR   1
 *   CRMD       1
 *   PRMD       1
 *   EUEN       1
 *   ECFG       1
 *   ESTAT      1
 *   Reserved   5
 *
 * The counts above add up to 45, which is the value of ELF_NGREG.
 */
#define ELF_NGREG 45
/*
 * The floating-point register set consists of the following registers:
 *
 *   Register   Number
 *   FPR        32
 *   FCC        1
 *   FCSR       1
 *
 * The counts above add up to 34, which is the value of ELF_NFPREG.
 */
#define ELF_NFPREG 34
/* One general-purpose register slot, and the array holding all ELF_NGREG. */
typedef unsigned long elf_greg_t;
typedef elf_greg_t elf_gregset_t[ELF_NGREG];
/* One floating-point register slot, and the array holding all ELF_NFPREG. */
typedef double elf_fpreg_t;
typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG];
/* Used by the 64-bit ELF_CORE_COPY_REGS() below; defined outside this header. */
void loongarch_dump_regs64(u64 *uregs, const struct pt_regs *regs);
#ifdef CONFIG_32BIT
/*
 * This is used to ensure we don't load something for the wrong architecture.
 */
#define elf_check_arch elf32_check_arch
/*
 * These are used to set parameters in the core dumps.
 *
 * Note that the expansion of ELF_CORE_COPY_REGS() already ends in ';'.
 * NOTE(review): loongarch_dump_regs32() has no prototype in the visible
 * part of this header (only loongarch_dump_regs64() is declared above) --
 * confirm that it is declared elsewhere before relying on this branch.
 */
#define ELF_CLASS ELFCLASS32
#define ELF_CORE_COPY_REGS(dest, regs) \
loongarch_dump_regs32((u32 *)&(dest), (regs));
#endif /* CONFIG_32BIT */
#ifdef CONFIG_64BIT
/*
 * This is used to ensure we don't load something for the wrong architecture.
 */
#define elf_check_arch elf64_check_arch
/*
 * These are used to set parameters in the core dumps.
 *
 * Note that the expansion of ELF_CORE_COPY_REGS() already ends in ';'.
 * NOTE(review): callers may depend on that trailing ';' -- check every user
 * before removing it.
 */
#define ELF_CLASS ELFCLASS64
#define ELF_CORE_COPY_REGS(dest, regs) \
loongarch_dump_regs64((u64 *)&(dest), (regs));
#endif /* CONFIG_64BIT */
/*
 * These are used to set parameters in the core dumps.
 * ELF_DATA selects little-endian (2LSB) data encoding.
 */
#define ELF_DATA ELFDATA2LSB
#define ELF_ARCH EM_LOONGARCH
#endif /* !defined(ELF_ARCH) */
/*
 * Evaluate to non-zero if the ELF header pointed to by x has e_machine set
 * to EM_LOONGARCH.  The ELF class is not examined here.
 */
#define loongarch_elf_check_machine(x) ((x)->e_machine == EM_LOONGARCH)
/*
 * The vmcore checks use the machine-only test for both the 32-bit and the
 * 64-bit variant.
 */
#define vmcore_elf32_check_arch loongarch_elf_check_machine
#define vmcore_elf64_check_arch loongarch_elf_check_machine
/*
 * Evaluate to 1 if HDR (a struct elfhdr pointer) describes a 32-bit
 * LoongArch ELF binary, i.e. the machine check passes and
 * e_ident[EI_CLASS] is ELFCLASS32; evaluate to 0 otherwise.
 *
 * HDR is evaluated exactly once.
 */
#define elf32_check_arch(hdr)						\
({									\
	struct elfhdr *__ehdr = (hdr);					\
									\
	(int)(loongarch_elf_check_machine(__ehdr) &&			\
	      __ehdr->e_ident[EI_CLASS] == ELFCLASS32);			\
})
/*
 * Evaluate to 1 if HDR (a struct elfhdr pointer) describes a 64-bit
 * LoongArch ELF binary, i.e. the machine check passes and
 * e_ident[EI_CLASS] is ELFCLASS64; evaluate to 0 otherwise.
 *
 * HDR is evaluated exactly once.
 */
#define elf64_check_arch(hdr)						\
({									\
	struct elfhdr *__ehdr = (hdr);					\
									\
	(int)(loongarch_elf_check_machine(__ehdr) &&			\
	      __ehdr->e_ident[EI_CLASS] == ELFCLASS64);			\
})
#ifdef CONFIG_32BIT
/*
 * 32-bit variant: point the current thread at vdso_info and force the
 * personality to PER_LINUX unless it already is PER_LINUX.
 * The ex and state arguments are not used.
 */
#define SET_PERSONALITY2(ex, state)					\
do {									\
	unsigned int __pers = personality(current->personality);	\
									\
	current->thread.vdso = &vdso_info;				\
	if (__pers != PER_LINUX)					\
		set_personality(PER_LINUX);				\
} while (0)
#endif /* CONFIG_32BIT */
#ifdef CONFIG_64BIT
/*
 * 64-bit variant: clear the TIF_32BIT_REGS and TIF_32BIT_ADDR thread flags,
 * point the current thread at vdso_info, and force the personality to
 * PER_LINUX unless it already is PER_LINUX or PER_LINUX32.
 * The ex and state arguments are not used.
 */
#define SET_PERSONALITY2(ex, state)					\
do {									\
	clear_thread_flag(TIF_32BIT_REGS);				\
	clear_thread_flag(TIF_32BIT_ADDR);				\
									\
	current->thread.vdso = &vdso_info;				\
									\
	switch (personality(current->personality)) {			\
	case PER_LINUX:							\
	case PER_LINUX32:						\
		break;							\
	default:							\
		set_personality(PER_LINUX);				\
		break;							\
	}								\
} while (0)
#endif /* CONFIG_64BIT */
/*
 * NOTE(review): presumably tells the generic core-dump code to collect
 * registers through the regset interface -- confirm against its users.
 */
#define CORE_DUMP_USE_REGSET
/* The ELF executable page size is the kernel PAGE_SIZE. */
#define ELF_EXEC_PAGESIZE PAGE_SIZE
/*
 * This yields a mask that user programs can use to figure out what
 * instruction set this cpu supports. This could be done in userspace,
 * but it's not easy, and we've already done it here.
 *
 * The mask is the value of the elf_hwcap variable declared below, which
 * is defined outside this header.
 */
#define ELF_HWCAP (elf_hwcap)
extern unsigned int elf_hwcap;
#include <asm/hwcap.h>
/*
 * This yields a string that ld.so will use to load implementation
 * specific libraries for optimization. This is more specific in
 * intent than poking at uname or /proc/cpuinfo.
 *
 * The string is the value of the __elf_platform pointer declared below,
 * which is defined outside this header.
 */
#define ELF_PLATFORM __elf_platform
extern const char *__elf_platform;
/*
 * Zero the general-purpose registers of a register frame.
 *
 * _r:        pointer to a structure with a regs[] array of at least 32
 *            entries.  It is evaluated exactly once, so any pointer
 *            expression (e.g. &frame) is accepted.
 * load_addr: unused.
 *
 * regs[1]..regs[31] are cleared, except regs[11], which is deliberately
 * left untouched (the original open-coded list skipped it at the spot
 * marked "syscall n").  regs[0] is not written either.
 */
#define ELF_PLAT_INIT(_r, load_addr)	do {				\
	__typeof__(_r) __plat_regs = (_r);				\
	int __plat_i;							\
									\
	for (__plat_i = 1; __plat_i < 32; __plat_i++) {			\
		if (__plat_i != 11)					\
			__plat_regs->regs[__plat_i] = 0;		\
	}								\
} while (0)
/*
 * This is the location that an ET_DYN program is loaded if exec'ed. Typical
 * use of this is to invoke "./ld.so someprog" to test out a new version of
 * the loader. We need to make sure that it is out of the way of the program
 * that it will "exec", and that there is sufficient room for the brk.
 *
 * The value is two thirds of TASK_SIZE; the division is applied before the
 * multiplication, so the intermediate result never exceeds TASK_SIZE.
 */
#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2)
/*
 * Architecture-specific auxiliary vector entries.
 *
 * Emits a single entry: AT_SYSINFO_EHDR, whose value is
 * current->mm->context.vdso cast to unsigned long.
 *
 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes
 */
#define ARCH_DLINFO \
do { \
NEW_AUX_ENT(AT_SYSINFO_EHDR, \
(unsigned long)current->mm->context.vdso); \
} while (0)
/*
 * Advertise that this architecture provides arch_setup_additional_pages().
 * The function is only declared here; its definition lives outside this
 * header.  struct linux_binprm is forward-declared because only a pointer
 * to it is needed.
 */
#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
struct linux_binprm;
extern int arch_setup_additional_pages(struct linux_binprm *bprm,
int uses_interp);
/*
 * Per-exec architecture state passed to arch_elf_pt_proc() and
 * arch_check_elf().
 */
struct arch_elf_state {
int fp_abi; /* NOTE(review): presumably the FP ABI of the program -- confirm */
int interp_fp_abi; /* NOTE(review): presumably the FP ABI of its interpreter -- confirm */
};
/* Value both fields start with in INIT_ARCH_ELF_STATE. */
#define LOONGARCH_ABI_FP_ANY (0)
/* Initializer for struct arch_elf_state: both fields are LOONGARCH_ABI_FP_ANY. */
#define INIT_ARCH_ELF_STATE { \
.fp_abi = LOONGARCH_ABI_FP_ANY, \
.interp_fp_abi = LOONGARCH_ABI_FP_ANY, \
}
/*
 * ELF loading hooks, defined outside this header.  Both receive the
 * struct arch_elf_state declared in this file.
 *
 * NOTE(review): the exact contract (when each hook is invoked and what the
 * return value means) is not visible from this header -- see the
 * definitions and the generic ELF loader for details.
 */
extern int arch_elf_pt_proc(void *ehdr, void *phdr, struct file *elf,
bool is_interp, struct arch_elf_state *state);
extern int arch_check_elf(void *ehdr, bool has_interpreter, void *interp_ehdr,
struct arch_elf_state *state);
#endif /* _ASM_ELF_H */