2019-05-29 07:18:00 -07:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0-only */
|
2017-07-10 18:04:30 -07:00
|
|
|
/*
|
|
|
|
* Copyright (C) 2009 Chen Liqin <liqin.chen@sunplusct.com>
|
|
|
|
* Copyright (C) 2012 Regents of the University of California
|
|
|
|
* Copyright (C) 2017 SiFive
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef _ASM_RISCV_THREAD_INFO_H
|
|
|
|
#define _ASM_RISCV_THREAD_INFO_H
|
|
|
|
|
|
|
|
#include <asm/page.h>
|
|
|
|
#include <linux/const.h>
|
2024-06-24 07:53:16 +08:00
|
|
|
#include <linux/sizes.h>
|
2017-07-10 18:04:30 -07:00
|
|
|
|
|
|
|
/* thread information allocation */
#ifdef CONFIG_KASAN
/* KASAN instrumentation increases stack usage, so grow the stack one order. */
#define KASAN_STACK_ORDER 1
#else
#define KASAN_STACK_ORDER 0
#endif
/* Kernel stack size is 2^THREAD_SIZE_ORDER pages. */
#define THREAD_SIZE_ORDER (CONFIG_THREAD_SIZE_ORDER + KASAN_STACK_ORDER)
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
|
|
|
|
|
riscv: add VMAP_STACK overflow detection
This patch adds stack overflow detection to riscv, usable when
CONFIG_VMAP_STACK=y.
Overflow is detected in the kernel exception entry (kernel/entry.S). If a
kernel stack overflow is detected, the overflow handler is invoked on a
per-cpu overflow stack. This approach preserves GPRs and the original
exception information.
The overflow detect is performed before any attempt is made to access
the stack and the principle of stack overflow detection: kernel stacks
are aligned to double their size, enabling overflow to be detected with
a single bit test. For example, a 16K stack is aligned to 32K, ensuring
that bit 14 of the SP must be zero. On an overflow (or underflow), this
bit is flipped. Thus, overflow (of less than the size of the stack) can
be detected by testing whether this bit is set.
This gives us a useful error message on stack overflow, as can be
trigger with the LKDTM overflow test:
[ 388.053267] lkdtm: Performing direct entry EXHAUST_STACK
[ 388.053663] lkdtm: Calling function with 1024 frame size to depth 32 ...
[ 388.054016] lkdtm: loop 32/32 ...
[ 388.054186] lkdtm: loop 31/32 ...
[ 388.054491] lkdtm: loop 30/32 ...
[ 388.054672] lkdtm: loop 29/32 ...
[ 388.054859] lkdtm: loop 28/32 ...
[ 388.055010] lkdtm: loop 27/32 ...
[ 388.055163] lkdtm: loop 26/32 ...
[ 388.055309] lkdtm: loop 25/32 ...
[ 388.055481] lkdtm: loop 24/32 ...
[ 388.055653] lkdtm: loop 23/32 ...
[ 388.055837] lkdtm: loop 22/32 ...
[ 388.056015] lkdtm: loop 21/32 ...
[ 388.056188] lkdtm: loop 20/32 ...
[ 388.058145] Insufficient stack space to handle exception!
[ 388.058153] Task stack: [0xffffffd014260000..0xffffffd014264000]
[ 388.058160] Overflow stack: [0xffffffe1f8d2c220..0xffffffe1f8d2d220]
[ 388.058168] CPU: 0 PID: 89 Comm: bash Not tainted 5.12.0-rc8-dirty #90
[ 388.058175] Hardware name: riscv-virtio,qemu (DT)
[ 388.058187] epc : number+0x32/0x2c0
[ 388.058247] ra : vsnprintf+0x2ae/0x3f0
[ 388.058255] epc : ffffffe0002d38f6 ra : ffffffe0002d814e sp : ffffffd01425ffc0
[ 388.058263] gp : ffffffe0012e4010 tp : ffffffe08014da00 t0 : ffffffd0142606e8
[ 388.058271] t1 : 0000000000000000 t2 : 0000000000000000 s0 : ffffffd014260070
[ 388.058303] s1 : ffffffd014260158 a0 : ffffffd01426015e a1 : ffffffd014260158
[ 388.058311] a2 : 0000000000000013 a3 : ffff0a01ffffff10 a4 : ffffffe000c398e0
[ 388.058319] a5 : 511b02ec65f3e300 a6 : 0000000000a1749a a7 : 0000000000000000
[ 388.058327] s2 : ffffffff000000ff s3 : 00000000ffff0a01 s4 : ffffffe0012e50a8
[ 388.058335] s5 : 0000000000ffff0a s6 : ffffffe0012e50a8 s7 : ffffffe000da1cc0
[ 388.058343] s8 : ffffffffffffffff s9 : ffffffd0142602b0 s10: ffffffd0142602a8
[ 388.058351] s11: ffffffd01426015e t3 : 00000000000f0000 t4 : ffffffffffffffff
[ 388.058359] t5 : 000000000000002f t6 : ffffffd014260158
[ 388.058366] status: 0000000000000100 badaddr: ffffffd01425fff8 cause: 000000000000000f
[ 388.058374] Kernel panic - not syncing: Kernel stack overflow
[ 388.058381] CPU: 0 PID: 89 Comm: bash Not tainted 5.12.0-rc8-dirty #90
[ 388.058387] Hardware name: riscv-virtio,qemu (DT)
[ 388.058393] Call Trace:
[ 388.058400] [<ffffffe000004944>] walk_stackframe+0x0/0xce
[ 388.058406] [<ffffffe0006f0b28>] dump_backtrace+0x38/0x46
[ 388.058412] [<ffffffe0006f0b46>] show_stack+0x10/0x18
[ 388.058418] [<ffffffe0006f3690>] dump_stack+0x74/0x8e
[ 388.058424] [<ffffffe0006f0d52>] panic+0xfc/0x2b2
[ 388.058430] [<ffffffe0006f0acc>] print_trace_address+0x0/0x24
[ 388.058436] [<ffffffe0002d814e>] vsnprintf+0x2ae/0x3f0
[ 388.058956] SMP: stopping secondary CPUs
Signed-off-by: Tong Tiangen <tongtiangen@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
2021-06-21 11:28:55 +08:00
|
|
|
/*
 * By aligning VMAP'd stacks to 2 * THREAD_SIZE, we can detect overflow by
 * checking sp & (1 << THREAD_SHIFT), which we can do cheaply in the entry
 * assembly.
 */
#ifdef CONFIG_VMAP_STACK
#define THREAD_ALIGN (2 * THREAD_SIZE)
#else
#define THREAD_ALIGN THREAD_SIZE
#endif

/* log2(THREAD_SIZE): the bit tested by the overflow check described above. */
#define THREAD_SHIFT (PAGE_SHIFT + THREAD_SIZE_ORDER)
/* Size of the per-cpu stack used to handle a kernel stack overflow. */
#define OVERFLOW_STACK_SIZE SZ_4K

/* IRQ stacks are the same size as task stacks. */
#define IRQ_STACK_SIZE THREAD_SIZE
|
|
|
|
|
2017-07-10 18:04:30 -07:00
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
|
|
|
|
#include <asm/processor.h>
|
|
|
|
#include <asm/csr.h>
|
|
|
|
|
|
|
|
/*
 * low level task data that entry.S needs immediate access to
 * - this struct should fit entirely inside of one cache line
 * - if the members of this struct changes, the assembly constants
 *   in asm-offsets.c must be updated accordingly
 * - thread_info is included in task_struct at an offset of 0. This means that
 *   tp points to both thread_info and task_struct.
 */
struct thread_info {
	unsigned long flags; /* low level flags */
	int preempt_count; /* 0=>preemptible, <0=>BUG */
	/*
	 * These stack pointers are overwritten on every system call or
	 * exception. SP is also saved to the stack so it can be recovered
	 * when overwritten.
	 */
	long kernel_sp; /* Kernel stack pointer */
	long user_sp; /* User stack pointer */
	int cpu;
	unsigned long syscall_work; /* SYSCALL_WORK_ flags */
#ifdef CONFIG_SHADOW_CALL_STACK
	/* Shadow call stack bounds; see INIT_SCS below for the init task. */
	void *scs_base;
	void *scs_sp;
#endif
#ifdef CONFIG_64BIT
	/*
	 * Used in handle_exception() to save a0, a1 and a2 before knowing if we
	 * can access the kernel stack.
	 */
	unsigned long a0, a1, a2;
#endif
};
|
|
|
|
|
2023-09-27 22:48:02 +00:00
|
|
|
#ifdef CONFIG_SHADOW_CALL_STACK
/*
 * Designated-initializer fragment for INIT_THREAD_INFO: point the init
 * task's shadow call stack members at init_shadow_call_stack. Expands to
 * nothing when shadow call stacks are disabled.
 */
#define INIT_SCS							\
	.scs_base	= init_shadow_call_stack,			\
	.scs_sp		= init_shadow_call_stack,
#else
#define INIT_SCS
#endif
|
|
|
|
|
2017-07-10 18:04:30 -07:00
|
|
|
/*
 * macros/functions for gaining access to the thread information structure
 *
 * preempt_count needs to be 1 initially, until the scheduler is functional.
 */
#define INIT_THREAD_INFO(tsk)						\
{									\
	.flags		= 0,						\
	.preempt_count	= INIT_PREEMPT_COUNT,				\
	INIT_SCS							\
}
|
|
|
|
|
2023-06-05 11:07:07 +00:00
|
|
|
/* Arch hook: release per-task state when a task_struct is freed. */
void arch_release_task_struct(struct task_struct *tsk);

/* Arch hook: duplicate arch-specific task state from @src into @dst on fork. */
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
|
|
|
|
|
2017-07-10 18:04:30 -07:00
|
|
|
#endif /* !__ASSEMBLY__ */
|
|
|
|
|
|
|
|
/*
 * thread information flags
 * - these are process state flags that various assembly files may need to
 *   access
 * - pending work-to-be-done flags are in lowest half-word
 * - other flags in upper half-word(s)
 */
#define TIF_NEED_RESCHED	0	/* rescheduling necessary */
#define TIF_NEED_RESCHED_LAZY	1	/* Lazy rescheduling needed */
#define TIF_NOTIFY_RESUME	2	/* callback before returning to user */
#define TIF_SIGPENDING		3	/* signal pending */
#define TIF_RESTORE_SIGMASK	4	/* restore signal mask in do_signal() */
#define TIF_MEMDIE		5	/* is terminating due to OOM killer */
#define TIF_NOTIFY_SIGNAL	9	/* signal notifications exist */
#define TIF_UPROBE		10	/* uprobe breakpoint or singlestep */
#define TIF_32BIT		11	/* compat-mode 32bit process */
#define TIF_RISCV_V_DEFER_RESTORE	12 /* restore Vector before returning to user */
|
2017-07-10 18:04:30 -07:00
|
|
|
|
2024-10-21 17:08:42 +02:00
|
|
|
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
|
|
|
|
#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
|
2017-07-10 18:04:30 -07:00
|
|
|
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
|
|
|
|
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
|
2020-10-09 14:29:17 -06:00
|
|
|
#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
|
2020-12-17 16:01:44 +00:00
|
|
|
#define _TIF_UPROBE (1 << TIF_UPROBE)
|
2024-01-15 05:59:23 +00:00
|
|
|
#define _TIF_RISCV_V_DEFER_RESTORE (1 << TIF_RISCV_V_DEFER_RESTORE)
|
2017-07-10 18:04:30 -07:00
|
|
|
|
|
|
|
#endif /* _ASM_RISCV_THREAD_INFO_H */
|