// SPDX-License-Identifier: GPL-2.0

#include <linux/compiler.h>
#include <linux/context_tracking.h>
#include <linux/errno.h>
#include <linux/nospec.h>
#include <linux/ptrace.h>
#include <linux/randomize_kstack.h>
#include <linux/syscalls.h>

#include <asm/debug-monitors.h>
#include <asm/exception.h>
#include <asm/fpsimd.h>
#include <asm/syscall.h>
#include <asm/thread_info.h>
#include <asm/unistd.h>
#include <asm/unistd_compat_32.h>

long compat_arm_syscall(struct pt_regs *regs, int scno);
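/*
 * With ARCH_HAS_SYSCALL_WRAPPER selected, common code declares no prototype
 * for sys_ni_syscall(), as it must be treated differently for in-kernel
 * calls and the syscall tables, so it is declared here as required. It has
 * no pt_regs wrapper; passing a redundant pt_regs pointer is benign per
 * the AAPCS.
 */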
long sys_ni_syscall(void);
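
/*
 * Called for syscall numbers outside the table: gives the 32-bit compat
 * layer a chance to handle arch-specific syscalls before falling back to
 * sys_ni_syscall() and its -ENOSYS return.
 */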
static long do_ni_syscall(struct pt_regs *regs, int scno)
{
	if (is_compat_task()) {
		long ret = compat_arm_syscall(regs, scno);
		if (ret != -ENOSYS)
			return ret;
	}

	return sys_ni_syscall();
}
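
/*
 * Each syscall entry point is an __arm64_[compat_]sys_*() wrapper taking
 * the pt_regs pointer and extracting only the registers that syscall
 * needs, minimizing the risk of userspace-controlled values being used
 * under speculation.
 */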
static long __invoke_syscall(struct pt_regs *regs, syscall_fn_t syscall_fn)
{
	return syscall_fn(regs);
}
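
/*
 * array_index_nospec() clamps the syscall number before it is used to
 * index the table, preventing speculative out-of-bounds reads keyed on a
 * userspace-controlled value (Spectre-v1 style).
 */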
static void invoke_syscall(struct pt_regs *regs, unsigned int scno,
			   unsigned int sc_nr,
			   const syscall_fn_t syscall_table[])
{
	long ret;

	add_random_kstack_offset();

	if (scno < sc_nr) {
		syscall_fn_t syscall_fn;
		syscall_fn = syscall_table[array_index_nospec(scno, sc_nr)];
		ret = __invoke_syscall(regs, syscall_fn);
	} else {
		ret = do_ni_syscall(regs, scno);
	}

	syscall_set_return_value(current, regs, 0, ret);

	/*
	 * This value will get limited by KSTACK_OFFSET_MAX(), which is 10
	 * bits. The actual entropy will be further reduced by the compiler
	 * when applying stack alignment constraints: the AAPCS mandates a
	 * 16-byte aligned SP at function boundaries, which will remove the
	 * 4 low bits from any entropy chosen here.
	 *
	 * The resulting 6 bits of entropy is seen in SP[9:4].
	 */
	choose_random_kstack_offset(get_random_u16());
}
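
/*
 * True if any syscall entry/exit work (e.g. ptrace, seccomp, audit or
 * tracepoints) is pending for this task.
 */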
static inline bool has_syscall_work(unsigned long flags)
{
	return unlikely(flags & _TIF_SYSCALL_WORK);
}
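
/*
 * Common EL0 SVC handling for native and compat tasks: stash the original
 * x0 and syscall number for ptrace/seccomp, run any syscall-entry work,
 * invoke the syscall, then run any syscall-exit work.
 */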
static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
			   const syscall_fn_t syscall_table[])
{
	unsigned long flags = read_thread_flags();

	regs->orig_x0 = regs->regs[0];
	regs->syscallno = scno;

	/*
	 * BTI note:
	 * The architecture does not guarantee that SPSR.BTYPE is zero
	 * on taking an SVC, so we could return to userspace with a
	 * non-zero BTYPE after the syscall.
	 *
	 * This shouldn't matter except when userspace is explicitly
	 * doing something stupid, such as setting PROT_BTI on a page
	 * that lacks conforming BTI/PACIxSP instructions, falling
	 * through from one executable page to another with differing
	 * PROT_BTI, or messing with BTYPE via ptrace: in such cases,
	 * userspace should not be surprised if a SIGILL occurs on
	 * syscall return.
	 *
	 * So, don't touch regs->pstate & PSR_BTYPE_MASK here.
	 * (Similarly for HVC and SMC elsewhere.)
	 */

	if (flags & _TIF_MTE_ASYNC_FAULT) {
		/*
		 * Process the asynchronous tag check fault before the actual
		 * syscall. do_notify_resume() will send a signal to userspace
		 * before the syscall is restarted.
		 */
		syscall_set_return_value(current, regs, -ERESTARTNOINTR, 0);
		return;
	}

	if (has_syscall_work(flags)) {
		/*
		 * The de-facto standard way to skip a system call using ptrace
		 * is to set the system call to -1 (NO_SYSCALL) and set x0 to a
		 * suitable error code for consumption by userspace. However,
		 * this cannot be distinguished from a user-issued syscall(-1)
		 * and so we must set x0 to -ENOSYS here in case the tracer doesn't
		 * issue the skip and we fall into trace_exit with x0 preserved.
		 *
		 * This is slightly odd because it also means that if a tracer
		 * sets the system call number to -1 but does not initialise x0,
		 * then x0 will be preserved for all system calls apart from a
		 * user-issued syscall(-1). However, requesting a skip and not
		 * setting the return value is unlikely to do anything sensible
		 * anyway.
		 */
		if (scno == NO_SYSCALL)
			syscall_set_return_value(current, regs, -ENOSYS, 0);
		scno = syscall_trace_enter(regs);
		if (scno == NO_SYSCALL)
			goto trace_exit;
	}

	invoke_syscall(regs, scno, sc_nr, syscall_table);

	/*
	 * The tracing status may have changed under our feet, so we have to
	 * check again. However, if we were tracing entry, then we always trace
	 * exit regardless, as the old entry assembly did.
	 */
	if (!has_syscall_work(flags) && !IS_ENABLED(CONFIG_DEBUG_RSEQ)) {
		flags = read_thread_flags();
		if (!has_syscall_work(flags) && !(flags & _TIF_SINGLESTEP))
			return;
	}

trace_exit:
	syscall_trace_exit(regs);
}
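
/*
 * Native arm64 userspace passes the syscall number in w8 (regs->regs[8]);
 * the AArch32 compat ABI uses r7 (regs->regs[7]).
 */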
void do_el0_svc(struct pt_regs *regs)
{
	el0_svc_common(regs, regs->regs[8], __NR_syscalls, sys_call_table);
}

#ifdef CONFIG_COMPAT
void do_el0_svc_compat(struct pt_regs *regs)
{
	el0_svc_common(regs, regs->regs[7], __NR_compat32_syscalls,
		       compat_sys_call_table);
}
#endif