// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2021 Benjamin Berg <benjamin@sipsolutions.net>
 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <unistd.h>
#include <errno.h>
#include <fcntl.h>
#include <sched.h>
#include <signal.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <asm/ldt.h>
#include <asm/unistd.h>
#include <init.h>
#include <os.h>
#include <kern_util.h>
#include <mem_user.h>
#include <ptrace_user.h>
#include <stdbool.h>
#include <stub-data.h>
#include <sys/prctl.h>
#include <linux/seccomp.h>
#include <linux/filter.h>
#include <sysdep/mcontext.h>
#include <sysdep/stub.h>
#include <registers.h>
#include <skas.h>
#include "internal.h"
static void ptrace_child(void)
{
        int ret;
        /* Calling os_getpid because some libcs cached getpid incorrectly */
        int pid = os_getpid(), ppid = getppid();
        int sc_result;

        if (change_sig(SIGWINCH, 0) < 0 ||
            ptrace(PTRACE_TRACEME, 0, 0, 0) < 0) {
                perror("ptrace");
                kill(pid, SIGKILL);
        }
        kill(pid, SIGSTOP);

        /*
         * This syscall will be intercepted by the parent. Don't call more than
         * once, please.
         */
        sc_result = os_getpid();

        if (sc_result == pid)
                /* Nothing modified by the parent, we are running normally. */
                ret = 1;
        else if (sc_result == ppid)
                /*
                 * Expected in check_ptrace and check_sysemu when they succeed
                 * in modifying the stack frame
                 */
                ret = 0;
        else
                /* Serious trouble! This could be caused by a bug in host 2.6
                 * SKAS3/2.6 patch before release -V6, together with a bug in
                 * the UML code itself.
                 */
                ret = 2;

        exit(ret);
}

static void fatal_perror(const char *str)
{
        perror(str);
        exit(1);
}

static void fatal(char *fmt, ...)
{
        va_list list;

        va_start(list, fmt);
        vfprintf(stderr, fmt, list);
        va_end(list);

        exit(1);
}

static void non_fatal(char *fmt, ...)
{
        va_list list;

        va_start(list, fmt);
        vfprintf(stderr, fmt, list);
        va_end(list);
}

static int start_ptraced_child(void)
{
        int pid, n, status;

        fflush(stdout);

        pid = fork();
        if (pid == 0)
                ptrace_child();
        else if (pid < 0)
                fatal_perror("start_ptraced_child : fork failed");

        CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED));
        if (n < 0)
                fatal_perror("check_ptrace : waitpid failed");
        if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP))
                fatal("check_ptrace : expected SIGSTOP, got status = %d",
                      status);

        return pid;
}

static void stop_ptraced_child(int pid, int exitcode)
{
        int status, n;

        if (ptrace(PTRACE_CONT, pid, 0, 0) < 0)
                fatal_perror("stop_ptraced_child : ptrace failed");

        CATCH_EINTR(n = waitpid(pid, &status, 0));
        if (!WIFEXITED(status) || (WEXITSTATUS(status) != exitcode)) {
                int exit_with = WEXITSTATUS(status);
                fatal("stop_ptraced_child : child exited with exitcode %d, "
                      "while expecting %d; status 0x%x\n", exit_with,
                      exitcode, status);
        }
}
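
/*
 * Verify that PTRACE_SYSEMU_SINGLESTEP works: the traced child must stop
 * before its getpid() is executed, and the parent must be able to replace
 * the syscall return value with PTRACE_POKEUSER.
 */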
static void __init check_sysemu(void)
{
        int pid, n, status, count = 0;

        os_info("Checking syscall emulation for ptrace...");
        pid = start_ptraced_child();

        if ((ptrace(PTRACE_SETOPTIONS, pid, 0,
                    (void *) PTRACE_O_TRACESYSGOOD) < 0))
                fatal_perror("check_sysemu: PTRACE_SETOPTIONS failed");

        while (1) {
                count++;
                if (ptrace(PTRACE_SYSEMU_SINGLESTEP, pid, 0, 0) < 0)
                        goto fail;
                CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED));
                if (n < 0)
                        fatal_perror("check_sysemu: wait failed");

                if (WIFSTOPPED(status) &&
                    (WSTOPSIG(status) == (SIGTRAP|0x80))) {
                        if (!count) {
                                non_fatal("check_sysemu: SYSEMU_SINGLESTEP "
                                          "doesn't singlestep");
                                goto fail;
                        }
                        n = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_RET_OFFSET,
                                   os_getpid());
                        if (n < 0)
                                fatal_perror("check_sysemu : failed to modify "
                                             "system call return");
                        break;
                }
                else if (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGTRAP))
                        count++;
                else {
                        non_fatal("check_sysemu: expected SIGTRAP or "
                                  "(SIGTRAP | 0x80), got status = %d\n",
                                  status);
                        goto fail;
                }
        }
        stop_ptraced_child(pid, 0);

        os_info("OK\n");

        return;

fail:
        stop_ptraced_child(pid, 1);
        fatal("missing\n");
}

static void __init check_ptrace(void)
{
        int pid, syscall, n, status;

        os_info("Checking that ptrace can change system call numbers...");
        pid = start_ptraced_child();

        if ((ptrace(PTRACE_SETOPTIONS, pid, 0,
                    (void *) PTRACE_O_TRACESYSGOOD) < 0))
                fatal_perror("check_ptrace: PTRACE_SETOPTIONS failed");

        while (1) {
                if (ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0)
                        fatal_perror("check_ptrace : ptrace failed");

                CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED));
                if (n < 0)
                        fatal_perror("check_ptrace : wait failed");

                if (!WIFSTOPPED(status) ||
                    (WSTOPSIG(status) != (SIGTRAP | 0x80)))
                        fatal("check_ptrace : expected (SIGTRAP|0x80), "
                              "got status = %d", status);

                syscall = ptrace(PTRACE_PEEKUSER, pid, PT_SYSCALL_NR_OFFSET,
                                 0);
                if (syscall == __NR_getpid) {
                        n = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET,
                                   __NR_getppid);
                        if (n < 0)
                                fatal_perror("check_ptrace : failed to modify "
                                             "system call");
                        break;
                }
        }
        stop_ptraced_child(pid, 0);
        os_info("OK\n");
        check_sysemu();
}

extern unsigned long host_fp_size;
extern unsigned long exec_regs[MAX_REG_NR];
extern unsigned long *exec_fp_regs;

__initdata static struct stub_data *seccomp_test_stub_data;

static void __init sigsys_handler(int sig, siginfo_t *info, void *p)
{
        ucontext_t *uc = p;

        /* Stow away the location of the mcontext in the stack */
        seccomp_test_stub_data->mctx_offset = (unsigned long)&uc->uc_mcontext -
                        (unsigned long)&seccomp_test_stub_data->sigstack[0];

        /* Prevent libc from clearing memory (mctx_offset in particular) */
        syscall(__NR_exit, 0);
}
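
/*
 * Runs in a cloned child: check that close_range() works, install a SIGSYS
 * handler on the test stub's signal stack, load a BPF filter that traps
 * clock_nanosleep, then call sleep(0) to trigger SIGSYS so sigsys_handler
 * can record where the mcontext lives.
 */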
static int __init seccomp_helper(void *data)
{
        static struct sock_filter filter[] = {
                BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
                         offsetof(struct seccomp_data, nr)),
                BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_clock_nanosleep, 1, 0),
                BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
                BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRAP),
        };
        static struct sock_fprog prog = {
                .len = ARRAY_SIZE(filter),
                .filter = filter,
        };
        struct sigaction sa;

        /* close_range is needed for the stub */
        if (stub_syscall3(__NR_close_range, 1, ~0U, 0))
                exit(1);

        set_sigstack(seccomp_test_stub_data->sigstack,
                     sizeof(seccomp_test_stub_data->sigstack));

        sa.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO;
        sa.sa_sigaction = (void *) sigsys_handler;
        sa.sa_restorer = NULL;
        if (sigaction(SIGSYS, &sa, NULL) < 0)
                exit(2);

        prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
        if (syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
                    SECCOMP_FILTER_FLAG_TSYNC, &prog) != 0)
                exit(3);

        sleep(0);

        /* Never reached. */
        _exit(4);
}
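
/*
 * Clone seccomp_helper() and, if the filter worked, use the mcontext it
 * saved to size the host FP state and capture the default registers for
 * new userspace processes.
 */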
static bool __init init_seccomp(void)
{
        int pid;
        int status;
        int n;
        unsigned long sp;

        /*
         * We check that we can install a seccomp filter and then exit(0)
         * from a trapped syscall.
         *
         * Note that we cannot verify that no seccomp filter already exists
         * for a syscall that results in the process/thread being killed.
         */

        os_info("Checking that seccomp filters can be installed...");

        seccomp_test_stub_data = mmap(0, sizeof(*seccomp_test_stub_data),
                                      PROT_READ | PROT_WRITE,
                                      MAP_SHARED | MAP_ANON, 0, 0);

        /* Use the syscall data area as stack, we just need something */
        sp = (unsigned long)&seccomp_test_stub_data->syscall_data +
             sizeof(seccomp_test_stub_data->syscall_data) -
             sizeof(void *);
        pid = clone(seccomp_helper, (void *)sp, CLONE_VFORK | CLONE_VM, NULL);

        if (pid < 0)
                fatal_perror("check_seccomp : clone failed");

        CATCH_EINTR(n = waitpid(pid, &status, __WCLONE));
        if (n < 0)
                fatal_perror("check_seccomp : waitpid failed");

        if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
                struct uml_pt_regs *regs;
                unsigned long fp_size;
                int r;

                /* Fill in the host_fp_size from the mcontext. */
                regs = calloc(1, sizeof(struct uml_pt_regs));
                get_stub_state(regs, seccomp_test_stub_data, &fp_size);
                host_fp_size = fp_size;
                free(regs);

                /* Repeat with the correct size */
                regs = calloc(1, sizeof(struct uml_pt_regs) + host_fp_size);
                r = get_stub_state(regs, seccomp_test_stub_data, NULL);

                /* Store as the default startup registers */
                exec_fp_regs = malloc(host_fp_size);
                memcpy(exec_regs, regs->gp, sizeof(exec_regs));
                memcpy(exec_fp_regs, regs->fp, host_fp_size);

                munmap(seccomp_test_stub_data, sizeof(*seccomp_test_stub_data));

                free(regs);

                if (r) {
                        os_info("failed to fetch registers: %d\n", r);
                        return false;
                }

                os_info("OK\n");
                return true;
        }

        if (WIFEXITED(status) && WEXITSTATUS(status) == 2)
                os_info("missing\n");
        else
                os_info("error\n");

        munmap(seccomp_test_stub_data, sizeof(*seccomp_test_stub_data));
        return false;
}

static void __init check_coredump_limit(void)
{
        struct rlimit lim;
        int err = getrlimit(RLIMIT_CORE, &lim);

        if (err) {
                perror("Getting core dump limit");
                return;
        }

        os_info("Core dump limits :\n\tsoft - ");
        if (lim.rlim_cur == RLIM_INFINITY)
                os_info("NONE\n");
        else
                os_info("%llu\n", (unsigned long long)lim.rlim_cur);

        os_info("\thard - ");
        if (lim.rlim_max == RLIM_INFINITY)
                os_info("NONE\n");
        else
                os_info("%llu\n", (unsigned long long)lim.rlim_max);
}
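
/*
 * Scan /proc/cpuinfo and hand the "flags" and "cache_alignment" lines to
 * the given helpers; stop once both have been seen.
 */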
void __init get_host_cpu_features(
                void (*flags_helper_func)(char *line),
                void (*cache_helper_func)(char *line))
{
        FILE *cpuinfo;
        char *line = NULL;
        size_t len = 0;
        int done_parsing = 0;

        cpuinfo = fopen("/proc/cpuinfo", "r");
        if (cpuinfo == NULL) {
                os_info("Failed to get host CPU features\n");
        } else {
                while ((getline(&line, &len, cpuinfo)) != -1) {
                        if (strstr(line, "flags")) {
                                flags_helper_func(line);
                                done_parsing++;
                        }
                        if (strstr(line, "cache_alignment")) {
                                cache_helper_func(line);
                                done_parsing++;
                        }
                        free(line);
                        line = NULL;
                        if (done_parsing > 1)
                                break;
                }
                fclose(cpuinfo);
        }
}

static int seccomp_config __initdata;

static int __init uml_seccomp_config(char *line, int *add)
{
        *add = 0;

        if (strcmp(line, "off") == 0)
                seccomp_config = 0;
        else if (strcmp(line, "auto") == 0)
                seccomp_config = 1;
        else if (strcmp(line, "on") == 0)
                seccomp_config = 2;
        else
                fatal("Invalid seccomp option '%s', expected on/auto/off\n",
                      line);

        return 0;
}

__uml_setup("seccomp=", uml_seccomp_config,
"seccomp=<on/auto/off>\n"
" Configure whether or not SECCOMP is used. With SECCOMP, userspace\n"
" processes work collaboratively with the kernel instead of being\n"
" traced using ptrace. All syscalls from the application are caught and\n"
" redirected using a signal. This signal handler in turn is permitted to\n"
" do the selected set of syscalls to communicate with the UML kernel and\n"
" do the required memory management.\n"
"\n"
" This method is overall faster than the ptrace-based userspace, primarily\n"
" because it reduces the number of context switches for (minor) page faults.\n"
"\n"
" However, the SECCOMP filter is not (yet) restrictive enough to prevent\n"
" userspace from reading and writing all physical memory. Userspace\n"
" processes could also trick the stub into disabling SIGALRM, which\n"
" prevents it from being interrupted for scheduling purposes.\n"
"\n"
" This is insecure and should only be used with a trusted userspace\n\n"
);
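
/*
 * Early host checks: report core dump limits, make sure the temporary
 * directory allows executable mappings, then pick the userspace mode
 * (seccomp if configured and working, otherwise ptrace).
 */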
void __init os_early_checks(void)
{
        int pid;

        /* Print out the core dump limits early */
        check_coredump_limit();

        /* Need to check this early because mmapping happens before the
         * kernel is running.
         */
        check_tmpexec();

        if (seccomp_config) {
                if (init_seccomp()) {
                        using_seccomp = 1;
                        return;
                }

                if (seccomp_config == 2)
                        fatal("SECCOMP userspace requested but not functional!\n");
        }

        using_seccomp = 0;
        check_ptrace();

        pid = start_ptraced_child();
        if (init_pid_registers(pid))
                fatal("Failed to initialize default registers");
        stop_ptraced_child(pid, 1);
}

int __init parse_iomem(char *str, int *add)
{
        struct iomem_region *new;
        struct stat64 buf;
        char *file, *driver;
        int fd, size;

        driver = str;
        file = strchr(str,',');
        if (file == NULL) {
                os_warn("parse_iomem : failed to parse iomem\n");
                goto out;
        }
        *file = '\0';
        file++;
        fd = open(file, O_RDWR, 0);
        if (fd < 0) {
                perror("parse_iomem - Couldn't open io file");
                goto out;
        }

        if (fstat64(fd, &buf) < 0) {
                perror("parse_iomem - cannot stat_fd file");
                goto out_close;
        }

        new = malloc(sizeof(*new));
        if (new == NULL) {
                perror("Couldn't allocate iomem_region struct");
                goto out_close;
        }

        size = (buf.st_size + UM_KERN_PAGE_SIZE) & ~(UM_KERN_PAGE_SIZE - 1);

        *new = ((struct iomem_region) { .next = iomem_regions,
                                        .driver = driver,
                                        .fd = fd,
                                        .size = size,
                                        .phys = 0,
                                        .virt = 0 });
        iomem_regions = new;
        iomem_size += new->size + UM_KERN_PAGE_SIZE;

        return 0;
out_close:
        close(fd);
out:
        return 1;
}