mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-08-05 16:54:27 +00:00

Instead of always sharing the FDs with the userspace process, only hand over the FDs needed for mmap when required. The idea is that userspace might be able to force the stub into executing an mmap syscall; however, it will not be able to manipulate the control flow sufficiently to have access to an FD that would allow mapping arbitrary memory. Security-wise, we need to be sure that only the expected syscalls are executed after the kernel sends FDs through the socket. This is currently not the case, as userspace can trivially jump to the rt_sigreturn syscall instruction to execute any syscall that the stub is permitted to do. With this, it can trick the kernel into sending the FD, which in turn allows userspace to freely map any physical memory. As such, this is currently *not* secure. However, in principle the approach should be fine with a stricter SECCOMP filter and a careful review of the stub control flow (as userspace can prepare a stack). With some care, it is likely possible to extend the security model to SMP if desired. Signed-off-by: Benjamin Berg <benjamin.berg@intel.com> Link: https://patch.msgid.link/20250602130052.545733-8-benjamin@sipsolutions.net Signed-off-by: Johannes Berg <johannes.berg@intel.com>
77 lines
1.6 KiB
C
77 lines
1.6 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
/*
|
|
|
|
* Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
|
|
* Copyright (C) 2005 Jeff Dike (jdike@karaya.com)
|
|
*/
|
|
|
|
#ifndef __STUB_DATA_H
|
|
#define __STUB_DATA_H
|
|
|
|
#include <linux/compiler_types.h>
|
|
#include <as-layout.h>
|
|
#include <sysdep/tls.h>
|
|
#include <sysdep/stub-data.h>
|
|
#include <mm_id.h>
|
|
|
|
/*
 * Two named states, 0 and 1.
 * NOTE(review): presumably the values stored in stub_data.futex to say
 * whether the child or the kernel side currently runs - confirm against
 * the code that reads and writes that field.
 */
#define FUTEX_IN_CHILD 0
#define FUTEX_IN_KERN 1
|
|
|
|
/*
 * Start-up parameters for the stub.
 *
 * NOTE(review): the per-field notes are inferred from the member names
 * only; confirm them against the code that fills in and consumes this
 * structure. Field order and types are part of the layout and must not
 * be changed.
 */
struct stub_init_data {
	/* presumably non-zero when seccomp mode is in use - TODO confirm */
	int seccomp;

	/* presumably the address at which the stub is placed - TODO confirm */
	unsigned long stub_start;

	/* file descriptor / offset pair for the stub code */
	int stub_code_fd;
	unsigned long stub_code_offset;
	/* file descriptor / offset pair for the stub data */
	int stub_data_fd;
	unsigned long stub_data_offset;

	/* signal handler and signal restorer, stored as plain integers */
	unsigned long signal_handler;
	unsigned long signal_restorer;
};
|
|
|
|
/*
 * STUB_NEXT_SYSCALL(s) - step from the entry at @s to the one following it.
 *
 * Evaluates to the address of @s plus (s)->cmd_len bytes, cast to
 * struct stub_syscall *. @s is evaluated twice, so it must not have side
 * effects. The argument is fully parenthesized so that an expression such
 * as "p + 1" is cast as a whole instead of only its first operand.
 *
 * NOTE(review): struct stub_syscall as declared in this header has no
 * cmd_len member, so this macro cannot be expanded against it; it looks
 * stale - confirm whether it still has users and drop or fix it.
 */
#define STUB_NEXT_SYSCALL(s) \
	((struct stub_syscall *) (((unsigned long) (s)) + (s)->cmd_len))
|
|
|
|
/*
 * Kind of operation described by a struct stub_syscall entry.
 * STUB_SYSCALL_UNSET is explicitly 0, so a zero-filled entry carries no
 * operation; the remaining values follow consecutively.
 */
enum stub_syscall_type {
	STUB_SYSCALL_UNSET = 0,
	STUB_SYSCALL_MMAP,
	STUB_SYSCALL_MUNMAP,
};
|
|
|
|
struct stub_syscall {
|
|
struct {
|
|
unsigned long addr;
|
|
unsigned long length;
|
|
unsigned long offset;
|
|
int fd;
|
|
int prot;
|
|
} mem;
|
|
|
|
enum stub_syscall_type syscall;
|
|
};
|
|
|
|
struct stub_data {
|
|
unsigned long offset;
|
|
long err, child_err;
|
|
|
|
int syscall_data_len;
|
|
/* 128 leaves enough room for additional fields in the struct */
|
|
struct stub_syscall syscall_data[(UM_KERN_PAGE_SIZE - 128) / sizeof(struct stub_syscall)] __aligned(16);
|
|
|
|
/* data shared with signal handler (only used in seccomp mode) */
|
|
short restart_wait;
|
|
unsigned int futex;
|
|
int signal;
|
|
unsigned short si_offset;
|
|
unsigned short mctx_offset;
|
|
|
|
/* seccomp architecture specific state restore */
|
|
struct stub_data_arch arch_data;
|
|
|
|
/* Stack for our signal handlers and for calling into . */
|
|
unsigned char sigstack[UM_KERN_PAGE_SIZE] __aligned(UM_KERN_PAGE_SIZE);
|
|
};
|
|
|
|
#endif
|