exec: fix the racy usage of fs_struct->in_exec

check_unsafe_exec() sets fs->in_exec under cred_guard_mutex, then execve()
paths clear fs->in_exec lockless. This is fine if exec succeeds, but if it
fails we have the following race:

	T1 sets fs->in_exec = 1, fails, drops cred_guard_mutex

	T2 sets fs->in_exec = 1

	T1 clears fs->in_exec

	T2 continues with fs->in_exec == 0

Change fs/exec.c to clear fs->in_exec with cred_guard_mutex held.

Reported-by: syzbot+1c486d0b62032c82a968@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/67dc67f0.050a0220.25ae54.001f.GAE@google.com/
Cc: stable@vger.kernel.org
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Link: https://lore.kernel.org/r/20250324160003.GA8878@redhat.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
This commit is contained in:
Oleg Nesterov 2025-03-24 17:00:03 +01:00 committed by Christian Brauner
parent 8661bb9c71
commit af7bb0d2ca
No known key found for this signature in database
GPG key ID: 91C61BC06578DCA2

View file

@ -1227,13 +1227,12 @@ int begin_new_exec(struct linux_binprm * bprm)
*/
bprm->point_of_no_return = true;
/*
* Make this the only thread in the thread group.
*/
/* Make this the only thread in the thread group */
retval = de_thread(me);
if (retval)
goto out;
/* see the comment in check_unsafe_exec() */
current->fs->in_exec = 0;
/*
* Cancel any io_uring activity across execve
*/
@ -1495,6 +1494,8 @@ static void free_bprm(struct linux_binprm *bprm)
}
free_arg_pages(bprm);
if (bprm->cred) {
/* in case exec fails before de_thread() succeeds */
current->fs->in_exec = 0;
mutex_unlock(&current->signal->cred_guard_mutex);
abort_creds(bprm->cred);
}
@ -1616,6 +1617,10 @@ static void check_unsafe_exec(struct linux_binprm *bprm)
* suid exec because the differently privileged task
* will be able to manipulate the current directory, etc.
* It would be nice to force an unshare instead...
*
* Otherwise we set fs->in_exec = 1 to deny clone(CLONE_FS)
* from another sub-thread until de_thread() succeeds, this
* state is protected by cred_guard_mutex we hold.
*/
n_fs = 1;
spin_lock(&p->fs->lock);
@ -1860,7 +1865,6 @@ static int bprm_execve(struct linux_binprm *bprm)
sched_mm_cid_after_execve(current);
/* execve succeeded */
current->fs->in_exec = 0;
current->in_execve = 0;
rseq_execve(current);
user_events_execve(current);
@ -1879,7 +1883,6 @@ out:
force_fatal_sig(SIGSEGV);
sched_mm_cid_after_execve(current);
current->fs->in_exec = 0;
current->in_execve = 0;
return retval;