mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-10-31 16:54:21 +00:00 
			
		
		
		
	seccomp: release filter after task is fully dead
The seccomp filter used to be released in free_task(), which is called asynchronously via call_rcu() and assorted mechanisms. Since we need to inform tasks waiting on the seccomp notifier when a filter goes empty, we will notify them as soon as a task has been marked fully dead in release_task(). To not split seccomp cleanup into two parts, move filter release out of free_task() and into release_task() after we've unhashed struct task from struct pid, exited signals, and unlinked it from the thread group's thread list. We'll put the empty-filter notification infrastructure into it in a follow-up patch. This also renames put_seccomp_filter() to seccomp_filter_release(), which is a more descriptive name of what we're doing here, especially once we've added the empty-filter notification mechanism in there. We're also NULL-ing the task's filter tree entry point, which seems cleaner than leaving a dangling pointer in there. Note that this shouldn't need any memory barriers since we're calling this when the task is in release_task(), which means it's EXIT_DEAD. So it can't modify its seccomp filters anymore. You can also see this from the point where we're calling seccomp_filter_release(). It's after __exit_signal(), and at this point tsk->sighand will already have been NULLed, which is required for thread-sync and filter installation alike. Cc: Tycho Andersen <tycho@tycho.ws> Cc: Kees Cook <keescook@chromium.org> Cc: Matt Denton <mpdenton@google.com> Cc: Sargun Dhillon <sargun@sargun.me> Cc: Jann Horn <jannh@google.com> Cc: Chris Palmer <palmer@google.com> Cc: Aleksa Sarai <cyphar@cyphar.com> Cc: Robert Sesek <rsesek@google.com> Cc: Jeffrey Vander Stoep <jeffv@google.com> Cc: Linux Containers <containers@lists.linux-foundation.org> Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com> Link: https://lore.kernel.org/r/20200531115031.391515-2-christian.brauner@ubuntu.com Signed-off-by: Kees Cook <keescook@chromium.org>
This commit is contained in:
		
							parent
							
								
									b707ddee11
								
							
						
					
					
						commit
						3a15fb6ed9
					
				
					 4 changed files with 40 additions and 28 deletions
				
			
		|  | @ -84,10 +84,10 @@ static inline int seccomp_mode(struct seccomp *s) | |||
| #endif /* CONFIG_SECCOMP */ | ||||
| 
 | ||||
| #ifdef CONFIG_SECCOMP_FILTER | ||||
| extern void put_seccomp_filter(struct task_struct *tsk); | ||||
| extern void seccomp_filter_release(struct task_struct *tsk); | ||||
| extern void get_seccomp_filter(struct task_struct *tsk); | ||||
| #else  /* CONFIG_SECCOMP_FILTER */ | ||||
| static inline void put_seccomp_filter(struct task_struct *tsk) | ||||
| static inline void seccomp_filter_release(struct task_struct *tsk) | ||||
| { | ||||
| 	return; | ||||
| } | ||||
|  |  | |||
|  | @ -217,6 +217,7 @@ repeat: | |||
| 	} | ||||
| 
 | ||||
| 	write_unlock_irq(&tasklist_lock); | ||||
| 	seccomp_filter_release(p); | ||||
| 	proc_flush_pid(thread_pid); | ||||
| 	put_pid(thread_pid); | ||||
| 	release_thread(p); | ||||
|  |  | |||
|  | @ -473,7 +473,6 @@ void free_task(struct task_struct *tsk) | |||
| #endif | ||||
| 	rt_mutex_debug_task_free(tsk); | ||||
| 	ftrace_graph_exit_task(tsk); | ||||
| 	put_seccomp_filter(tsk); | ||||
| 	arch_release_task_struct(tsk); | ||||
| 	if (tsk->flags & PF_KTHREAD) | ||||
| 		free_kthread_struct(tsk); | ||||
|  |  | |||
|  | @ -368,6 +368,42 @@ static inline pid_t seccomp_can_sync_threads(void) | |||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static inline void seccomp_filter_free(struct seccomp_filter *filter) | ||||
| { | ||||
| 	if (filter) { | ||||
| 		bpf_prog_destroy(filter->prog); | ||||
| 		kfree(filter); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| static void __put_seccomp_filter(struct seccomp_filter *orig) | ||||
| { | ||||
| 	/* Clean up single-reference branches iteratively. */ | ||||
| 	while (orig && refcount_dec_and_test(&orig->refs)) { | ||||
| 		struct seccomp_filter *freeme = orig; | ||||
| 		orig = orig->prev; | ||||
| 		seccomp_filter_free(freeme); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * seccomp_filter_release - Detach the task from its filter tree | ||||
|  *			    and drop its reference count during | ||||
|  *			    exit. | ||||
|  * | ||||
|  * This function should only be called when the task is exiting as | ||||
|  * it detaches it from its filter tree. As such, READ_ONCE() and | ||||
|  * barriers are not needed here, as would normally be needed. | ||||
|  */ | ||||
| void seccomp_filter_release(struct task_struct *tsk) | ||||
| { | ||||
| 	struct seccomp_filter *orig = tsk->seccomp.filter; | ||||
| 
 | ||||
| 	/* Detach task from its filter tree. */ | ||||
| 	tsk->seccomp.filter = NULL; | ||||
| 	__put_seccomp_filter(orig); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * seccomp_sync_threads: sets all threads to use current's filter | ||||
|  * | ||||
|  | @ -397,7 +433,7 @@ static inline void seccomp_sync_threads(unsigned long flags) | |||
| 		 * current's path will hold a reference.  (This also | ||||
| 		 * allows a put before the assignment.) | ||||
| 		 */ | ||||
| 		put_seccomp_filter(thread); | ||||
| 		__put_seccomp_filter(thread->seccomp.filter); | ||||
| 		smp_store_release(&thread->seccomp.filter, | ||||
| 				  caller->seccomp.filter); | ||||
| 		atomic_set(&thread->seccomp.filter_count, | ||||
|  | @ -571,30 +607,6 @@ void get_seccomp_filter(struct task_struct *tsk) | |||
| 	__get_seccomp_filter(orig); | ||||
| } | ||||
| 
 | ||||
| static inline void seccomp_filter_free(struct seccomp_filter *filter) | ||||
| { | ||||
| 	if (filter) { | ||||
| 		bpf_prog_destroy(filter->prog); | ||||
| 		kfree(filter); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| static void __put_seccomp_filter(struct seccomp_filter *orig) | ||||
| { | ||||
| 	/* Clean up single-reference branches iteratively. */ | ||||
| 	while (orig && refcount_dec_and_test(&orig->refs)) { | ||||
| 		struct seccomp_filter *freeme = orig; | ||||
| 		orig = orig->prev; | ||||
| 		seccomp_filter_free(freeme); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| /* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */ | ||||
| void put_seccomp_filter(struct task_struct *tsk) | ||||
| { | ||||
| 	__put_seccomp_filter(tsk->seccomp.filter); | ||||
| } | ||||
| 
 | ||||
| static void seccomp_init_siginfo(kernel_siginfo_t *info, int syscall, int reason) | ||||
| { | ||||
| 	clear_siginfo(info); | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Christian Brauner
						Christian Brauner