mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-09-18 22:14:16 +00:00 
			
		
		
		
	 9d94c04c0d
			
		
	
	
		9d94c04c0d
		
	
	
	
	
		
			
			There is an interesting reference bug when -ENOMEM occurs in calling of io_install_fixed_file(). KASan report like below: [ 14.057131] ================================================================== [ 14.059161] BUG: KASAN: use-after-free in unix_get_socket+0x10/0x90 [ 14.060975] Read of size 8 at addr ffff88800b09cf20 by task kworker/u8:2/45 [ 14.062684] [ 14.062768] CPU: 2 PID: 45 Comm: kworker/u8:2 Not tainted 6.1.0-rc4 #1 [ 14.063099] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 [ 14.063666] Workqueue: events_unbound io_ring_exit_work [ 14.063936] Call Trace: [ 14.064065] <TASK> [ 14.064175] dump_stack_lvl+0x34/0x48 [ 14.064360] print_report+0x172/0x475 [ 14.064547] ? _raw_spin_lock_irq+0x83/0xe0 [ 14.064758] ? __virt_addr_valid+0xef/0x170 [ 14.064975] ? unix_get_socket+0x10/0x90 [ 14.065167] kasan_report+0xad/0x130 [ 14.065353] ? unix_get_socket+0x10/0x90 [ 14.065553] unix_get_socket+0x10/0x90 [ 14.065744] __io_sqe_files_unregister+0x87/0x1e0 [ 14.065989] ? io_rsrc_refs_drop+0x1c/0xd0 [ 14.066199] io_ring_exit_work+0x388/0x6a5 [ 14.066410] ? io_uring_try_cancel_requests+0x5bf/0x5bf [ 14.066674] ? try_to_wake_up+0xdb/0x910 [ 14.066873] ? virt_to_head_page+0xbe/0xbe [ 14.067080] ? __schedule+0x574/0xd20 [ 14.067273] ? read_word_at_a_time+0xe/0x20 [ 14.067492] ? strscpy+0xb5/0x190 [ 14.067665] process_one_work+0x423/0x710 [ 14.067879] worker_thread+0x2a2/0x6f0 [ 14.068073] ? process_one_work+0x710/0x710 [ 14.068284] kthread+0x163/0x1a0 [ 14.068454] ? 
kthread_complete_and_exit+0x20/0x20 [ 14.068697] ret_from_fork+0x22/0x30 [ 14.068886] </TASK> [ 14.069000] [ 14.069088] Allocated by task 289: [ 14.069269] kasan_save_stack+0x1e/0x40 [ 14.069463] kasan_set_track+0x21/0x30 [ 14.069652] __kasan_slab_alloc+0x58/0x70 [ 14.069899] kmem_cache_alloc+0xc5/0x200 [ 14.070100] __alloc_file+0x20/0x160 [ 14.070283] alloc_empty_file+0x3b/0xc0 [ 14.070479] path_openat+0xc3/0x1770 [ 14.070689] do_filp_open+0x150/0x270 [ 14.070888] do_sys_openat2+0x113/0x270 [ 14.071081] __x64_sys_openat+0xc8/0x140 [ 14.071283] do_syscall_64+0x3b/0x90 [ 14.071466] entry_SYSCALL_64_after_hwframe+0x63/0xcd [ 14.071791] [ 14.071874] Freed by task 0: [ 14.072027] kasan_save_stack+0x1e/0x40 [ 14.072224] kasan_set_track+0x21/0x30 [ 14.072415] kasan_save_free_info+0x2a/0x50 [ 14.072627] __kasan_slab_free+0x106/0x190 [ 14.072858] kmem_cache_free+0x98/0x340 [ 14.073075] rcu_core+0x427/0xe50 [ 14.073249] __do_softirq+0x110/0x3cd [ 14.073440] [ 14.073523] Last potentially related work creation: [ 14.073801] kasan_save_stack+0x1e/0x40 [ 14.074017] __kasan_record_aux_stack+0x97/0xb0 [ 14.074264] call_rcu+0x41/0x550 [ 14.074436] task_work_run+0xf4/0x170 [ 14.074619] exit_to_user_mode_prepare+0x113/0x120 [ 14.074858] syscall_exit_to_user_mode+0x1d/0x40 [ 14.075092] do_syscall_64+0x48/0x90 [ 14.075272] entry_SYSCALL_64_after_hwframe+0x63/0xcd [ 14.075529] [ 14.075612] Second to last potentially related work creation: [ 14.075900] kasan_save_stack+0x1e/0x40 [ 14.076098] __kasan_record_aux_stack+0x97/0xb0 [ 14.076325] task_work_add+0x72/0x1b0 [ 14.076512] fput+0x65/0xc0 [ 14.076657] filp_close+0x8e/0xa0 [ 14.076825] __x64_sys_close+0x15/0x50 [ 14.077019] do_syscall_64+0x3b/0x90 [ 14.077199] entry_SYSCALL_64_after_hwframe+0x63/0xcd [ 14.077448] [ 14.077530] The buggy address belongs to the object at ffff88800b09cf00 [ 14.077530] which belongs to the cache filp of size 232 [ 14.078105] The buggy address is located 32 bytes inside of [ 14.078105] 232-byte region 
[ffff88800b09cf00, ffff88800b09cfe8) [ 14.078685] [ 14.078771] The buggy address belongs to the physical page: [ 14.079046] page:000000001bd520e7 refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff88800b09de00 pfn:0xb09c [ 14.079575] head:000000001bd520e7 order:1 compound_mapcount:0 compound_pincount:0 [ 14.079946] flags: 0x100000000010200(slab|head|node=0|zone=1) [ 14.080244] raw: 0100000000010200 0000000000000000 dead000000000001 ffff88800493cc80 [ 14.080629] raw: ffff88800b09de00 0000000080190018 00000001ffffffff 0000000000000000 [ 14.081016] page dumped because: kasan: bad access detected [ 14.081293] [ 14.081376] Memory state around the buggy address: [ 14.081618] ffff88800b09ce00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 14.081974] ffff88800b09ce80: 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc fc [ 14.082336] >ffff88800b09cf00: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 14.082690] ^ [ 14.082909] ffff88800b09cf80: fb fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc [ 14.083266] ffff88800b09d000: fc fc fc fc fc fc fc fc fa fb fb fb fb fb fb fb [ 14.083622] ================================================================== The actual tracing of this bug is shown below: commit8c71fe7502("io_uring: ensure fput() called correspondingly when direct install fails") adds an additional fput() in io_fixed_fd_install() when io_file_bitmap_get() returns error values. In that case, the routine will never make it to io_install_fixed_file() due to an early return. static int io_fixed_fd_install(...) { if (alloc_slot) { ... ret = io_file_bitmap_get(ctx); if (unlikely(ret < 0)) { io_ring_submit_unlock(ctx, issue_flags); fput(file); return ret; } ... } ... ret = io_install_fixed_file(req, file, issue_flags, file_slot); ... } In the above scenario, the reference is okay as io_fixed_fd_install() ensures the fput() is called when something bad happens, either via bitmap or via inner io_install_fixed_file(). 
However, commit 61c1b44a21 ("io_uring: fix deadlock on iowq file slot alloc") breaks the balance because it places fput() into the common error path for both io_file_bitmap_get() and io_install_fixed_file(). Since io_install_fixed_file() handles the fput() itself, a reference underflow occurs. Some later commits reshaped the current code into io_fixed_fd_install() -> __io_fixed_fd_install() -> io_install_fixed_file(). However, the fact remains that an extra fput() is called if io_install_fixed_file() calls fput(). Traversing through the code, I find that the two existing callers of __io_fixed_fd_install(), io_fixed_fd_install() and io_msg_send_fd(), both call fput() when handling an error return, so this patch simply removes the fput() in io_install_fixed_file() to fix the bug. Fixes: 61c1b44a21 ("io_uring: fix deadlock on iowq file slot alloc") Signed-off-by: Lin Ma <linma@zju.edu.cn> Link: https://lore.kernel.org/r/be4ba4b.5d44.184a0a406a4.Coremail.linma@zju.edu.cn Signed-off-by: Jens Axboe <axboe@kernel.dk>
		
			
				
	
	
		
			191 lines
		
	
	
	
		
			4.4 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			191 lines
		
	
	
	
		
			4.4 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| // SPDX-License-Identifier: GPL-2.0
 | |
| #include <linux/kernel.h>
 | |
| #include <linux/errno.h>
 | |
| #include <linux/file.h>
 | |
| #include <linux/mm.h>
 | |
| #include <linux/slab.h>
 | |
| #include <linux/nospec.h>
 | |
| #include <linux/io_uring.h>
 | |
| 
 | |
| #include <uapi/linux/io_uring.h>
 | |
| 
 | |
| #include "io_uring.h"
 | |
| #include "rsrc.h"
 | |
| #include "filetable.h"
 | |
| 
 | |
| static int io_file_bitmap_get(struct io_ring_ctx *ctx)
 | |
| {
 | |
| 	struct io_file_table *table = &ctx->file_table;
 | |
| 	unsigned long nr = ctx->file_alloc_end;
 | |
| 	int ret;
 | |
| 
 | |
| 	do {
 | |
| 		ret = find_next_zero_bit(table->bitmap, nr, table->alloc_hint);
 | |
| 		if (ret != nr)
 | |
| 			return ret;
 | |
| 
 | |
| 		if (table->alloc_hint == ctx->file_alloc_start)
 | |
| 			break;
 | |
| 		nr = table->alloc_hint;
 | |
| 		table->alloc_hint = ctx->file_alloc_start;
 | |
| 	} while (1);
 | |
| 
 | |
| 	return -ENFILE;
 | |
| }
 | |
| 
 | |
| bool io_alloc_file_tables(struct io_file_table *table, unsigned nr_files)
 | |
| {
 | |
| 	table->files = kvcalloc(nr_files, sizeof(table->files[0]),
 | |
| 				GFP_KERNEL_ACCOUNT);
 | |
| 	if (unlikely(!table->files))
 | |
| 		return false;
 | |
| 
 | |
| 	table->bitmap = bitmap_zalloc(nr_files, GFP_KERNEL_ACCOUNT);
 | |
| 	if (unlikely(!table->bitmap)) {
 | |
| 		kvfree(table->files);
 | |
| 		return false;
 | |
| 	}
 | |
| 
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
| void io_free_file_tables(struct io_file_table *table)
 | |
| {
 | |
| 	kvfree(table->files);
 | |
| 	bitmap_free(table->bitmap);
 | |
| 	table->files = NULL;
 | |
| 	table->bitmap = NULL;
 | |
| }
 | |
| 
 | |
| static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file,
 | |
| 				 u32 slot_index)
 | |
| 	__must_hold(&req->ctx->uring_lock)
 | |
| {
 | |
| 	bool needs_switch = false;
 | |
| 	struct io_fixed_file *file_slot;
 | |
| 	int ret;
 | |
| 
 | |
| 	if (io_is_uring_fops(file))
 | |
| 		return -EBADF;
 | |
| 	if (!ctx->file_data)
 | |
| 		return -ENXIO;
 | |
| 	if (slot_index >= ctx->nr_user_files)
 | |
| 		return -EINVAL;
 | |
| 
 | |
| 	slot_index = array_index_nospec(slot_index, ctx->nr_user_files);
 | |
| 	file_slot = io_fixed_file_slot(&ctx->file_table, slot_index);
 | |
| 
 | |
| 	if (file_slot->file_ptr) {
 | |
| 		struct file *old_file;
 | |
| 
 | |
| 		ret = io_rsrc_node_switch_start(ctx);
 | |
| 		if (ret)
 | |
| 			goto err;
 | |
| 
 | |
| 		old_file = (struct file *)(file_slot->file_ptr & FFS_MASK);
 | |
| 		ret = io_queue_rsrc_removal(ctx->file_data, slot_index,
 | |
| 					    ctx->rsrc_node, old_file);
 | |
| 		if (ret)
 | |
| 			goto err;
 | |
| 		file_slot->file_ptr = 0;
 | |
| 		io_file_bitmap_clear(&ctx->file_table, slot_index);
 | |
| 		needs_switch = true;
 | |
| 	}
 | |
| 
 | |
| 	ret = io_scm_file_account(ctx, file);
 | |
| 	if (!ret) {
 | |
| 		*io_get_tag_slot(ctx->file_data, slot_index) = 0;
 | |
| 		io_fixed_file_set(file_slot, file);
 | |
| 		io_file_bitmap_set(&ctx->file_table, slot_index);
 | |
| 	}
 | |
| err:
 | |
| 	if (needs_switch)
 | |
| 		io_rsrc_node_switch(ctx, ctx->file_data);
 | |
| 	return ret;
 | |
| }
 | |
| 
 | |
| int __io_fixed_fd_install(struct io_ring_ctx *ctx, struct file *file,
 | |
| 			  unsigned int file_slot)
 | |
| {
 | |
| 	bool alloc_slot = file_slot == IORING_FILE_INDEX_ALLOC;
 | |
| 	int ret;
 | |
| 
 | |
| 	if (alloc_slot) {
 | |
| 		ret = io_file_bitmap_get(ctx);
 | |
| 		if (unlikely(ret < 0))
 | |
| 			return ret;
 | |
| 		file_slot = ret;
 | |
| 	} else {
 | |
| 		file_slot--;
 | |
| 	}
 | |
| 
 | |
| 	ret = io_install_fixed_file(ctx, file, file_slot);
 | |
| 	if (!ret && alloc_slot)
 | |
| 		ret = file_slot;
 | |
| 	return ret;
 | |
| }
 | |
| /*
 | |
|  * Note when io_fixed_fd_install() returns error value, it will ensure
 | |
|  * fput() is called correspondingly.
 | |
|  */
 | |
| int io_fixed_fd_install(struct io_kiocb *req, unsigned int issue_flags,
 | |
| 			struct file *file, unsigned int file_slot)
 | |
| {
 | |
| 	struct io_ring_ctx *ctx = req->ctx;
 | |
| 	int ret;
 | |
| 
 | |
| 	io_ring_submit_lock(ctx, issue_flags);
 | |
| 	ret = __io_fixed_fd_install(ctx, file, file_slot);
 | |
| 	io_ring_submit_unlock(ctx, issue_flags);
 | |
| 
 | |
| 	if (unlikely(ret < 0))
 | |
| 		fput(file);
 | |
| 	return ret;
 | |
| }
 | |
| 
 | |
| int io_fixed_fd_remove(struct io_ring_ctx *ctx, unsigned int offset)
 | |
| {
 | |
| 	struct io_fixed_file *file_slot;
 | |
| 	struct file *file;
 | |
| 	int ret;
 | |
| 
 | |
| 	if (unlikely(!ctx->file_data))
 | |
| 		return -ENXIO;
 | |
| 	if (offset >= ctx->nr_user_files)
 | |
| 		return -EINVAL;
 | |
| 	ret = io_rsrc_node_switch_start(ctx);
 | |
| 	if (ret)
 | |
| 		return ret;
 | |
| 
 | |
| 	offset = array_index_nospec(offset, ctx->nr_user_files);
 | |
| 	file_slot = io_fixed_file_slot(&ctx->file_table, offset);
 | |
| 	if (!file_slot->file_ptr)
 | |
| 		return -EBADF;
 | |
| 
 | |
| 	file = (struct file *)(file_slot->file_ptr & FFS_MASK);
 | |
| 	ret = io_queue_rsrc_removal(ctx->file_data, offset, ctx->rsrc_node, file);
 | |
| 	if (ret)
 | |
| 		return ret;
 | |
| 
 | |
| 	file_slot->file_ptr = 0;
 | |
| 	io_file_bitmap_clear(&ctx->file_table, offset);
 | |
| 	io_rsrc_node_switch(ctx, ctx->file_data);
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| int io_register_file_alloc_range(struct io_ring_ctx *ctx,
 | |
| 				 struct io_uring_file_index_range __user *arg)
 | |
| {
 | |
| 	struct io_uring_file_index_range range;
 | |
| 	u32 end;
 | |
| 
 | |
| 	if (copy_from_user(&range, arg, sizeof(range)))
 | |
| 		return -EFAULT;
 | |
| 	if (check_add_overflow(range.off, range.len, &end))
 | |
| 		return -EOVERFLOW;
 | |
| 	if (range.resv || end > ctx->nr_user_files)
 | |
| 		return -EINVAL;
 | |
| 
 | |
| 	io_file_table_set_alloc_range(ctx, range.off, range.len);
 | |
| 	return 0;
 | |
| }
 |