mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-10-31 16:54:21 +00:00 
			
		
		
		
	blk-iolatency: fix IO hang due to negative inflight counter
Our test reported the following stack, and vmcore showed that ->inflight counter is -1. [ffffc9003fcc38d0] __schedule at ffffffff8173d95d [ffffc9003fcc3958] schedule at ffffffff8173de26 [ffffc9003fcc3970] io_schedule at ffffffff810bb6b6 [ffffc9003fcc3988] blkcg_iolatency_throttle at ffffffff813911cb [ffffc9003fcc3a20] rq_qos_throttle at ffffffff813847f3 [ffffc9003fcc3a48] blk_mq_make_request at ffffffff8137468a [ffffc9003fcc3b08] generic_make_request at ffffffff81368b49 [ffffc9003fcc3b68] submit_bio at ffffffff81368d7d [ffffc9003fcc3bb8] ext4_io_submit at ffffffffa031be00 [ext4] [ffffc9003fcc3c00] ext4_writepages at ffffffffa03163de [ext4] [ffffc9003fcc3d68] do_writepages at ffffffff811c49ae [ffffc9003fcc3d78] __filemap_fdatawrite_range at ffffffff811b6188 [ffffc9003fcc3e30] filemap_write_and_wait_range at ffffffff811b6301 [ffffc9003fcc3e60] ext4_sync_file at ffffffffa030cee8 [ext4] [ffffc9003fcc3ea8] vfs_fsync_range at ffffffff8128594b [ffffc9003fcc3ee8] do_fsync at ffffffff81285abd [ffffc9003fcc3f18] sys_fsync at ffffffff81285d50 [ffffc9003fcc3f28] do_syscall_64 at ffffffff81003c04 [ffffc9003fcc3f50] entry_SYSCALL_64_after_swapgs at ffffffff81742b8e The ->inflight counter may be negative (-1) if 1) blk-iolatency was disabled when the IO was issued, 2) blk-iolatency was enabled before this IO reached its endio, 3) the ->inflight counter is decreased from 0 to -1 in endio() In fact the hang can be easily reproduced by the below script, H=/sys/fs/cgroup/unified/ P=/sys/fs/cgroup/unified/test echo "+io" > $H/cgroup.subtree_control mkdir -p $P echo $$ > $P/cgroup.procs xfs_io -f -d -c "pwrite 0 4k" /dev/sdg echo "`cat /sys/block/sdg/dev` target=1000000" > $P/io.latency xfs_io -f -d -c "pwrite 0 4k" /dev/sdg This fixes the problem by freezing the queue so that while enabling/disabling iolatency, there is no inflight rq running. Note that quiesce_queue is not needed as this only updating iolatency configuration about which dispatching request_queue doesn't care. Signed-off-by: Liu Bo <bo.liu@linux.alibaba.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
		
							parent
							
								
									0803de7804
								
							
						
					
					
						commit
						8c772a9bfc
					
				
					 1 changed files with 45 additions and 7 deletions
				
			
		|  | @ -72,6 +72,7 @@ | |||
| #include <linux/sched/loadavg.h> | ||||
| #include <linux/sched/signal.h> | ||||
| #include <trace/events/block.h> | ||||
| #include <linux/blk-mq.h> | ||||
| #include "blk-rq-qos.h" | ||||
| #include "blk-stat.h" | ||||
| 
 | ||||
|  | @ -601,6 +602,9 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio) | |||
| 		return; | ||||
| 
 | ||||
| 	enabled = blk_iolatency_enabled(iolat->blkiolat); | ||||
| 	if (!enabled) | ||||
| 		return; | ||||
| 
 | ||||
| 	while (blkg && blkg->parent) { | ||||
| 		iolat = blkg_to_lat(blkg); | ||||
| 		if (!iolat) { | ||||
|  | @ -610,7 +614,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio) | |||
| 		rqw = &iolat->rq_wait; | ||||
| 
 | ||||
| 		atomic_dec(&rqw->inflight); | ||||
| 		if (!enabled || iolat->min_lat_nsec == 0) | ||||
| 		if (iolat->min_lat_nsec == 0) | ||||
| 			goto next; | ||||
| 		iolatency_record_time(iolat, &bio->bi_issue, now, | ||||
| 				      issue_as_root); | ||||
|  | @ -754,10 +758,13 @@ int blk_iolatency_init(struct request_queue *q) | |||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val) | ||||
| /*
 | ||||
|  * return 1 for enabling iolatency, return -1 for disabling iolatency, otherwise | ||||
|  * return 0. | ||||
|  */ | ||||
| static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val) | ||||
| { | ||||
| 	struct iolatency_grp *iolat = blkg_to_lat(blkg); | ||||
| 	struct blk_iolatency *blkiolat = iolat->blkiolat; | ||||
| 	u64 oldval = iolat->min_lat_nsec; | ||||
| 
 | ||||
| 	iolat->min_lat_nsec = val; | ||||
|  | @ -766,9 +773,10 @@ static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val) | |||
| 				    BLKIOLATENCY_MAX_WIN_SIZE); | ||||
| 
 | ||||
| 	if (!oldval && val) | ||||
| 		atomic_inc(&blkiolat->enabled); | ||||
| 		return 1; | ||||
| 	if (oldval && !val) | ||||
| 		atomic_dec(&blkiolat->enabled); | ||||
| 		return -1; | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static void iolatency_clear_scaling(struct blkcg_gq *blkg) | ||||
|  | @ -800,6 +808,7 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, | |||
| 	u64 lat_val = 0; | ||||
| 	u64 oldval; | ||||
| 	int ret; | ||||
| 	int enable = 0; | ||||
| 
 | ||||
| 	ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, buf, &ctx); | ||||
| 	if (ret) | ||||
|  | @ -834,7 +843,12 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, | |||
| 	blkg = ctx.blkg; | ||||
| 	oldval = iolat->min_lat_nsec; | ||||
| 
 | ||||
| 	iolatency_set_min_lat_nsec(blkg, lat_val); | ||||
| 	enable = iolatency_set_min_lat_nsec(blkg, lat_val); | ||||
| 	if (enable) { | ||||
| 		WARN_ON_ONCE(!blk_get_queue(blkg->q)); | ||||
| 		blkg_get(blkg); | ||||
| 	} | ||||
| 
 | ||||
| 	if (oldval != iolat->min_lat_nsec) { | ||||
| 		iolatency_clear_scaling(blkg); | ||||
| 	} | ||||
|  | @ -842,6 +856,24 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, | |||
| 	ret = 0; | ||||
| out: | ||||
| 	blkg_conf_finish(&ctx); | ||||
| 	if (ret == 0 && enable) { | ||||
| 		struct iolatency_grp *tmp = blkg_to_lat(blkg); | ||||
| 		struct blk_iolatency *blkiolat = tmp->blkiolat; | ||||
| 
 | ||||
| 		blk_mq_freeze_queue(blkg->q); | ||||
| 
 | ||||
| 		if (enable == 1) | ||||
| 			atomic_inc(&blkiolat->enabled); | ||||
| 		else if (enable == -1) | ||||
| 			atomic_dec(&blkiolat->enabled); | ||||
| 		else | ||||
| 			WARN_ON_ONCE(1); | ||||
| 
 | ||||
| 		blk_mq_unfreeze_queue(blkg->q); | ||||
| 
 | ||||
| 		blkg_put(blkg); | ||||
| 		blk_put_queue(blkg->q); | ||||
| 	} | ||||
| 	return ret ?: nbytes; | ||||
| } | ||||
| 
 | ||||
|  | @ -977,8 +1009,14 @@ static void iolatency_pd_offline(struct blkg_policy_data *pd) | |||
| { | ||||
| 	struct iolatency_grp *iolat = pd_to_lat(pd); | ||||
| 	struct blkcg_gq *blkg = lat_to_blkg(iolat); | ||||
| 	struct blk_iolatency *blkiolat = iolat->blkiolat; | ||||
| 	int ret; | ||||
| 
 | ||||
| 	iolatency_set_min_lat_nsec(blkg, 0); | ||||
| 	ret = iolatency_set_min_lat_nsec(blkg, 0); | ||||
| 	if (ret == 1) | ||||
| 		atomic_inc(&blkiolat->enabled); | ||||
| 	if (ret == -1) | ||||
| 		atomic_dec(&blkiolat->enabled); | ||||
| 	iolatency_clear_scaling(blkg); | ||||
| } | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Liu Bo
						Liu Bo