mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-09-18 22:14:16 +00:00 
			
		
		
		
	 5577022f4e
			
		
	
	
		5577022f4e
		
	
	
	
	
		
			
			Using wait_for_completion() to wait for an I/O request to be executed results in wrong iowait time accounting. For example, a system whose only task is doing write() and fdatasync() on a block device can be reported as idle instead of iowaiting, as it should be, because blkdev_issue_flush() calls wait_for_completion(), which in turn calls schedule(), which does not increment the iowait proc counter and thus does not turn on iowait time accounting. The patch makes the block layer use wait_for_completion_io() instead of wait_for_completion() where appropriate to account iowait time correctly. Signed-off-by: Vladimir Davydov <vdavydov@parallels.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
		
			
				
	
	
		
			301 lines
		
	
	
	
		
			7.1 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			301 lines
		
	
	
	
		
			7.1 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * Generic block device helper functions (discard, write same, zeroout)
 | |
|  */
 | |
| #include <linux/kernel.h>
 | |
| #include <linux/module.h>
 | |
| #include <linux/bio.h>
 | |
| #include <linux/blkdev.h>
 | |
| #include <linux/scatterlist.h>
 | |
| 
 | |
| #include "blk.h"
 | |
| 
 | |
| struct bio_batch {
 | |
| 	atomic_t		done;
 | |
| 	unsigned long		flags;
 | |
| 	struct completion	*wait;
 | |
| };
 | |
| 
 | |
| static void bio_batch_end_io(struct bio *bio, int err)
 | |
| {
 | |
| 	struct bio_batch *bb = bio->bi_private;
 | |
| 
 | |
| 	if (err && (err != -EOPNOTSUPP))
 | |
| 		clear_bit(BIO_UPTODATE, &bb->flags);
 | |
| 	if (atomic_dec_and_test(&bb->done))
 | |
| 		complete(bb->wait);
 | |
| 	bio_put(bio);
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * blkdev_issue_discard - queue a discard
 | |
|  * @bdev:	blockdev to issue discard for
 | |
|  * @sector:	start sector
 | |
|  * @nr_sects:	number of sectors to discard
 | |
|  * @gfp_mask:	memory allocation flags (for bio_alloc)
 | |
|  * @flags:	BLKDEV_IFL_* flags to control behaviour
 | |
|  *
 | |
|  * Description:
 | |
|  *    Issue a discard request for the sectors in question.
 | |
|  */
 | |
| int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 | |
| 		sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
 | |
| {
 | |
| 	DECLARE_COMPLETION_ONSTACK(wait);
 | |
| 	struct request_queue *q = bdev_get_queue(bdev);
 | |
| 	int type = REQ_WRITE | REQ_DISCARD;
 | |
| 	sector_t max_discard_sectors;
 | |
| 	sector_t granularity, alignment;
 | |
| 	struct bio_batch bb;
 | |
| 	struct bio *bio;
 | |
| 	int ret = 0;
 | |
| 	struct blk_plug plug;
 | |
| 
 | |
| 	if (!q)
 | |
| 		return -ENXIO;
 | |
| 
 | |
| 	if (!blk_queue_discard(q))
 | |
| 		return -EOPNOTSUPP;
 | |
| 
 | |
| 	/* Zero-sector (unknown) and one-sector granularities are the same.  */
 | |
| 	granularity = max(q->limits.discard_granularity >> 9, 1U);
 | |
| 	alignment = bdev_discard_alignment(bdev) >> 9;
 | |
| 	alignment = sector_div(alignment, granularity);
 | |
| 
 | |
| 	/*
 | |
| 	 * Ensure that max_discard_sectors is of the proper
 | |
| 	 * granularity, so that requests stay aligned after a split.
 | |
| 	 */
 | |
| 	max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9);
 | |
| 	sector_div(max_discard_sectors, granularity);
 | |
| 	max_discard_sectors *= granularity;
 | |
| 	if (unlikely(!max_discard_sectors)) {
 | |
| 		/* Avoid infinite loop below. Being cautious never hurts. */
 | |
| 		return -EOPNOTSUPP;
 | |
| 	}
 | |
| 
 | |
| 	if (flags & BLKDEV_DISCARD_SECURE) {
 | |
| 		if (!blk_queue_secdiscard(q))
 | |
| 			return -EOPNOTSUPP;
 | |
| 		type |= REQ_SECURE;
 | |
| 	}
 | |
| 
 | |
| 	atomic_set(&bb.done, 1);
 | |
| 	bb.flags = 1 << BIO_UPTODATE;
 | |
| 	bb.wait = &wait;
 | |
| 
 | |
| 	blk_start_plug(&plug);
 | |
| 	while (nr_sects) {
 | |
| 		unsigned int req_sects;
 | |
| 		sector_t end_sect, tmp;
 | |
| 
 | |
| 		bio = bio_alloc(gfp_mask, 1);
 | |
| 		if (!bio) {
 | |
| 			ret = -ENOMEM;
 | |
| 			break;
 | |
| 		}
 | |
| 
 | |
| 		req_sects = min_t(sector_t, nr_sects, max_discard_sectors);
 | |
| 
 | |
| 		/*
 | |
| 		 * If splitting a request, and the next starting sector would be
 | |
| 		 * misaligned, stop the discard at the previous aligned sector.
 | |
| 		 */
 | |
| 		end_sect = sector + req_sects;
 | |
| 		tmp = end_sect;
 | |
| 		if (req_sects < nr_sects &&
 | |
| 		    sector_div(tmp, granularity) != alignment) {
 | |
| 			end_sect = end_sect - alignment;
 | |
| 			sector_div(end_sect, granularity);
 | |
| 			end_sect = end_sect * granularity + alignment;
 | |
| 			req_sects = end_sect - sector;
 | |
| 		}
 | |
| 
 | |
| 		bio->bi_sector = sector;
 | |
| 		bio->bi_end_io = bio_batch_end_io;
 | |
| 		bio->bi_bdev = bdev;
 | |
| 		bio->bi_private = &bb;
 | |
| 
 | |
| 		bio->bi_size = req_sects << 9;
 | |
| 		nr_sects -= req_sects;
 | |
| 		sector = end_sect;
 | |
| 
 | |
| 		atomic_inc(&bb.done);
 | |
| 		submit_bio(type, bio);
 | |
| 	}
 | |
| 	blk_finish_plug(&plug);
 | |
| 
 | |
| 	/* Wait for bios in-flight */
 | |
| 	if (!atomic_dec_and_test(&bb.done))
 | |
| 		wait_for_completion_io(&wait);
 | |
| 
 | |
| 	if (!test_bit(BIO_UPTODATE, &bb.flags))
 | |
| 		ret = -EIO;
 | |
| 
 | |
| 	return ret;
 | |
| }
 | |
| EXPORT_SYMBOL(blkdev_issue_discard);
 | |
| 
 | |
| /**
 | |
|  * blkdev_issue_write_same - queue a write same operation
 | |
|  * @bdev:	target blockdev
 | |
|  * @sector:	start sector
 | |
|  * @nr_sects:	number of sectors to write
 | |
|  * @gfp_mask:	memory allocation flags (for bio_alloc)
 | |
|  * @page:	page containing data to write
 | |
|  *
 | |
|  * Description:
 | |
|  *    Issue a write same request for the sectors in question.
 | |
|  */
 | |
| int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
 | |
| 			    sector_t nr_sects, gfp_t gfp_mask,
 | |
| 			    struct page *page)
 | |
| {
 | |
| 	DECLARE_COMPLETION_ONSTACK(wait);
 | |
| 	struct request_queue *q = bdev_get_queue(bdev);
 | |
| 	unsigned int max_write_same_sectors;
 | |
| 	struct bio_batch bb;
 | |
| 	struct bio *bio;
 | |
| 	int ret = 0;
 | |
| 
 | |
| 	if (!q)
 | |
| 		return -ENXIO;
 | |
| 
 | |
| 	max_write_same_sectors = q->limits.max_write_same_sectors;
 | |
| 
 | |
| 	if (max_write_same_sectors == 0)
 | |
| 		return -EOPNOTSUPP;
 | |
| 
 | |
| 	atomic_set(&bb.done, 1);
 | |
| 	bb.flags = 1 << BIO_UPTODATE;
 | |
| 	bb.wait = &wait;
 | |
| 
 | |
| 	while (nr_sects) {
 | |
| 		bio = bio_alloc(gfp_mask, 1);
 | |
| 		if (!bio) {
 | |
| 			ret = -ENOMEM;
 | |
| 			break;
 | |
| 		}
 | |
| 
 | |
| 		bio->bi_sector = sector;
 | |
| 		bio->bi_end_io = bio_batch_end_io;
 | |
| 		bio->bi_bdev = bdev;
 | |
| 		bio->bi_private = &bb;
 | |
| 		bio->bi_vcnt = 1;
 | |
| 		bio->bi_io_vec->bv_page = page;
 | |
| 		bio->bi_io_vec->bv_offset = 0;
 | |
| 		bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);
 | |
| 
 | |
| 		if (nr_sects > max_write_same_sectors) {
 | |
| 			bio->bi_size = max_write_same_sectors << 9;
 | |
| 			nr_sects -= max_write_same_sectors;
 | |
| 			sector += max_write_same_sectors;
 | |
| 		} else {
 | |
| 			bio->bi_size = nr_sects << 9;
 | |
| 			nr_sects = 0;
 | |
| 		}
 | |
| 
 | |
| 		atomic_inc(&bb.done);
 | |
| 		submit_bio(REQ_WRITE | REQ_WRITE_SAME, bio);
 | |
| 	}
 | |
| 
 | |
| 	/* Wait for bios in-flight */
 | |
| 	if (!atomic_dec_and_test(&bb.done))
 | |
| 		wait_for_completion_io(&wait);
 | |
| 
 | |
| 	if (!test_bit(BIO_UPTODATE, &bb.flags))
 | |
| 		ret = -ENOTSUPP;
 | |
| 
 | |
| 	return ret;
 | |
| }
 | |
| EXPORT_SYMBOL(blkdev_issue_write_same);
 | |
| 
 | |
| /**
 | |
|  * blkdev_issue_zeroout - generate number of zero filed write bios
 | |
|  * @bdev:	blockdev to issue
 | |
|  * @sector:	start sector
 | |
|  * @nr_sects:	number of sectors to write
 | |
|  * @gfp_mask:	memory allocation flags (for bio_alloc)
 | |
|  *
 | |
|  * Description:
 | |
|  *  Generate and issue number of bios with zerofiled pages.
 | |
|  */
 | |
| 
 | |
| int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 | |
| 			sector_t nr_sects, gfp_t gfp_mask)
 | |
| {
 | |
| 	int ret;
 | |
| 	struct bio *bio;
 | |
| 	struct bio_batch bb;
 | |
| 	unsigned int sz;
 | |
| 	DECLARE_COMPLETION_ONSTACK(wait);
 | |
| 
 | |
| 	atomic_set(&bb.done, 1);
 | |
| 	bb.flags = 1 << BIO_UPTODATE;
 | |
| 	bb.wait = &wait;
 | |
| 
 | |
| 	ret = 0;
 | |
| 	while (nr_sects != 0) {
 | |
| 		bio = bio_alloc(gfp_mask,
 | |
| 				min(nr_sects, (sector_t)BIO_MAX_PAGES));
 | |
| 		if (!bio) {
 | |
| 			ret = -ENOMEM;
 | |
| 			break;
 | |
| 		}
 | |
| 
 | |
| 		bio->bi_sector = sector;
 | |
| 		bio->bi_bdev   = bdev;
 | |
| 		bio->bi_end_io = bio_batch_end_io;
 | |
| 		bio->bi_private = &bb;
 | |
| 
 | |
| 		while (nr_sects != 0) {
 | |
| 			sz = min((sector_t) PAGE_SIZE >> 9 , nr_sects);
 | |
| 			ret = bio_add_page(bio, ZERO_PAGE(0), sz << 9, 0);
 | |
| 			nr_sects -= ret >> 9;
 | |
| 			sector += ret >> 9;
 | |
| 			if (ret < (sz << 9))
 | |
| 				break;
 | |
| 		}
 | |
| 		ret = 0;
 | |
| 		atomic_inc(&bb.done);
 | |
| 		submit_bio(WRITE, bio);
 | |
| 	}
 | |
| 
 | |
| 	/* Wait for bios in-flight */
 | |
| 	if (!atomic_dec_and_test(&bb.done))
 | |
| 		wait_for_completion_io(&wait);
 | |
| 
 | |
| 	if (!test_bit(BIO_UPTODATE, &bb.flags))
 | |
| 		/* One of bios in the batch was completed with error.*/
 | |
| 		ret = -EIO;
 | |
| 
 | |
| 	return ret;
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * blkdev_issue_zeroout - zero-fill a block range
 | |
|  * @bdev:	blockdev to write
 | |
|  * @sector:	start sector
 | |
|  * @nr_sects:	number of sectors to write
 | |
|  * @gfp_mask:	memory allocation flags (for bio_alloc)
 | |
|  *
 | |
|  * Description:
 | |
|  *  Generate and issue number of bios with zerofiled pages.
 | |
|  */
 | |
| 
 | |
| int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 | |
| 			 sector_t nr_sects, gfp_t gfp_mask)
 | |
| {
 | |
| 	if (bdev_write_same(bdev)) {
 | |
| 		unsigned char bdn[BDEVNAME_SIZE];
 | |
| 
 | |
| 		if (!blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask,
 | |
| 					     ZERO_PAGE(0)))
 | |
| 			return 0;
 | |
| 
 | |
| 		bdevname(bdev, bdn);
 | |
| 		pr_err("%s: WRITE SAME failed. Manually zeroing.\n", bdn);
 | |
| 	}
 | |
| 
 | |
| 	return __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask);
 | |
| }
 | |
| EXPORT_SYMBOL(blkdev_issue_zeroout);
 |