Mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git, synced 2025-08-05 16:54:27 +00:00
ext4: avoid unnecessary transaction stalls during writeback
Currently ext4_writepages() submits all pages with a transaction started. When no page needs block allocation or extent conversion we can submit all dirty pages in the inode while holding a single transaction handle, and when the device is congested this can take a significant amount of time. Thus ext4_writepages() can block transaction commits for extended periods of time.

Take for example a simple benchmark simulating a PostgreSQL database (pgioperf in mmtest). The benchmark runs 16 processes doing random reads from a huge file, one process doing random writes to the huge file, and one process doing sequential writes to a small file and frequently running fsync. With an unpatched kernel, transaction commits take on average ~18s with a standard deviation of ~41s; the top 5 commit times are: 274.466639s, 126.467347s, 86.992429s, 34.351563s, 31.517653s.

After this patch, transaction commits take on average 0.1s with a standard deviation of 0.15s; the top 5 commit times are: 0.563792s, 0.519980s, 0.509841s, 0.471700s, 0.469899s.

[ Modified so we use an explicit do_map flag instead of relying on io_end not being allocated, since io_end->inode is needed for I/O error handling. -- tytso ]

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
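To make the mechanism concrete, here is a minimal userspace sketch of the two-pass idea. It is not kernel code: page_state, writeback_ctx, submit_page(), journal_start() and journal_stop() are hypothetical stand-ins for the real mpage_da_data / io_end / jbd2 machinery. It only models how a do_map-style flag lets a first pass push out already-mapped dirty pages without holding a transaction handle, while everything that needs block allocation is deferred to a second pass that does hold one.

/*
 * Minimal userspace sketch of the two-pass writeback idea (NOT kernel code).
 * All types and helpers here are hypothetical stand-ins.
 */
#include <stdbool.h>
#include <stdio.h>

struct page_state {
        bool dirty;
        bool mapped;    /* block already allocated: no transaction needed */
};

struct writeback_ctx {
        unsigned int do_map:1;  /* mirrors mpd.do_map from the patch */
};

/* Stand-in for handing an already-mapped page to the block layer. */
static void submit_page(int idx)
{
        printf("submitted page %d\n", idx);
}

/* Stand-ins for starting/stopping a journal transaction. */
static void journal_start(void) { printf("transaction started\n"); }
static void journal_stop(void)  { printf("transaction stopped\n"); }

/* One writeback pass; refuses to map blocks unless ctx->do_map is set. */
static void writeback_pass(struct writeback_ctx *ctx,
                           struct page_state *pages, int n)
{
        for (int i = 0; i < n; i++) {
                if (!pages[i].dirty)
                        continue;
                if (!pages[i].mapped) {
                        if (!ctx->do_map)
                                continue;       /* needs a transaction: leave for pass 2 */
                        pages[i].mapped = true; /* block allocation would happen here */
                }
                submit_page(i);
                pages[i].dirty = false;
        }
}

static void writeback(struct page_state *pages, int n)
{
        struct writeback_ctx ctx = { .do_map = 0 };

        /* Pass 1: no transaction held; only already-mapped pages go out. */
        writeback_pass(&ctx, pages, n);

        /* Pass 2: take the transaction only for pages still needing mapping. */
        ctx.do_map = 1;
        journal_start();
        writeback_pass(&ctx, pages, n);
        journal_stop();
}

int main(void)
{
        struct page_state pages[] = {
                { .dirty = true, .mapped = true  },
                { .dirty = true, .mapped = false },
                { .dirty = true, .mapped = true  },
        };

        writeback(pages, 3);
        return 0;
}

The patch itself achieves this split inside ext4_writepages(): mpage_prepare_extent_to_map() is first called with mpd.do_map = 0 before the main loop, and mpage_add_bh_to_extent() / mpage_process_page_bufs() refuse to start mapping while the flag is clear.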
parent 85c8f176a6
commit dddbd6ac8f
1 changed file with 35 additions and 0 deletions
fs/ext4/inode.c
@@ -1643,6 +1643,7 @@ struct mpage_da_data {
         */
        struct ext4_map_blocks map;
        struct ext4_io_submit io_submit;        /* IO submission data */
+       unsigned int do_map:1;
 };
 
 static void mpage_release_unused_pages(struct mpage_da_data *mpd,
@@ -2179,6 +2180,9 @@ static bool mpage_add_bh_to_extent(struct mpage_da_data *mpd, ext4_lblk_t lblk,
 
        /* First block in the extent? */
        if (map->m_len == 0) {
+               /* We cannot map unless handle is started... */
+               if (!mpd->do_map)
+                       return false;
                map->m_lblk = lblk;
                map->m_len = 1;
                map->m_flags = bh->b_state & BH_FLAGS;
@@ -2231,6 +2235,9 @@ static int mpage_process_page_bufs(struct mpage_da_data *mpd,
                        /* Found extent to map? */
                        if (mpd->map.m_len)
                                return 0;
+                       /* Buffer needs mapping and handle is not started? */
+                       if (!mpd->do_map)
+                               return 0;
                        /* Everything mapped so far and we hit EOF */
                        break;
                }
@@ -2747,6 +2754,29 @@ retry:
                tag_pages_for_writeback(mapping, mpd.first_page, mpd.last_page);
        done = false;
        blk_start_plug(&plug);
+
+       /*
+        * First writeback pages that don't need mapping - we can avoid
+        * starting a transaction unnecessarily and also avoid being blocked
+        * in the block layer on device congestion while having transaction
+        * started.
+        */
+       mpd.do_map = 0;
+       mpd.io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL);
+       if (!mpd.io_submit.io_end) {
+               ret = -ENOMEM;
+               goto unplug;
+       }
+       ret = mpage_prepare_extent_to_map(&mpd);
+       /* Submit prepared bio */
+       ext4_io_submit(&mpd.io_submit);
+       ext4_put_io_end_defer(mpd.io_submit.io_end);
+       mpd.io_submit.io_end = NULL;
+       /* Unlock pages we didn't use */
+       mpage_release_unused_pages(&mpd, false);
+       if (ret < 0)
+               goto unplug;
+
        while (!done && mpd.first_page <= mpd.last_page) {
                /* For each extent of pages we use new io_end */
                mpd.io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL);
@@ -2775,8 +2805,10 @@ retry:
                                wbc->nr_to_write, inode->i_ino, ret);
                        /* Release allocated io_end */
                        ext4_put_io_end(mpd.io_submit.io_end);
+                       mpd.io_submit.io_end = NULL;
                        break;
                }
+               mpd.do_map = 1;
 
                trace_ext4_da_write_pages(inode, mpd.first_page, mpd.wbc);
                ret = mpage_prepare_extent_to_map(&mpd);
@@ -2807,6 +2839,7 @@ retry:
                if (!ext4_handle_valid(handle) || handle->h_sync == 0) {
                        ext4_journal_stop(handle);
                        handle = NULL;
+                       mpd.do_map = 0;
                }
                /* Submit prepared bio */
                ext4_io_submit(&mpd.io_submit);
@@ -2824,6 +2857,7 @@ retry:
                        ext4_journal_stop(handle);
                } else
                        ext4_put_io_end(mpd.io_submit.io_end);
+               mpd.io_submit.io_end = NULL;
 
                if (ret == -ENOSPC && sbi->s_journal) {
                        /*
@@ -2839,6 +2873,7 @@ retry:
                if (ret)
                        break;
        }
+unplug:
        blk_finish_plug(&plug);
        if (!ret && !cycled && wbc->nr_to_write > 0) {
                cycled = 1;