btrfs: zlib: refactor S390x HW acceleration buffer preparation
Currently for s390x HW zlib compression, to get the best performance we need a buffer size which is larger than a page. This means we need to copy multiple pages into workspace->buf, then use that buffer as the zlib compression input.

Currently it's hardcoded to use page sized folios, and all the handling is buried deep inside a loop.

Refactor the code by:

- Introduce a dedicated helper to do the buffer copy
  The new helper is called copy_data_into_buffer().

- Add extra ASSERT()s
  * Make sure we only go into the function for hardware acceleration
  * Make sure we still get page sized folios

- Prepare for future large folios
  This means we will rely on the folio size, rather than PAGE_SIZE, to do the copy.

- Handle the folio mapping and unmapping inside the helper function
  The S390x hardware acceleration path never uses the @data_in pointer, thus we can do all the folio mapping/unmapping inside the helper.

Acked-by: Mikhail Zaslonko <zaslonko@linux.ibm.com>
Tested-by: Mikhail Zaslonko <zaslonko@linux.ibm.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
parent 87d6aaf79b
commit aa60fe12b4
1 changed file with 54 additions and 28 deletions
fs/btrfs/zlib.c

@@ -94,6 +94,47 @@ fail:
 	return ERR_PTR(-ENOMEM);
 }
 
+/*
+ * Helper for S390x with hardware zlib compression support.
+ *
+ * That hardware acceleration requires a buffer size larger than a single page
+ * to get ideal performance, thus we need to do the memory copy rather than
+ * use the page cache directly as input buffer.
+ */
+static int copy_data_into_buffer(struct address_space *mapping,
+				 struct workspace *workspace, u64 filepos,
+				 unsigned long length)
+{
+	u64 cur = filepos;
+
+	/* It's only for hardware accelerated zlib code. */
+	ASSERT(zlib_deflate_dfltcc_enabled());
+
+	while (cur < filepos + length) {
+		struct folio *folio;
+		void *data_in;
+		unsigned int offset;
+		unsigned long copy_length;
+		int ret;
+
+		ret = btrfs_compress_filemap_get_folio(mapping, cur, &folio);
+		if (ret < 0)
+			return ret;
+		/* No large folio support yet. */
+		ASSERT(!folio_test_large(folio));
+
+		offset = offset_in_folio(folio, cur);
+		copy_length = min(folio_size(folio) - offset,
+				  filepos + length - cur);
+
+		data_in = kmap_local_folio(folio, offset);
+		memcpy(workspace->buf + cur - filepos, data_in, copy_length);
+		kunmap_local(data_in);
+		cur += copy_length;
+	}
+	return 0;
+}
+
 int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
 		u64 start, struct folio **folios, unsigned long *out_folios,
 		unsigned long *total_in, unsigned long *total_out)
@@ -105,8 +146,6 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
 	int nr_folios = 0;
 	struct folio *in_folio = NULL;
 	struct folio *out_folio = NULL;
-	unsigned long bytes_left;
-	unsigned int in_buf_folios;
 	unsigned long len = *total_out;
 	unsigned long nr_dest_folios = *out_folios;
 	const unsigned long max_out = nr_dest_folios * PAGE_SIZE;
@@ -150,34 +189,21 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
 		 * the workspace buffer if required.
 		 */
 		if (workspace->strm.avail_in == 0) {
-			bytes_left = len - workspace->strm.total_in;
-			in_buf_folios = min(DIV_ROUND_UP(bytes_left, PAGE_SIZE),
-					    workspace->buf_size / PAGE_SIZE);
-			if (in_buf_folios > 1) {
-				int i;
+			unsigned long bytes_left = len - workspace->strm.total_in;
+			unsigned int copy_length = min(bytes_left, workspace->buf_size);
 
-				/* S390 hardware acceleration path, not subpage. */
-				ASSERT(!btrfs_is_subpage(
-						inode_to_fs_info(mapping->host),
-						mapping));
-				for (i = 0; i < in_buf_folios; i++) {
-					if (data_in) {
-						kunmap_local(data_in);
-						folio_put(in_folio);
-						data_in = NULL;
-					}
-					ret = btrfs_compress_filemap_get_folio(mapping,
-							start, &in_folio);
-					if (ret < 0)
-						goto out;
-					data_in = kmap_local_folio(in_folio, 0);
-					copy_page(workspace->buf + i * PAGE_SIZE,
-						  data_in);
-					start += PAGE_SIZE;
-				}
+			/*
+			 * This can only happen when hardware zlib compression is
+			 * enabled.
+			 */
+			if (copy_length > PAGE_SIZE) {
+				ret = copy_data_into_buffer(mapping, workspace,
+							    start, copy_length);
+				if (ret < 0)
+					goto out;
+				start += copy_length;
 				workspace->strm.next_in = workspace->buf;
-				workspace->strm.avail_in = min(bytes_left,
-							       in_buf_folios << PAGE_SHIFT);
+				workspace->strm.avail_in = copy_length;
 			} else {
 				unsigned int pg_off;
 				unsigned int cur_len;
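For illustration only, here is a minimal userspace sketch of the chunked-copy arithmetic the new copy_data_into_buffer() helper performs: walk the byte range [filepos, filepos + length), clamp each copy to both the end of the current folio and the end of the requested range, and land the pieces contiguously in one buffer. FOLIO_SIZE, fake_folio_data() and copy_into_buffer() are invented stand-ins for this sketch, not kernel APIs.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define FOLIO_SIZE 4096UL		/* stand-in for folio_size(folio) */
#define FILE_SIZE  (4 * FOLIO_SIZE)

/* Stand-in for btrfs_compress_filemap_get_folio() + kmap_local_folio():
 * return the base address of the in-memory "folio" covering byte @pos. */
static const uint8_t *fake_folio_data(const uint8_t *file, uint64_t pos)
{
	return file + (pos / FOLIO_SIZE) * FOLIO_SIZE;
}

/* Copy @length bytes of @file starting at @filepos into the contiguous
 * @buf, one folio-sized piece at a time, mirroring the helper's loop. */
static void copy_into_buffer(uint8_t *buf, const uint8_t *file,
			     uint64_t filepos, unsigned long length)
{
	uint64_t cur = filepos;

	while (cur < filepos + length) {
		/* offset_in_folio(): where @cur sits inside its folio. */
		unsigned long offset = cur % FOLIO_SIZE;
		/* Clamp first to the end of the folio ... */
		unsigned long copy_length = FOLIO_SIZE - offset;

		/* ... then to the end of the requested range. */
		if (copy_length > filepos + length - cur)
			copy_length = filepos + length - cur;

		memcpy(buf + (cur - filepos),
		       fake_folio_data(file, cur) + offset, copy_length);
		cur += copy_length;
	}
}

int main(void)
{
	uint8_t *file = malloc(FILE_SIZE);
	uint8_t buf[2 * FOLIO_SIZE];

	if (!file)
		return 1;
	for (uint64_t i = 0; i < FILE_SIZE; i++)
		file[i] = (uint8_t)i;

	/* Copy two folios' worth of data starting mid-folio. */
	copy_into_buffer(buf, file, FOLIO_SIZE / 2, sizeof(buf));
	assert(memcmp(buf, file + FOLIO_SIZE / 2, sizeof(buf)) == 0);
	printf("copied %zu bytes across folio boundaries\n", sizeof(buf));
	free(file);
	return 0;
}

Note that only FOLIO_SIZE would change once large folios arrive; the clamping logic stays the same, which is the point of keying the copy off folio_size() rather than PAGE_SIZE.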