mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-05-24 10:39:52 +00:00

Allow using the fast modes (negative compression levels) of zstd as a mount
option.

As per the results, the compression ratio is (expectedly) lower:

for level in {-15..-1} 1 2 3; \
do printf "level %3d\n" $level; \
  mount -o compress=zstd:$level /dev/sdb /mnt/test/; \
  grep sdb /proc/mounts; \
  cp -r /usr/bin /mnt/test/; sync; compsize /mnt/test/bin; \
  cp -r /usr/share/doc /mnt/test/; sync; compsize /mnt/test/doc; \
  cp enwik9 /mnt/test/; sync; compsize /mnt/test/enwik9; \
  cp linux-6.13.tar /mnt/test/; sync; compsize /mnt/test/linux-6.13.tar; \
  rm -r /mnt/test/{bin,doc,enwik9,linux-6.13.tar}; \
  umount /mnt/test/; \
done |& tee results | \
awk '/^level/{print}/^TOTAL/{print$3"\t"$2" |"}' | paste - - - - -

               266M bin      |    45M doc    |   953M wiki   |   1.4G source
=============================+===============+===============+===============+
level -15      180M 67%      |    30M 68%    |   694M 72%    |   598M 40%    |
level -14      180M 67%      |    30M 67%    |   683M 71%    |   581M 39%    |
level -13      177M 66%      |    29M 66%    |   671M 70%    |   566M 38%    |
level -12      174M 65%      |    29M 65%    |   658M 69%    |   548M 37%    |
level -11      174M 65%      |    28M 64%    |   645M 67%    |   530M 35%    |
level -10      171M 64%      |    28M 62%    |   631M 66%    |   512M 34%    |
level  -9      165M 62%      |    27M 61%    |   615M 64%    |   493M 33%    |
level  -8      161M 60%      |    27M 59%    |   598M 62%    |   475M 32%    |
level  -7      155M 58%      |    26M 58%    |   582M 61%    |   457M 30%    |
level  -6      151M 56%      |    25M 56%    |   565M 59%    |   437M 29%    |
level  -5      145M 54%      |    24M 55%    |   545M 57%    |   417M 28%    |
level  -4      139M 52%      |    23M 52%    |   520M 54%    |   391M 26%    |
level  -3      135M 50%      |    22M 50%    |   495M 51%    |   369M 24%    |
level  -2      127M 47%      |    22M 48%    |   470M 49%    |   349M 23%    |
level  -1      120M 45%      |    21M 47%    |   452M 47%    |   332M 22%    |
level   1      110M 41%      |    17M 39%    |   362M 38%    |   290M 19%    |
level   2      106M 40%      |    17M 38%    |   349M 36%    |   288M 19%    |
level   3      104M 39%      |    16M 37%    |   340M 35%    |   276M 18%    |

The samples represent some data sets that can be commonly found and show
approximate compressibility. The fast levels trade off speed for ratio and
are best suitable for highly compressible data.

As can be seen above, comparing the results to the current default zstd
level 3, the negative levels are roughly 2x worse at -15 and the ratio
increases almost linearly with each level.

Signed-off-by: Daniel Vacek <neelx@suse.com>
[ update changelog ]
Signed-off-by: David Sterba <dsterba@suse.com>
495 lines
13 KiB
C
495 lines
13 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Copyright (C) 2008 Oracle. All rights reserved.
|
|
*
|
|
* Based on jffs2 zlib code:
|
|
* Copyright © 2001-2007 Red Hat, Inc.
|
|
* Created by David Woodhouse <dwmw2@infradead.org>
|
|
*/
|
|
|
|
#include <linux/kernel.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/zlib.h>
|
|
#include <linux/zutil.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/init.h>
|
|
#include <linux/err.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/pagemap.h>
|
|
#include <linux/bio.h>
|
|
#include <linux/refcount.h>
|
|
#include "btrfs_inode.h"
|
|
#include "compression.h"
|
|
#include "fs.h"
|
|
#include "subpage.h"
|
|
|
|
/*
 * Size of the workspace buffer used when s390 zlib hardware support is
 * enabled: four pages.
 */
#define ZLIB_DFLTCC_BUF_SIZE	(4 * PAGE_SIZE)
|
|
|
|
struct workspace {
|
|
z_stream strm;
|
|
char *buf;
|
|
unsigned int buf_size;
|
|
struct list_head list;
|
|
int level;
|
|
};
|
|
|
|
/* Workspace manager for zlib; exported through btrfs_zlib_compress.workspace_manager. */
static struct workspace_manager wsm;
|
|
|
|
struct list_head *zlib_get_workspace(unsigned int level)
|
|
{
|
|
struct list_head *ws = btrfs_get_workspace(BTRFS_COMPRESS_ZLIB, level);
|
|
struct workspace *workspace = list_entry(ws, struct workspace, list);
|
|
|
|
workspace->level = level;
|
|
|
|
return ws;
|
|
}
|
|
|
|
void zlib_free_workspace(struct list_head *ws)
|
|
{
|
|
struct workspace *workspace = list_entry(ws, struct workspace, list);
|
|
|
|
kvfree(workspace->strm.workspace);
|
|
kfree(workspace->buf);
|
|
kfree(workspace);
|
|
}
|
|
|
|
struct list_head *zlib_alloc_workspace(unsigned int level)
|
|
{
|
|
struct workspace *workspace;
|
|
int workspacesize;
|
|
|
|
workspace = kzalloc(sizeof(*workspace), GFP_KERNEL);
|
|
if (!workspace)
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
workspacesize = max(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL),
|
|
zlib_inflate_workspacesize());
|
|
workspace->strm.workspace = kvzalloc(workspacesize, GFP_KERNEL | __GFP_NOWARN);
|
|
workspace->level = level;
|
|
workspace->buf = NULL;
|
|
/*
|
|
* In case of s390 zlib hardware support, allocate lager workspace
|
|
* buffer. If allocator fails, fall back to a single page buffer.
|
|
*/
|
|
if (zlib_deflate_dfltcc_enabled()) {
|
|
workspace->buf = kmalloc(ZLIB_DFLTCC_BUF_SIZE,
|
|
__GFP_NOMEMALLOC | __GFP_NORETRY |
|
|
__GFP_NOWARN | GFP_NOIO);
|
|
workspace->buf_size = ZLIB_DFLTCC_BUF_SIZE;
|
|
}
|
|
if (!workspace->buf) {
|
|
workspace->buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
|
|
workspace->buf_size = PAGE_SIZE;
|
|
}
|
|
if (!workspace->strm.workspace || !workspace->buf)
|
|
goto fail;
|
|
|
|
INIT_LIST_HEAD(&workspace->list);
|
|
|
|
return &workspace->list;
|
|
fail:
|
|
zlib_free_workspace(&workspace->list);
|
|
return ERR_PTR(-ENOMEM);
|
|
}
|
|
|
|
/*
|
|
* Helper for S390x with hardware zlib compression support.
|
|
*
|
|
* That hardware acceleration requires a buffer size larger than a single page
|
|
* to get ideal performance, thus we need to do the memory copy rather than
|
|
* use the page cache directly as input buffer.
|
|
*/
|
|
static int copy_data_into_buffer(struct address_space *mapping,
|
|
struct workspace *workspace, u64 filepos,
|
|
unsigned long length)
|
|
{
|
|
u64 cur = filepos;
|
|
|
|
/* It's only for hardware accelerated zlib code. */
|
|
ASSERT(zlib_deflate_dfltcc_enabled());
|
|
|
|
while (cur < filepos + length) {
|
|
struct folio *folio;
|
|
void *data_in;
|
|
unsigned int offset;
|
|
unsigned long copy_length;
|
|
int ret;
|
|
|
|
ret = btrfs_compress_filemap_get_folio(mapping, cur, &folio);
|
|
if (ret < 0)
|
|
return ret;
|
|
/* No large folio support yet. */
|
|
ASSERT(!folio_test_large(folio));
|
|
|
|
offset = offset_in_folio(folio, cur);
|
|
copy_length = min(folio_size(folio) - offset,
|
|
filepos + length - cur);
|
|
|
|
data_in = kmap_local_folio(folio, offset);
|
|
memcpy(workspace->buf + cur - filepos, data_in, copy_length);
|
|
kunmap_local(data_in);
|
|
cur += copy_length;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
|
|
u64 start, struct folio **folios, unsigned long *out_folios,
|
|
unsigned long *total_in, unsigned long *total_out)
|
|
{
|
|
struct workspace *workspace = list_entry(ws, struct workspace, list);
|
|
int ret;
|
|
char *data_in = NULL;
|
|
char *cfolio_out;
|
|
int nr_folios = 0;
|
|
struct folio *in_folio = NULL;
|
|
struct folio *out_folio = NULL;
|
|
unsigned long len = *total_out;
|
|
unsigned long nr_dest_folios = *out_folios;
|
|
const unsigned long max_out = nr_dest_folios * PAGE_SIZE;
|
|
const u64 orig_end = start + len;
|
|
|
|
*out_folios = 0;
|
|
*total_out = 0;
|
|
*total_in = 0;
|
|
|
|
ret = zlib_deflateInit(&workspace->strm, workspace->level);
|
|
if (unlikely(ret != Z_OK)) {
|
|
struct btrfs_inode *inode = BTRFS_I(mapping->host);
|
|
|
|
btrfs_err(inode->root->fs_info,
|
|
"zlib compression init failed, error %d root %llu inode %llu offset %llu",
|
|
ret, btrfs_root_id(inode->root), btrfs_ino(inode), start);
|
|
ret = -EIO;
|
|
goto out;
|
|
}
|
|
|
|
workspace->strm.total_in = 0;
|
|
workspace->strm.total_out = 0;
|
|
|
|
out_folio = btrfs_alloc_compr_folio();
|
|
if (out_folio == NULL) {
|
|
ret = -ENOMEM;
|
|
goto out;
|
|
}
|
|
cfolio_out = folio_address(out_folio);
|
|
folios[0] = out_folio;
|
|
nr_folios = 1;
|
|
|
|
workspace->strm.next_in = workspace->buf;
|
|
workspace->strm.avail_in = 0;
|
|
workspace->strm.next_out = cfolio_out;
|
|
workspace->strm.avail_out = PAGE_SIZE;
|
|
|
|
while (workspace->strm.total_in < len) {
|
|
/*
|
|
* Get next input pages and copy the contents to
|
|
* the workspace buffer if required.
|
|
*/
|
|
if (workspace->strm.avail_in == 0) {
|
|
unsigned long bytes_left = len - workspace->strm.total_in;
|
|
unsigned int copy_length = min(bytes_left, workspace->buf_size);
|
|
|
|
/*
|
|
* This can only happen when hardware zlib compression is
|
|
* enabled.
|
|
*/
|
|
if (copy_length > PAGE_SIZE) {
|
|
ret = copy_data_into_buffer(mapping, workspace,
|
|
start, copy_length);
|
|
if (ret < 0)
|
|
goto out;
|
|
start += copy_length;
|
|
workspace->strm.next_in = workspace->buf;
|
|
workspace->strm.avail_in = copy_length;
|
|
} else {
|
|
unsigned int pg_off;
|
|
unsigned int cur_len;
|
|
|
|
if (data_in) {
|
|
kunmap_local(data_in);
|
|
folio_put(in_folio);
|
|
data_in = NULL;
|
|
}
|
|
ret = btrfs_compress_filemap_get_folio(mapping,
|
|
start, &in_folio);
|
|
if (ret < 0)
|
|
goto out;
|
|
pg_off = offset_in_page(start);
|
|
cur_len = btrfs_calc_input_length(orig_end, start);
|
|
data_in = kmap_local_folio(in_folio, pg_off);
|
|
start += cur_len;
|
|
workspace->strm.next_in = data_in;
|
|
workspace->strm.avail_in = cur_len;
|
|
}
|
|
}
|
|
|
|
ret = zlib_deflate(&workspace->strm, Z_SYNC_FLUSH);
|
|
if (unlikely(ret != Z_OK)) {
|
|
struct btrfs_inode *inode = BTRFS_I(mapping->host);
|
|
|
|
btrfs_warn(inode->root->fs_info,
|
|
"zlib compression failed, error %d root %llu inode %llu offset %llu",
|
|
ret, btrfs_root_id(inode->root), btrfs_ino(inode),
|
|
start);
|
|
zlib_deflateEnd(&workspace->strm);
|
|
ret = -EIO;
|
|
goto out;
|
|
}
|
|
|
|
/* we're making it bigger, give up */
|
|
if (workspace->strm.total_in > 8192 &&
|
|
workspace->strm.total_in <
|
|
workspace->strm.total_out) {
|
|
ret = -E2BIG;
|
|
goto out;
|
|
}
|
|
/* we need another page for writing out. Test this
|
|
* before the total_in so we will pull in a new page for
|
|
* the stream end if required
|
|
*/
|
|
if (workspace->strm.avail_out == 0) {
|
|
if (nr_folios == nr_dest_folios) {
|
|
ret = -E2BIG;
|
|
goto out;
|
|
}
|
|
out_folio = btrfs_alloc_compr_folio();
|
|
if (out_folio == NULL) {
|
|
ret = -ENOMEM;
|
|
goto out;
|
|
}
|
|
cfolio_out = folio_address(out_folio);
|
|
folios[nr_folios] = out_folio;
|
|
nr_folios++;
|
|
workspace->strm.avail_out = PAGE_SIZE;
|
|
workspace->strm.next_out = cfolio_out;
|
|
}
|
|
/* we're all done */
|
|
if (workspace->strm.total_in >= len)
|
|
break;
|
|
if (workspace->strm.total_out > max_out)
|
|
break;
|
|
}
|
|
workspace->strm.avail_in = 0;
|
|
/*
|
|
* Call deflate with Z_FINISH flush parameter providing more output
|
|
* space but no more input data, until it returns with Z_STREAM_END.
|
|
*/
|
|
while (ret != Z_STREAM_END) {
|
|
ret = zlib_deflate(&workspace->strm, Z_FINISH);
|
|
if (ret == Z_STREAM_END)
|
|
break;
|
|
if (ret != Z_OK && ret != Z_BUF_ERROR) {
|
|
zlib_deflateEnd(&workspace->strm);
|
|
ret = -EIO;
|
|
goto out;
|
|
} else if (workspace->strm.avail_out == 0) {
|
|
/* Get another folio for the stream end. */
|
|
if (nr_folios == nr_dest_folios) {
|
|
ret = -E2BIG;
|
|
goto out;
|
|
}
|
|
out_folio = btrfs_alloc_compr_folio();
|
|
if (out_folio == NULL) {
|
|
ret = -ENOMEM;
|
|
goto out;
|
|
}
|
|
cfolio_out = folio_address(out_folio);
|
|
folios[nr_folios] = out_folio;
|
|
nr_folios++;
|
|
workspace->strm.avail_out = PAGE_SIZE;
|
|
workspace->strm.next_out = cfolio_out;
|
|
}
|
|
}
|
|
zlib_deflateEnd(&workspace->strm);
|
|
|
|
if (workspace->strm.total_out >= workspace->strm.total_in) {
|
|
ret = -E2BIG;
|
|
goto out;
|
|
}
|
|
|
|
ret = 0;
|
|
*total_out = workspace->strm.total_out;
|
|
*total_in = workspace->strm.total_in;
|
|
out:
|
|
*out_folios = nr_folios;
|
|
if (data_in) {
|
|
kunmap_local(data_in);
|
|
folio_put(in_folio);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
|
|
{
|
|
struct workspace *workspace = list_entry(ws, struct workspace, list);
|
|
int ret = 0, ret2;
|
|
int wbits = MAX_WBITS;
|
|
char *data_in;
|
|
size_t total_out = 0;
|
|
unsigned long folio_in_index = 0;
|
|
size_t srclen = cb->compressed_len;
|
|
unsigned long total_folios_in = DIV_ROUND_UP(srclen, PAGE_SIZE);
|
|
unsigned long buf_start;
|
|
struct folio **folios_in = cb->compressed_folios;
|
|
|
|
data_in = kmap_local_folio(folios_in[folio_in_index], 0);
|
|
workspace->strm.next_in = data_in;
|
|
workspace->strm.avail_in = min_t(size_t, srclen, PAGE_SIZE);
|
|
workspace->strm.total_in = 0;
|
|
|
|
workspace->strm.total_out = 0;
|
|
workspace->strm.next_out = workspace->buf;
|
|
workspace->strm.avail_out = workspace->buf_size;
|
|
|
|
/* If it's deflate, and it's got no preset dictionary, then
|
|
we can tell zlib to skip the adler32 check. */
|
|
if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
|
|
((data_in[0] & 0x0f) == Z_DEFLATED) &&
|
|
!(((data_in[0]<<8) + data_in[1]) % 31)) {
|
|
|
|
wbits = -((data_in[0] >> 4) + 8);
|
|
workspace->strm.next_in += 2;
|
|
workspace->strm.avail_in -= 2;
|
|
}
|
|
|
|
ret = zlib_inflateInit2(&workspace->strm, wbits);
|
|
if (unlikely(ret != Z_OK)) {
|
|
struct btrfs_inode *inode = cb->bbio.inode;
|
|
|
|
kunmap_local(data_in);
|
|
btrfs_err(inode->root->fs_info,
|
|
"zlib decompression init failed, error %d root %llu inode %llu offset %llu",
|
|
ret, btrfs_root_id(inode->root), btrfs_ino(inode), cb->start);
|
|
return -EIO;
|
|
}
|
|
while (workspace->strm.total_in < srclen) {
|
|
ret = zlib_inflate(&workspace->strm, Z_NO_FLUSH);
|
|
if (ret != Z_OK && ret != Z_STREAM_END)
|
|
break;
|
|
|
|
buf_start = total_out;
|
|
total_out = workspace->strm.total_out;
|
|
|
|
/* we didn't make progress in this inflate call, we're done */
|
|
if (buf_start == total_out)
|
|
break;
|
|
|
|
ret2 = btrfs_decompress_buf2page(workspace->buf,
|
|
total_out - buf_start, cb, buf_start);
|
|
if (ret2 == 0) {
|
|
ret = 0;
|
|
goto done;
|
|
}
|
|
|
|
workspace->strm.next_out = workspace->buf;
|
|
workspace->strm.avail_out = workspace->buf_size;
|
|
|
|
if (workspace->strm.avail_in == 0) {
|
|
unsigned long tmp;
|
|
kunmap_local(data_in);
|
|
folio_in_index++;
|
|
if (folio_in_index >= total_folios_in) {
|
|
data_in = NULL;
|
|
break;
|
|
}
|
|
data_in = kmap_local_folio(folios_in[folio_in_index], 0);
|
|
workspace->strm.next_in = data_in;
|
|
tmp = srclen - workspace->strm.total_in;
|
|
workspace->strm.avail_in = min(tmp, PAGE_SIZE);
|
|
}
|
|
}
|
|
if (unlikely(ret != Z_STREAM_END)) {
|
|
btrfs_err(cb->bbio.inode->root->fs_info,
|
|
"zlib decompression failed, error %d root %llu inode %llu offset %llu",
|
|
ret, btrfs_root_id(cb->bbio.inode->root),
|
|
btrfs_ino(cb->bbio.inode), cb->start);
|
|
ret = -EIO;
|
|
} else {
|
|
ret = 0;
|
|
}
|
|
done:
|
|
zlib_inflateEnd(&workspace->strm);
|
|
if (data_in)
|
|
kunmap_local(data_in);
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Decompress one contiguous zlib stream into a folio.
 *
 * @ws:         list head of the workspace to use
 * @data_in:    compressed data
 * @dest_folio: folio receiving the decompressed data
 * @dest_pgoff: byte offset inside @dest_folio where the data starts
 * @srclen:     length of the compressed data
 * @destlen:    number of decompressed bytes expected
 *
 * The stream is inflated in a single call into workspace->buf and at most
 * @destlen bytes are copied to the folio.  If fewer than @destlen bytes were
 * produced, the remainder of the destination range is zeroed.
 *
 * Returns 0 when at least @destlen bytes were produced (and copied), -EIO
 * when zlib fails to initialize or fewer bytes were produced.
 */
int zlib_decompress(struct list_head *ws, const u8 *data_in,
		    struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen,
		    size_t destlen)
{
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	int ret = 0;
	int wbits = MAX_WBITS;
	unsigned long to_copy;

	workspace->strm.next_in = data_in;
	workspace->strm.avail_in = srclen;
	workspace->strm.total_in = 0;

	workspace->strm.next_out = workspace->buf;
	workspace->strm.avail_out = workspace->buf_size;
	workspace->strm.total_out = 0;

	/*
	 * If it's deflate, and it's got no preset dictionary, then we can
	 * tell zlib to skip the adler32 check: step over the two header bytes
	 * and use negative window bits.
	 */
	if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
	    ((data_in[0] & 0x0f) == Z_DEFLATED) &&
	    !(((data_in[0]<<8) + data_in[1]) % 31)) {
		wbits = -((data_in[0] >> 4) + 8);
		workspace->strm.next_in += 2;
		workspace->strm.avail_in -= 2;
	}

	ret = zlib_inflateInit2(&workspace->strm, wbits);
	if (unlikely(ret != Z_OK)) {
		struct btrfs_inode *inode = folio_to_inode(dest_folio);

		btrfs_err(inode->root->fs_info,
		"zlib decompression init failed, error %d root %llu inode %llu offset %llu",
			  ret, btrfs_root_id(inode->root), btrfs_ino(inode),
			  folio_pos(dest_folio));
		return -EIO;
	}

	/*
	 * Everything (in/out buf) should be at most one sector, there should
	 * be no need to switch any input/output buffer.
	 */
	ret = zlib_inflate(&workspace->strm, Z_FINISH);
	to_copy = min(workspace->strm.total_out, destlen);

	/*
	 * Copy whatever was produced, whether or not the stream reached
	 * Z_STREAM_END.  The result below depends only on the number of bytes
	 * produced, so skipping the copy for an unfinished stream could
	 * return success while leaving the folio unwritten.
	 */
	if (to_copy)
		memcpy_to_folio(dest_folio, dest_pgoff, workspace->buf, to_copy);

	if (unlikely(to_copy != destlen)) {
		struct btrfs_inode *inode = folio_to_inode(dest_folio);

		btrfs_err(inode->root->fs_info,
"zlib decompression failed, error %d root %llu inode %llu offset %llu decompressed %lu expected %zu",
			  ret, btrfs_root_id(inode->root), btrfs_ino(inode),
			  folio_pos(dest_folio), to_copy, destlen);
		ret = -EIO;
	} else {
		ret = 0;
	}

	zlib_inflateEnd(&workspace->strm);

	/* Short output: make sure the tail of the range holds zeroes. */
	if (unlikely(to_copy < destlen))
		folio_zero_range(dest_folio, dest_pgoff + to_copy, destlen - to_copy);
	return ret;
}
|
|
|
|
const struct btrfs_compress_op btrfs_zlib_compress = {
|
|
.workspace_manager = &wsm,
|
|
.min_level = 1,
|
|
.max_level = 9,
|
|
.default_level = BTRFS_ZLIB_DEFAULT_LEVEL,
|
|
};
|