linux/fs/btrfs/tests/extent-map-tests.c

1080 lines
26 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2017 Oracle. All rights reserved.
*/
#include <linux/types.h>
#include "btrfs-tests.h"
#include "../ctree.h"
#include "../btrfs_inode.h"
#include "../volumes.h"
#include "../disk-io.h"
#include "../block-group.h"
static int free_extent_map_tree(struct btrfs_inode *inode)
{
struct extent_map_tree *em_tree = &inode->extent_tree;
struct extent_map *em;
struct rb_node *node;
int ret = 0;
write_lock(&em_tree->lock);
while (!RB_EMPTY_ROOT(&em_tree->map.rb_root)) {
node = rb_first_cached(&em_tree->map);
em = rb_entry(node, struct extent_map, rb_node);
remove_extent_mapping(inode, em);
#ifdef CONFIG_BTRFS_DEBUG
if (refcount_read(&em->refs) != 1) {
ret = -EINVAL;
test_err(
"em leak: em (start %llu len %llu block_start %llu block_len %llu) refs %d",
em->start, em->len, em->block_start,
em->block_len, refcount_read(&em->refs));
refcount_set(&em->refs, 1);
}
#endif
free_extent_map(em);
}
write_unlock(&em_tree->lock);
return ret;
}
/*
* Test scenario:
*
* Suppose that no extent map has been loaded into memory yet, there is a file
* extent [0, 16K), followed by another file extent [16K, 20K), two dio reads
* are entering btrfs_get_extent() concurrently, t1 is reading [8K, 16K), t2 is
* reading [0, 8K)
*
* t1 t2
* btrfs_get_extent() btrfs_get_extent()
* -> lookup_extent_mapping() ->lookup_extent_mapping()
* -> add_extent_mapping(0, 16K)
* -> return em
* ->add_extent_mapping(0, 16K)
* -> #handle -EEXIST
*/
static int test_case_1(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
{
struct extent_map_tree *em_tree = &inode->extent_tree;
struct extent_map *em;
u64 start = 0;
u64 len = SZ_8K;
int ret;
int ret2;
em = alloc_extent_map();
if (!em) {
test_std_err(TEST_ALLOC_EXTENT_MAP);
return -ENOMEM;
}
/* Add [0, 16K) */
em->start = 0;
em->len = SZ_16K;
em->block_start = 0;
em->block_len = SZ_16K;
write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
write_unlock(&em_tree->lock);
if (ret < 0) {
test_err("cannot add extent range [0, 16K)");
goto out;
}
free_extent_map(em);
/* Add [16K, 20K) following [0, 16K) */
em = alloc_extent_map();
if (!em) {
test_std_err(TEST_ALLOC_EXTENT_MAP);
ret = -ENOMEM;
goto out;
}
em->start = SZ_16K;
em->len = SZ_4K;
em->block_start = SZ_32K; /* avoid merging */
em->block_len = SZ_4K;
write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
write_unlock(&em_tree->lock);
if (ret < 0) {
test_err("cannot add extent range [16K, 20K)");
goto out;
}
free_extent_map(em);
em = alloc_extent_map();
if (!em) {
test_std_err(TEST_ALLOC_EXTENT_MAP);
ret = -ENOMEM;
goto out;
}
/* Add [0, 8K), should return [0, 16K) instead. */
em->start = start;
em->len = len;
em->block_start = start;
em->block_len = len;
write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
write_unlock(&em_tree->lock);
if (ret) {
test_err("case1 [%llu %llu]: ret %d", start, start + len, ret);
goto out;
}
if (!em) {
test_err("case1 [%llu %llu]: no extent map returned",
start, start + len);
ret = -ENOENT;
goto out;
}
if (em->start != 0 || extent_map_end(em) != SZ_16K ||
em->block_start != 0 || em->block_len != SZ_16K) {
test_err(
"case1 [%llu %llu]: ret %d return a wrong em (start %llu len %llu block_start %llu block_len %llu",
start, start + len, ret, em->start, em->len,
em->block_start, em->block_len);
ret = -EINVAL;
}
free_extent_map(em);
out:
ret2 = free_extent_map_tree(inode);
if (ret == 0)
ret = ret2;
return ret;
}
/*
* Test scenario:
*
* Reading the inline ending up with EEXIST, ie. read an inline
* extent and discard page cache and read it again.
*/
static int test_case_2(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
{
struct extent_map_tree *em_tree = &inode->extent_tree;
struct extent_map *em;
int ret;
int ret2;
em = alloc_extent_map();
if (!em) {
test_std_err(TEST_ALLOC_EXTENT_MAP);
return -ENOMEM;
}
/* Add [0, 1K) */
em->start = 0;
em->len = SZ_1K;
em->block_start = EXTENT_MAP_INLINE;
em->block_len = (u64)-1;
write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
write_unlock(&em_tree->lock);
if (ret < 0) {
test_err("cannot add extent range [0, 1K)");
goto out;
}
free_extent_map(em);
/* Add [4K, 8K) following [0, 1K) */
em = alloc_extent_map();
if (!em) {
test_std_err(TEST_ALLOC_EXTENT_MAP);
ret = -ENOMEM;
goto out;
}
em->start = SZ_4K;
em->len = SZ_4K;
em->block_start = SZ_4K;
em->block_len = SZ_4K;
write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
write_unlock(&em_tree->lock);
if (ret < 0) {
test_err("cannot add extent range [4K, 8K)");
goto out;
}
free_extent_map(em);
em = alloc_extent_map();
if (!em) {
test_std_err(TEST_ALLOC_EXTENT_MAP);
ret = -ENOMEM;
goto out;
}
/* Add [0, 1K) */
em->start = 0;
em->len = SZ_1K;
em->block_start = EXTENT_MAP_INLINE;
em->block_len = (u64)-1;
write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
write_unlock(&em_tree->lock);
if (ret) {
test_err("case2 [0 1K]: ret %d", ret);
goto out;
}
if (!em) {
test_err("case2 [0 1K]: no extent map returned");
ret = -ENOENT;
goto out;
}
if (em->start != 0 || extent_map_end(em) != SZ_1K ||
em->block_start != EXTENT_MAP_INLINE || em->block_len != (u64)-1) {
test_err(
"case2 [0 1K]: ret %d return a wrong em (start %llu len %llu block_start %llu block_len %llu",
ret, em->start, em->len, em->block_start,
em->block_len);
ret = -EINVAL;
}
free_extent_map(em);
out:
ret2 = free_extent_map_tree(inode);
if (ret == 0)
ret = ret2;
return ret;
}
static int __test_case_3(struct btrfs_fs_info *fs_info,
struct btrfs_inode *inode, u64 start)
{
struct extent_map_tree *em_tree = &inode->extent_tree;
struct extent_map *em;
u64 len = SZ_4K;
int ret;
int ret2;
em = alloc_extent_map();
if (!em) {
test_std_err(TEST_ALLOC_EXTENT_MAP);
return -ENOMEM;
}
/* Add [4K, 8K) */
em->start = SZ_4K;
em->len = SZ_4K;
em->block_start = SZ_4K;
em->block_len = SZ_4K;
write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
write_unlock(&em_tree->lock);
if (ret < 0) {
test_err("cannot add extent range [4K, 8K)");
goto out;
}
free_extent_map(em);
em = alloc_extent_map();
if (!em) {
test_std_err(TEST_ALLOC_EXTENT_MAP);
ret = -ENOMEM;
goto out;
}
/* Add [0, 16K) */
em->start = 0;
em->len = SZ_16K;
em->block_start = 0;
em->block_len = SZ_16K;
write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(inode, &em, start, len);
write_unlock(&em_tree->lock);
if (ret) {
test_err("case3 [%llu %llu): ret %d",
start, start + len, ret);
goto out;
}
if (!em) {
test_err("case3 [%llu %llu): no extent map returned",
start, start + len);
ret = -ENOENT;
goto out;
}
/*
* Since bytes within em are contiguous, em->block_start is identical to
* em->start.
*/
if (start < em->start || start + len > extent_map_end(em) ||
em->start != em->block_start || em->len != em->block_len) {
test_err(
"case3 [%llu %llu): ret %d em (start %llu len %llu block_start %llu block_len %llu)",
start, start + len, ret, em->start, em->len,
em->block_start, em->block_len);
ret = -EINVAL;
}
free_extent_map(em);
out:
ret2 = free_extent_map_tree(inode);
if (ret == 0)
ret = ret2;
return ret;
}
/*
* Test scenario:
*
* Suppose that no extent map has been loaded into memory yet.
* There is a file extent [0, 16K), two jobs are running concurrently
* against it, t1 is buffered writing to [4K, 8K) and t2 is doing dio
* read from [0, 4K) or [8K, 12K) or [12K, 16K).
*
* t1 goes ahead of t2 and adds em [4K, 8K) into tree.
*
* t1 t2
* cow_file_range() btrfs_get_extent()
* -> lookup_extent_mapping()
* -> add_extent_mapping()
* -> add_extent_mapping()
*/
static int test_case_3(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
{
int ret;
ret = __test_case_3(fs_info, inode, 0);
if (ret)
return ret;
ret = __test_case_3(fs_info, inode, SZ_8K);
if (ret)
return ret;
ret = __test_case_3(fs_info, inode, (12 * SZ_1K));
return ret;
}
static int __test_case_4(struct btrfs_fs_info *fs_info,
struct btrfs_inode *inode, u64 start)
{
struct extent_map_tree *em_tree = &inode->extent_tree;
struct extent_map *em;
u64 len = SZ_4K;
int ret;
int ret2;
em = alloc_extent_map();
if (!em) {
test_std_err(TEST_ALLOC_EXTENT_MAP);
return -ENOMEM;
}
/* Add [0K, 8K) */
em->start = 0;
em->len = SZ_8K;
em->block_start = 0;
em->block_len = SZ_8K;
write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
write_unlock(&em_tree->lock);
if (ret < 0) {
test_err("cannot add extent range [0, 8K)");
goto out;
}
free_extent_map(em);
em = alloc_extent_map();
if (!em) {
test_std_err(TEST_ALLOC_EXTENT_MAP);
ret = -ENOMEM;
goto out;
}
/* Add [8K, 32K) */
em->start = SZ_8K;
em->len = 24 * SZ_1K;
em->block_start = SZ_16K; /* avoid merging */
em->block_len = 24 * SZ_1K;
write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
write_unlock(&em_tree->lock);
if (ret < 0) {
test_err("cannot add extent range [8K, 32K)");
goto out;
}
free_extent_map(em);
em = alloc_extent_map();
if (!em) {
test_std_err(TEST_ALLOC_EXTENT_MAP);
ret = -ENOMEM;
goto out;
}
/* Add [0K, 32K) */
em->start = 0;
em->len = SZ_32K;
em->block_start = 0;
em->block_len = SZ_32K;
write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(inode, &em, start, len);
write_unlock(&em_tree->lock);
if (ret) {
test_err("case4 [%llu %llu): ret %d",
start, start + len, ret);
goto out;
}
if (!em) {
test_err("case4 [%llu %llu): no extent map returned",
start, start + len);
ret = -ENOENT;
goto out;
}
if (start < em->start || start + len > extent_map_end(em)) {
test_err(
"case4 [%llu %llu): ret %d, added wrong em (start %llu len %llu block_start %llu block_len %llu)",
start, start + len, ret, em->start, em->len, em->block_start,
em->block_len);
ret = -EINVAL;
}
free_extent_map(em);
out:
ret2 = free_extent_map_tree(inode);
if (ret == 0)
ret = ret2;
return ret;
}
/*
* Test scenario:
*
* Suppose that no extent map has been loaded into memory yet.
* There is a file extent [0, 32K), two jobs are running concurrently
* against it, t1 is doing dio write to [8K, 32K) and t2 is doing dio
* read from [0, 4K) or [4K, 8K).
*
* t1 goes ahead of t2 and splits em [0, 32K) to em [0K, 8K) and [8K 32K).
*
* t1 t2
* btrfs_get_blocks_direct() btrfs_get_blocks_direct()
* -> btrfs_get_extent() -> btrfs_get_extent()
* -> lookup_extent_mapping()
* -> add_extent_mapping() -> lookup_extent_mapping()
* # load [0, 32K)
* -> btrfs_new_extent_direct()
* -> btrfs_drop_extent_cache()
* # split [0, 32K)
* -> add_extent_mapping()
* # add [8K, 32K)
* -> add_extent_mapping()
* # handle -EEXIST when adding
* # [0, 32K)
*/
static int test_case_4(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
{
int ret;
ret = __test_case_4(fs_info, inode, 0);
if (ret)
return ret;
ret = __test_case_4(fs_info, inode, SZ_4K);
return ret;
}
static int add_compressed_extent(struct btrfs_inode *inode,
u64 start, u64 len, u64 block_start)
{
struct extent_map_tree *em_tree = &inode->extent_tree;
struct extent_map *em;
int ret;
em = alloc_extent_map();
if (!em) {
test_std_err(TEST_ALLOC_EXTENT_MAP);
return -ENOMEM;
}
em->start = start;
em->len = len;
em->block_start = block_start;
em->block_len = SZ_4K;
btrfs: use the flags of an extent map to identify the compression type Currently, in struct extent_map, we use an unsigned int (32 bits) to identify the compression type of an extent and an unsigned long (64 bits on a 64 bits platform, 32 bits otherwise) for flags. We are only using 6 different flags, so an unsigned long is excessive and we can use flags to identify the compression type instead of using a dedicated 32 bits field. We can easily have tens or hundreds of thousands (or more) of extent maps on busy and large filesystems, specially with compression enabled or many or large files with tons of small extents. So it's convenient to have the extent_map structure as small as possible in order to use less memory. So remove the compression type field from struct extent_map, use flags to identify the compression type and shorten the flags field from an unsigned long to a u32. This saves 8 bytes (on 64 bits platforms) and reduces the size of the structure from 136 bytes down to 128 bytes, using now only two cache lines, and increases the number of extent maps we can have per 4K page from 30 to 32. By using a u32 for the flags instead of an unsigned long, we no longer use test_bit(), set_bit() and clear_bit(), but that level of atomicity is not needed as most flags are never cleared once set (before adding an extent map to the tree), and the ones that can be cleared or set after an extent map is added to the tree, are always performed while holding the write lock on the extent map tree, while the reader holds a lock on the tree or tests for a flag that never changes once the extent map is in the tree (such as compression flags). Signed-off-by: Filipe Manana <fdmanana@suse.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2023-12-04 16:20:33 +00:00
em->flags |= EXTENT_FLAG_COMPRESS_ZLIB;
write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
write_unlock(&em_tree->lock);
free_extent_map(em);
if (ret < 0) {
test_err("cannot add extent map [%llu, %llu)", start, start + len);
return ret;
}
return 0;
}
struct extent_range {
u64 start;
u64 len;
};
/* The valid states of the tree after every drop, as described below. */
struct extent_range valid_ranges[][7] = {
{
{ .start = 0, .len = SZ_8K }, /* [0, 8K) */
{ .start = SZ_4K * 3, .len = SZ_4K * 3}, /* [12k, 24k) */
{ .start = SZ_4K * 6, .len = SZ_4K * 3}, /* [24k, 36k) */
{ .start = SZ_32K + SZ_4K, .len = SZ_4K}, /* [36k, 40k) */
{ .start = SZ_4K * 10, .len = SZ_4K * 6}, /* [40k, 64k) */
},
{
{ .start = 0, .len = SZ_8K }, /* [0, 8K) */
{ .start = SZ_4K * 5, .len = SZ_4K}, /* [20k, 24k) */
{ .start = SZ_4K * 6, .len = SZ_4K * 3}, /* [24k, 36k) */
{ .start = SZ_32K + SZ_4K, .len = SZ_4K}, /* [36k, 40k) */
{ .start = SZ_4K * 10, .len = SZ_4K * 6}, /* [40k, 64k) */
},
{
{ .start = 0, .len = SZ_8K }, /* [0, 8K) */
{ .start = SZ_4K * 5, .len = SZ_4K}, /* [20k, 24k) */
{ .start = SZ_4K * 6, .len = SZ_4K}, /* [24k, 28k) */
{ .start = SZ_32K, .len = SZ_4K}, /* [32k, 36k) */
{ .start = SZ_32K + SZ_4K, .len = SZ_4K}, /* [36k, 40k) */
{ .start = SZ_4K * 10, .len = SZ_4K * 6}, /* [40k, 64k) */
},
{
{ .start = 0, .len = SZ_8K}, /* [0, 8K) */
{ .start = SZ_4K * 5, .len = SZ_4K}, /* [20k, 24k) */
{ .start = SZ_4K * 6, .len = SZ_4K}, /* [24k, 28k) */
}
};
static int validate_range(struct extent_map_tree *em_tree, int index)
{
struct rb_node *n;
int i;
for (i = 0, n = rb_first_cached(&em_tree->map);
valid_ranges[index][i].len && n;
i++, n = rb_next(n)) {
struct extent_map *entry = rb_entry(n, struct extent_map, rb_node);
if (entry->start != valid_ranges[index][i].start) {
test_err("mapping has start %llu expected %llu",
entry->start, valid_ranges[index][i].start);
return -EINVAL;
}
if (entry->len != valid_ranges[index][i].len) {
test_err("mapping has len %llu expected %llu",
entry->len, valid_ranges[index][i].len);
return -EINVAL;
}
}
/*
* We exited because we don't have any more entries in the extent_map
* but we still expect more valid entries.
*/
if (valid_ranges[index][i].len) {
test_err("missing an entry");
return -EINVAL;
}
/* We exited the loop but still have entries in the extent map. */
if (n) {
test_err("we have a left over entry in the extent map we didn't expect");
return -EINVAL;
}
return 0;
}
/*
* Test scenario:
*
* Test the various edge cases of btrfs_drop_extent_map_range, create the
* following ranges
*
* [0, 12k)[12k, 24k)[24k, 36k)[36k, 40k)[40k,64k)
*
* And then we'll drop:
*
* [8k, 12k) - test the single front split
* [12k, 20k) - test the single back split
* [28k, 32k) - test the double split
* [32k, 64k) - test whole em dropping
*
* They'll have the EXTENT_FLAG_COMPRESSED flag set to keep the em tree from
* merging the em's.
*/
static int test_case_5(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
{
u64 start, end;
int ret;
int ret2;
test_msg("Running btrfs_drop_extent_map_range tests");
/* [0, 12k) */
ret = add_compressed_extent(inode, 0, SZ_4K * 3, 0);
if (ret) {
test_err("cannot add extent range [0, 12K)");
goto out;
}
/* [12k, 24k) */
ret = add_compressed_extent(inode, SZ_4K * 3, SZ_4K * 3, SZ_4K);
if (ret) {
test_err("cannot add extent range [12k, 24k)");
goto out;
}
/* [24k, 36k) */
ret = add_compressed_extent(inode, SZ_4K * 6, SZ_4K * 3, SZ_8K);
if (ret) {
test_err("cannot add extent range [12k, 24k)");
goto out;
}
/* [36k, 40k) */
ret = add_compressed_extent(inode, SZ_32K + SZ_4K, SZ_4K, SZ_4K * 3);
if (ret) {
test_err("cannot add extent range [12k, 24k)");
goto out;
}
/* [40k, 64k) */
ret = add_compressed_extent(inode, SZ_4K * 10, SZ_4K * 6, SZ_16K);
if (ret) {
test_err("cannot add extent range [12k, 24k)");
goto out;
}
/* Drop [8k, 12k) */
start = SZ_8K;
end = (3 * SZ_4K) - 1;
btrfs_drop_extent_map_range(inode, start, end, false);
ret = validate_range(&inode->extent_tree, 0);
if (ret)
goto out;
/* Drop [12k, 20k) */
start = SZ_4K * 3;
end = SZ_16K + SZ_4K - 1;
btrfs_drop_extent_map_range(inode, start, end, false);
ret = validate_range(&inode->extent_tree, 1);
if (ret)
goto out;
/* Drop [28k, 32k) */
start = SZ_32K - SZ_4K;
end = SZ_32K - 1;
btrfs_drop_extent_map_range(inode, start, end, false);
ret = validate_range(&inode->extent_tree, 2);
if (ret)
goto out;
/* Drop [32k, 64k) */
start = SZ_32K;
end = SZ_64K - 1;
btrfs_drop_extent_map_range(inode, start, end, false);
ret = validate_range(&inode->extent_tree, 3);
if (ret)
goto out;
out:
ret2 = free_extent_map_tree(inode);
if (ret == 0)
ret = ret2;
return ret;
}
/*
* Test the btrfs_add_extent_mapping helper which will attempt to create an em
* for areas between two existing ems. Validate it doesn't do this when there
* are two unmerged em's side by side.
*/
static int test_case_6(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
{
struct extent_map_tree *em_tree = &inode->extent_tree;
struct extent_map *em = NULL;
int ret;
int ret2;
ret = add_compressed_extent(inode, 0, SZ_4K, 0);
if (ret)
goto out;
ret = add_compressed_extent(inode, SZ_4K, SZ_4K, 0);
if (ret)
goto out;
em = alloc_extent_map();
if (!em) {
test_std_err(TEST_ALLOC_EXTENT_MAP);
ret = -ENOMEM;
goto out;
}
em->start = SZ_4K;
em->len = SZ_4K;
em->block_start = SZ_16K;
em->block_len = SZ_16K;
write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(inode, &em, 0, SZ_8K);
write_unlock(&em_tree->lock);
if (ret != 0) {
test_err("got an error when adding our em: %d", ret);
goto out;
}
ret = -EINVAL;
if (em->start != 0) {
test_err("unexpected em->start at %llu, wanted 0", em->start);
goto out;
}
if (em->len != SZ_4K) {
test_err("unexpected em->len %llu, expected 4K", em->len);
goto out;
}
ret = 0;
out:
free_extent_map(em);
ret2 = free_extent_map_tree(inode);
if (ret == 0)
ret = ret2;
return ret;
}
/*
* Regression test for btrfs_drop_extent_map_range. Calling with skip_pinned ==
* true would mess up the start/end calculations and subsequent splits would be
* incorrect.
*/
static int test_case_7(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
{
struct extent_map_tree *em_tree = &inode->extent_tree;
struct extent_map *em;
int ret;
int ret2;
test_msg("Running btrfs_drop_extent_cache with pinned");
em = alloc_extent_map();
if (!em) {
test_std_err(TEST_ALLOC_EXTENT_MAP);
return -ENOMEM;
}
/* [0, 16K), pinned */
em->start = 0;
em->len = SZ_16K;
em->block_start = 0;
em->block_len = SZ_4K;
btrfs: use the flags of an extent map to identify the compression type Currently, in struct extent_map, we use an unsigned int (32 bits) to identify the compression type of an extent and an unsigned long (64 bits on a 64 bits platform, 32 bits otherwise) for flags. We are only using 6 different flags, so an unsigned long is excessive and we can use flags to identify the compression type instead of using a dedicated 32 bits field. We can easily have tens or hundreds of thousands (or more) of extent maps on busy and large filesystems, specially with compression enabled or many or large files with tons of small extents. So it's convenient to have the extent_map structure as small as possible in order to use less memory. So remove the compression type field from struct extent_map, use flags to identify the compression type and shorten the flags field from an unsigned long to a u32. This saves 8 bytes (on 64 bits platforms) and reduces the size of the structure from 136 bytes down to 128 bytes, using now only two cache lines, and increases the number of extent maps we can have per 4K page from 30 to 32. By using a u32 for the flags instead of an unsigned long, we no longer use test_bit(), set_bit() and clear_bit(), but that level of atomicity is not needed as most flags are never cleared once set (before adding an extent map to the tree), and the ones that can be cleared or set after an extent map is added to the tree, are always performed while holding the write lock on the extent map tree, while the reader holds a lock on the tree or tests for a flag that never changes once the extent map is in the tree (such as compression flags). Signed-off-by: Filipe Manana <fdmanana@suse.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2023-12-04 16:20:33 +00:00
em->flags |= EXTENT_FLAG_PINNED;
write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
write_unlock(&em_tree->lock);
if (ret < 0) {
test_err("couldn't add extent map");
goto out;
}
free_extent_map(em);
em = alloc_extent_map();
if (!em) {
test_std_err(TEST_ALLOC_EXTENT_MAP);
ret = -ENOMEM;
goto out;
}
/* [32K, 48K), not pinned */
em->start = SZ_32K;
em->len = SZ_16K;
em->block_start = SZ_32K;
em->block_len = SZ_16K;
write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
write_unlock(&em_tree->lock);
if (ret < 0) {
test_err("couldn't add extent map");
goto out;
}
free_extent_map(em);
/*
* Drop [0, 36K) This should skip the [0, 4K) extent and then split the
* [32K, 48K) extent.
*/
btrfs_drop_extent_map_range(inode, 0, (36 * SZ_1K) - 1, true);
/* Make sure our extent maps look sane. */
ret = -EINVAL;
em = lookup_extent_mapping(em_tree, 0, SZ_16K);
if (!em) {
test_err("didn't find an em at 0 as expected");
goto out;
}
if (em->start != 0) {
test_err("em->start is %llu, expected 0", em->start);
goto out;
}
if (em->len != SZ_16K) {
test_err("em->len is %llu, expected 16K", em->len);
goto out;
}
free_extent_map(em);
read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, SZ_16K, SZ_16K);
read_unlock(&em_tree->lock);
if (em) {
test_err("found an em when we weren't expecting one");
goto out;
}
read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, SZ_32K, SZ_16K);
read_unlock(&em_tree->lock);
if (!em) {
test_err("didn't find an em at 32K as expected");
goto out;
}
if (em->start != (36 * SZ_1K)) {
test_err("em->start is %llu, expected 36K", em->start);
goto out;
}
if (em->len != (12 * SZ_1K)) {
test_err("em->len is %llu, expected 12K", em->len);
goto out;
}
btrfs: fix wrong block_start calculation for btrfs_drop_extent_map_range() [BUG] During my extent_map cleanup/refactor, with extra sanity checks, extent-map-tests::test_case_7() would not pass the checks. The problem is, after btrfs_drop_extent_map_range(), the resulted extent_map has a @block_start way too large. Meanwhile my btrfs_file_extent_item based members are returning a correct @disk_bytenr/@offset combination. The extent map layout looks like this: 0 16K 32K 48K | PINNED | | Regular | The regular em at [32K, 48K) also has 32K @block_start. Then drop range [0, 36K), which should shrink the regular one to be [36K, 48K). However the @block_start is incorrect, we expect 32K + 4K, but got 52K. [CAUSE] Inside btrfs_drop_extent_map_range() function, if we hit an extent_map that covers the target range but is still beyond it, we need to split that extent map into half: |<-- drop range -->| |<----- existing extent_map --->| And if the extent map is not compressed, we need to forward extent_map::block_start by the difference between the end of drop range and the extent map start. However in that particular case, the difference is calculated using (start + len - em->start). The problem is @start can be modified if the drop range covers any pinned extent. This leads to wrong calculation, and would be caught by my later extent_map sanity checks, which checks the em::block_start against btrfs_file_extent_item::disk_bytenr + btrfs_file_extent_item::offset. This is a regression caused by commit c962098ca4af ("btrfs: fix incorrect splitting in btrfs_drop_extent_map_range"), which removed the @len update for pinned extents. [FIX] Fix it by avoiding using @start completely, and use @end - em->start instead, which @end is exclusive bytenr number. And update the test case to verify the @block_start to prevent such problem from happening. Thankfully this is not going to lead to any data corruption, as IO path does not utilize btrfs_drop_extent_map_range() with @skip_pinned set. So this fix is only here for the sake of consistency/correctness. CC: stable@vger.kernel.org # 6.5+ Fixes: c962098ca4af ("btrfs: fix incorrect splitting in btrfs_drop_extent_map_range") Reviewed-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2024-04-09 20:32:34 +09:30
if (em->block_start != SZ_32K + SZ_4K) {
test_err("em->block_start is %llu, expected 36K", em->block_start);
goto out;
}
free_extent_map(em);
read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, 48 * SZ_1K, (u64)-1);
read_unlock(&em_tree->lock);
if (em) {
test_err("found an unexpected em above 48K");
goto out;
}
ret = 0;
out:
free_extent_map(em);
/* Unpin our extent to prevent warning when removing it below. */
ret2 = unpin_extent_cache(inode, 0, SZ_16K, 0);
if (ret == 0)
ret = ret2;
ret2 = free_extent_map_tree(inode);
if (ret == 0)
ret = ret2;
return ret;
}
struct rmap_test_vector {
u64 raid_type;
u64 physical_start;
u64 data_stripe_size;
u64 num_data_stripes;
u64 num_stripes;
/* Assume we won't have more than 5 physical stripes */
u64 data_stripe_phys_start[5];
bool expected_mapped_addr;
/* Physical to logical addresses */
u64 mapped_logical[5];
};
static int test_rmap_block(struct btrfs_fs_info *fs_info,
struct rmap_test_vector *test)
{
btrfs: use a dedicated data structure for chunk maps Currently we abuse the extent_map structure for two purposes: 1) To actually represent extents for inodes; 2) To represent chunk mappings. This is odd and has several disadvantages: 1) To create a chunk map, we need to do two memory allocations: one for an extent_map structure and another one for a map_lookup structure, so more potential for an allocation failure and more complicated code to manage and link two structures; 2) For a chunk map we actually only use 3 fields (24 bytes) of the respective extent map structure: the 'start' field to have the logical start address of the chunk, the 'len' field to have the chunk's size, and the 'orig_block_len' field to contain the chunk's stripe size. Besides wasting a memory, it's also odd and not intuitive at all to have the stripe size in a field named 'orig_block_len'. We are also using 'block_len' of the extent_map structure to contain the chunk size, so we have 2 fields for the same value, 'len' and 'block_len', which is pointless; 3) When an extent map is associated to a chunk mapping, we set the bit EXTENT_FLAG_FS_MAPPING on its flags and then make its member named 'map_lookup' point to the associated map_lookup structure. This means that for an extent map associated to an inode extent, we are not using this 'map_lookup' pointer, so wasting 8 bytes (on a 64 bits platform); 4) Extent maps associated to a chunk mapping are never merged or split so it's pointless to use the existing extent map infrastructure. So add a dedicated data structure named 'btrfs_chunk_map' to represent chunk mappings, this is basically the existing map_lookup structure with some extra fields: 1) 'start' to contain the chunk logical address; 2) 'chunk_len' to contain the chunk's length; 3) 'stripe_size' for the stripe size; 4) 'rb_node' for insertion into a rb tree; 5) 'refs' for reference counting. This way we do a single memory allocation for chunk mappings and we don't waste memory for them with unused/unnecessary fields from an extent_map. We also save 8 bytes from the extent_map structure by removing the 'map_lookup' pointer, so the size of struct extent_map is reduced from 144 bytes down to 136 bytes, and we can now have 30 extents map per 4K page instead of 28. Reviewed-by: Josef Bacik <josef@toxicpanda.com> Signed-off-by: Filipe Manana <fdmanana@suse.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2023-11-21 13:38:38 +00:00
struct btrfs_chunk_map *map;
u64 *logical = NULL;
int i, out_ndaddrs, out_stripe_len;
int ret;
btrfs: use a dedicated data structure for chunk maps Currently we abuse the extent_map structure for two purposes: 1) To actually represent extents for inodes; 2) To represent chunk mappings. This is odd and has several disadvantages: 1) To create a chunk map, we need to do two memory allocations: one for an extent_map structure and another one for a map_lookup structure, so more potential for an allocation failure and more complicated code to manage and link two structures; 2) For a chunk map we actually only use 3 fields (24 bytes) of the respective extent map structure: the 'start' field to have the logical start address of the chunk, the 'len' field to have the chunk's size, and the 'orig_block_len' field to contain the chunk's stripe size. Besides wasting a memory, it's also odd and not intuitive at all to have the stripe size in a field named 'orig_block_len'. We are also using 'block_len' of the extent_map structure to contain the chunk size, so we have 2 fields for the same value, 'len' and 'block_len', which is pointless; 3) When an extent map is associated to a chunk mapping, we set the bit EXTENT_FLAG_FS_MAPPING on its flags and then make its member named 'map_lookup' point to the associated map_lookup structure. This means that for an extent map associated to an inode extent, we are not using this 'map_lookup' pointer, so wasting 8 bytes (on a 64 bits platform); 4) Extent maps associated to a chunk mapping are never merged or split so it's pointless to use the existing extent map infrastructure. So add a dedicated data structure named 'btrfs_chunk_map' to represent chunk mappings, this is basically the existing map_lookup structure with some extra fields: 1) 'start' to contain the chunk logical address; 2) 'chunk_len' to contain the chunk's length; 3) 'stripe_size' for the stripe size; 4) 'rb_node' for insertion into a rb tree; 5) 'refs' for reference counting. This way we do a single memory allocation for chunk mappings and we don't waste memory for them with unused/unnecessary fields from an extent_map. We also save 8 bytes from the extent_map structure by removing the 'map_lookup' pointer, so the size of struct extent_map is reduced from 144 bytes down to 136 bytes, and we can now have 30 extents map per 4K page instead of 28. Reviewed-by: Josef Bacik <josef@toxicpanda.com> Signed-off-by: Filipe Manana <fdmanana@suse.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2023-11-21 13:38:38 +00:00
map = btrfs_alloc_chunk_map(test->num_stripes, GFP_KERNEL);
if (!map) {
btrfs: use a dedicated data structure for chunk maps Currently we abuse the extent_map structure for two purposes: 1) To actually represent extents for inodes; 2) To represent chunk mappings. This is odd and has several disadvantages: 1) To create a chunk map, we need to do two memory allocations: one for an extent_map structure and another one for a map_lookup structure, so more potential for an allocation failure and more complicated code to manage and link two structures; 2) For a chunk map we actually only use 3 fields (24 bytes) of the respective extent map structure: the 'start' field to have the logical start address of the chunk, the 'len' field to have the chunk's size, and the 'orig_block_len' field to contain the chunk's stripe size. Besides wasting a memory, it's also odd and not intuitive at all to have the stripe size in a field named 'orig_block_len'. We are also using 'block_len' of the extent_map structure to contain the chunk size, so we have 2 fields for the same value, 'len' and 'block_len', which is pointless; 3) When an extent map is associated to a chunk mapping, we set the bit EXTENT_FLAG_FS_MAPPING on its flags and then make its member named 'map_lookup' point to the associated map_lookup structure. This means that for an extent map associated to an inode extent, we are not using this 'map_lookup' pointer, so wasting 8 bytes (on a 64 bits platform); 4) Extent maps associated to a chunk mapping are never merged or split so it's pointless to use the existing extent map infrastructure. So add a dedicated data structure named 'btrfs_chunk_map' to represent chunk mappings, this is basically the existing map_lookup structure with some extra fields: 1) 'start' to contain the chunk logical address; 2) 'chunk_len' to contain the chunk's length; 3) 'stripe_size' for the stripe size; 4) 'rb_node' for insertion into a rb tree; 5) 'refs' for reference counting. This way we do a single memory allocation for chunk mappings and we don't waste memory for them with unused/unnecessary fields from an extent_map. We also save 8 bytes from the extent_map structure by removing the 'map_lookup' pointer, so the size of struct extent_map is reduced from 144 bytes down to 136 bytes, and we can now have 30 extents map per 4K page instead of 28. Reviewed-by: Josef Bacik <josef@toxicpanda.com> Signed-off-by: Filipe Manana <fdmanana@suse.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2023-11-21 13:38:38 +00:00
test_std_err(TEST_ALLOC_CHUNK_MAP);
return -ENOMEM;
}
/* Start at 4GiB logical address */
btrfs: use a dedicated data structure for chunk maps Currently we abuse the extent_map structure for two purposes: 1) To actually represent extents for inodes; 2) To represent chunk mappings. This is odd and has several disadvantages: 1) To create a chunk map, we need to do two memory allocations: one for an extent_map structure and another one for a map_lookup structure, so more potential for an allocation failure and more complicated code to manage and link two structures; 2) For a chunk map we actually only use 3 fields (24 bytes) of the respective extent map structure: the 'start' field to have the logical start address of the chunk, the 'len' field to have the chunk's size, and the 'orig_block_len' field to contain the chunk's stripe size. Besides wasting a memory, it's also odd and not intuitive at all to have the stripe size in a field named 'orig_block_len'. We are also using 'block_len' of the extent_map structure to contain the chunk size, so we have 2 fields for the same value, 'len' and 'block_len', which is pointless; 3) When an extent map is associated to a chunk mapping, we set the bit EXTENT_FLAG_FS_MAPPING on its flags and then make its member named 'map_lookup' point to the associated map_lookup structure. This means that for an extent map associated to an inode extent, we are not using this 'map_lookup' pointer, so wasting 8 bytes (on a 64 bits platform); 4) Extent maps associated to a chunk mapping are never merged or split so it's pointless to use the existing extent map infrastructure. So add a dedicated data structure named 'btrfs_chunk_map' to represent chunk mappings, this is basically the existing map_lookup structure with some extra fields: 1) 'start' to contain the chunk logical address; 2) 'chunk_len' to contain the chunk's length; 3) 'stripe_size' for the stripe size; 4) 'rb_node' for insertion into a rb tree; 5) 'refs' for reference counting. This way we do a single memory allocation for chunk mappings and we don't waste memory for them with unused/unnecessary fields from an extent_map. We also save 8 bytes from the extent_map structure by removing the 'map_lookup' pointer, so the size of struct extent_map is reduced from 144 bytes down to 136 bytes, and we can now have 30 extents map per 4K page instead of 28. Reviewed-by: Josef Bacik <josef@toxicpanda.com> Signed-off-by: Filipe Manana <fdmanana@suse.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2023-11-21 13:38:38 +00:00
map->start = SZ_4G;
map->chunk_len = test->data_stripe_size * test->num_data_stripes;
map->stripe_size = test->data_stripe_size;
map->num_stripes = test->num_stripes;
map->type = test->raid_type;
for (i = 0; i < map->num_stripes; i++) {
struct btrfs_device *dev = btrfs_alloc_dummy_device(fs_info);
if (IS_ERR(dev)) {
test_err("cannot allocate device");
ret = PTR_ERR(dev);
goto out;
}
map->stripes[i].dev = dev;
map->stripes[i].physical = test->data_stripe_phys_start[i];
}
btrfs: use a dedicated data structure for chunk maps Currently we abuse the extent_map structure for two purposes: 1) To actually represent extents for inodes; 2) To represent chunk mappings. This is odd and has several disadvantages: 1) To create a chunk map, we need to do two memory allocations: one for an extent_map structure and another one for a map_lookup structure, so more potential for an allocation failure and more complicated code to manage and link two structures; 2) For a chunk map we actually only use 3 fields (24 bytes) of the respective extent map structure: the 'start' field to have the logical start address of the chunk, the 'len' field to have the chunk's size, and the 'orig_block_len' field to contain the chunk's stripe size. Besides wasting a memory, it's also odd and not intuitive at all to have the stripe size in a field named 'orig_block_len'. We are also using 'block_len' of the extent_map structure to contain the chunk size, so we have 2 fields for the same value, 'len' and 'block_len', which is pointless; 3) When an extent map is associated to a chunk mapping, we set the bit EXTENT_FLAG_FS_MAPPING on its flags and then make its member named 'map_lookup' point to the associated map_lookup structure. This means that for an extent map associated to an inode extent, we are not using this 'map_lookup' pointer, so wasting 8 bytes (on a 64 bits platform); 4) Extent maps associated to a chunk mapping are never merged or split so it's pointless to use the existing extent map infrastructure. So add a dedicated data structure named 'btrfs_chunk_map' to represent chunk mappings, this is basically the existing map_lookup structure with some extra fields: 1) 'start' to contain the chunk logical address; 2) 'chunk_len' to contain the chunk's length; 3) 'stripe_size' for the stripe size; 4) 'rb_node' for insertion into a rb tree; 5) 'refs' for reference counting. This way we do a single memory allocation for chunk mappings and we don't waste memory for them with unused/unnecessary fields from an extent_map. We also save 8 bytes from the extent_map structure by removing the 'map_lookup' pointer, so the size of struct extent_map is reduced from 144 bytes down to 136 bytes, and we can now have 30 extents map per 4K page instead of 28. Reviewed-by: Josef Bacik <josef@toxicpanda.com> Signed-off-by: Filipe Manana <fdmanana@suse.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2023-11-21 13:38:38 +00:00
ret = btrfs_add_chunk_map(fs_info, map);
if (ret) {
btrfs: use a dedicated data structure for chunk maps Currently we abuse the extent_map structure for two purposes: 1) To actually represent extents for inodes; 2) To represent chunk mappings. This is odd and has several disadvantages: 1) To create a chunk map, we need to do two memory allocations: one for an extent_map structure and another one for a map_lookup structure, so more potential for an allocation failure and more complicated code to manage and link two structures; 2) For a chunk map we actually only use 3 fields (24 bytes) of the respective extent map structure: the 'start' field to have the logical start address of the chunk, the 'len' field to have the chunk's size, and the 'orig_block_len' field to contain the chunk's stripe size. Besides wasting a memory, it's also odd and not intuitive at all to have the stripe size in a field named 'orig_block_len'. We are also using 'block_len' of the extent_map structure to contain the chunk size, so we have 2 fields for the same value, 'len' and 'block_len', which is pointless; 3) When an extent map is associated to a chunk mapping, we set the bit EXTENT_FLAG_FS_MAPPING on its flags and then make its member named 'map_lookup' point to the associated map_lookup structure. This means that for an extent map associated to an inode extent, we are not using this 'map_lookup' pointer, so wasting 8 bytes (on a 64 bits platform); 4) Extent maps associated to a chunk mapping are never merged or split so it's pointless to use the existing extent map infrastructure. So add a dedicated data structure named 'btrfs_chunk_map' to represent chunk mappings, this is basically the existing map_lookup structure with some extra fields: 1) 'start' to contain the chunk logical address; 2) 'chunk_len' to contain the chunk's length; 3) 'stripe_size' for the stripe size; 4) 'rb_node' for insertion into a rb tree; 5) 'refs' for reference counting. This way we do a single memory allocation for chunk mappings and we don't waste memory for them with unused/unnecessary fields from an extent_map. We also save 8 bytes from the extent_map structure by removing the 'map_lookup' pointer, so the size of struct extent_map is reduced from 144 bytes down to 136 bytes, and we can now have 30 extents map per 4K page instead of 28. Reviewed-by: Josef Bacik <josef@toxicpanda.com> Signed-off-by: Filipe Manana <fdmanana@suse.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2023-11-21 13:38:38 +00:00
test_err("error adding chunk map to mapping tree");
goto out_free;
}
btrfs: use a dedicated data structure for chunk maps Currently we abuse the extent_map structure for two purposes: 1) To actually represent extents for inodes; 2) To represent chunk mappings. This is odd and has several disadvantages: 1) To create a chunk map, we need to do two memory allocations: one for an extent_map structure and another one for a map_lookup structure, so more potential for an allocation failure and more complicated code to manage and link two structures; 2) For a chunk map we actually only use 3 fields (24 bytes) of the respective extent map structure: the 'start' field to have the logical start address of the chunk, the 'len' field to have the chunk's size, and the 'orig_block_len' field to contain the chunk's stripe size. Besides wasting a memory, it's also odd and not intuitive at all to have the stripe size in a field named 'orig_block_len'. We are also using 'block_len' of the extent_map structure to contain the chunk size, so we have 2 fields for the same value, 'len' and 'block_len', which is pointless; 3) When an extent map is associated to a chunk mapping, we set the bit EXTENT_FLAG_FS_MAPPING on its flags and then make its member named 'map_lookup' point to the associated map_lookup structure. This means that for an extent map associated to an inode extent, we are not using this 'map_lookup' pointer, so wasting 8 bytes (on a 64 bits platform); 4) Extent maps associated to a chunk mapping are never merged or split so it's pointless to use the existing extent map infrastructure. So add a dedicated data structure named 'btrfs_chunk_map' to represent chunk mappings, this is basically the existing map_lookup structure with some extra fields: 1) 'start' to contain the chunk logical address; 2) 'chunk_len' to contain the chunk's length; 3) 'stripe_size' for the stripe size; 4) 'rb_node' for insertion into a rb tree; 5) 'refs' for reference counting. This way we do a single memory allocation for chunk mappings and we don't waste memory for them with unused/unnecessary fields from an extent_map. We also save 8 bytes from the extent_map structure by removing the 'map_lookup' pointer, so the size of struct extent_map is reduced from 144 bytes down to 136 bytes, and we can now have 30 extents map per 4K page instead of 28. Reviewed-by: Josef Bacik <josef@toxicpanda.com> Signed-off-by: Filipe Manana <fdmanana@suse.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2023-11-21 13:38:38 +00:00
ret = btrfs_rmap_block(fs_info, map->start, btrfs_sb_offset(1),
&logical, &out_ndaddrs, &out_stripe_len);
if (ret || (out_ndaddrs == 0 && test->expected_mapped_addr)) {
test_err("didn't rmap anything but expected %d",
test->expected_mapped_addr);
goto out;
}
if (out_stripe_len != BTRFS_STRIPE_LEN) {
test_err("calculated stripe length doesn't match");
goto out;
}
if (out_ndaddrs != test->expected_mapped_addr) {
for (i = 0; i < out_ndaddrs; i++)
test_msg("mapped %llu", logical[i]);
test_err("unexpected number of mapped addresses: %d", out_ndaddrs);
goto out;
}
for (i = 0; i < out_ndaddrs; i++) {
if (logical[i] != test->mapped_logical[i]) {
test_err("unexpected logical address mapped");
goto out;
}
}
ret = 0;
out:
btrfs: use a dedicated data structure for chunk maps Currently we abuse the extent_map structure for two purposes: 1) To actually represent extents for inodes; 2) To represent chunk mappings. This is odd and has several disadvantages: 1) To create a chunk map, we need to do two memory allocations: one for an extent_map structure and another one for a map_lookup structure, so more potential for an allocation failure and more complicated code to manage and link two structures; 2) For a chunk map we actually only use 3 fields (24 bytes) of the respective extent map structure: the 'start' field to have the logical start address of the chunk, the 'len' field to have the chunk's size, and the 'orig_block_len' field to contain the chunk's stripe size. Besides wasting a memory, it's also odd and not intuitive at all to have the stripe size in a field named 'orig_block_len'. We are also using 'block_len' of the extent_map structure to contain the chunk size, so we have 2 fields for the same value, 'len' and 'block_len', which is pointless; 3) When an extent map is associated to a chunk mapping, we set the bit EXTENT_FLAG_FS_MAPPING on its flags and then make its member named 'map_lookup' point to the associated map_lookup structure. This means that for an extent map associated to an inode extent, we are not using this 'map_lookup' pointer, so wasting 8 bytes (on a 64 bits platform); 4) Extent maps associated to a chunk mapping are never merged or split so it's pointless to use the existing extent map infrastructure. So add a dedicated data structure named 'btrfs_chunk_map' to represent chunk mappings, this is basically the existing map_lookup structure with some extra fields: 1) 'start' to contain the chunk logical address; 2) 'chunk_len' to contain the chunk's length; 3) 'stripe_size' for the stripe size; 4) 'rb_node' for insertion into a rb tree; 5) 'refs' for reference counting. This way we do a single memory allocation for chunk mappings and we don't waste memory for them with unused/unnecessary fields from an extent_map. We also save 8 bytes from the extent_map structure by removing the 'map_lookup' pointer, so the size of struct extent_map is reduced from 144 bytes down to 136 bytes, and we can now have 30 extents map per 4K page instead of 28. Reviewed-by: Josef Bacik <josef@toxicpanda.com> Signed-off-by: Filipe Manana <fdmanana@suse.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2023-11-21 13:38:38 +00:00
btrfs_remove_chunk_map(fs_info, map);
out_free:
kfree(logical);
return ret;
}
int btrfs_test_extent_map(void)
{
struct btrfs_fs_info *fs_info = NULL;
struct inode *inode;
struct btrfs_root *root = NULL;
int ret = 0, i;
struct rmap_test_vector rmap_tests[] = {
{
/*
* Test a chunk with 2 data stripes one of which
* intersects the physical address of the super block
* is correctly recognised.
*/
.raid_type = BTRFS_BLOCK_GROUP_RAID1,
.physical_start = SZ_64M - SZ_4M,
.data_stripe_size = SZ_256M,
.num_data_stripes = 2,
.num_stripes = 2,
.data_stripe_phys_start =
{SZ_64M - SZ_4M, SZ_64M - SZ_4M + SZ_256M},
.expected_mapped_addr = true,
.mapped_logical= {SZ_4G + SZ_4M}
},
{
/*
* Test that out-of-range physical addresses are
* ignored
*/
/* SINGLE chunk type */
.raid_type = 0,
.physical_start = SZ_4G,
.data_stripe_size = SZ_256M,
.num_data_stripes = 1,
.num_stripes = 1,
.data_stripe_phys_start = {SZ_256M},
.expected_mapped_addr = false,
.mapped_logical = {0}
}
};
test_msg("running extent_map tests");
/*
* Note: the fs_info is not set up completely, we only need
* fs_info::fsid for the tracepoint.
*/
fs_info = btrfs_alloc_dummy_fs_info(PAGE_SIZE, PAGE_SIZE);
if (!fs_info) {
test_std_err(TEST_ALLOC_FS_INFO);
return -ENOMEM;
}
inode = btrfs_new_test_inode();
if (!inode) {
test_std_err(TEST_ALLOC_INODE);
ret = -ENOMEM;
goto out;
}
root = btrfs_alloc_dummy_root(fs_info);
if (IS_ERR(root)) {
test_std_err(TEST_ALLOC_ROOT);
ret = PTR_ERR(root);
root = NULL;
goto out;
}
BTRFS_I(inode)->root = root;
ret = test_case_1(fs_info, BTRFS_I(inode));
if (ret)
goto out;
ret = test_case_2(fs_info, BTRFS_I(inode));
if (ret)
goto out;
ret = test_case_3(fs_info, BTRFS_I(inode));
if (ret)
goto out;
ret = test_case_4(fs_info, BTRFS_I(inode));
if (ret)
goto out;
ret = test_case_5(fs_info, BTRFS_I(inode));
if (ret)
goto out;
ret = test_case_6(fs_info, BTRFS_I(inode));
if (ret)
goto out;
ret = test_case_7(fs_info, BTRFS_I(inode));
if (ret)
goto out;
test_msg("running rmap tests");
for (i = 0; i < ARRAY_SIZE(rmap_tests); i++) {
ret = test_rmap_block(fs_info, &rmap_tests[i]);
if (ret)
goto out;
}
out:
iput(inode);
btrfs_free_dummy_root(root);
btrfs_free_dummy_fs_info(fs_info);
return ret;
}