// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2017 Oracle. All rights reserved.
 */

#include <linux/types.h>
#include "btrfs-tests.h"
#include "../ctree.h"
#include "../btrfs_inode.h"
#include "../volumes.h"
#include "../disk-io.h"
#include "../block-group.h"
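
/*
 * Free every extent map left in the inode's extent map tree once a test
 * case is done with it.  Under CONFIG_BTRFS_DEBUG, flag as a leak (-EINVAL)
 * any map whose reference count is not exactly 1 at that point.
 */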
static int free_extent_map_tree(struct btrfs_inode *inode)
{
	struct extent_map_tree *em_tree = &inode->extent_tree;
	struct extent_map *em;
	struct rb_node *node;
	int ret = 0;

	write_lock(&em_tree->lock);
	while (!RB_EMPTY_ROOT(&em_tree->map.rb_root)) {
		node = rb_first_cached(&em_tree->map);
		em = rb_entry(node, struct extent_map, rb_node);
		remove_extent_mapping(inode, em);

#ifdef CONFIG_BTRFS_DEBUG
		if (refcount_read(&em->refs) != 1) {
			ret = -EINVAL;
			test_err(
"em leak: em (start %llu len %llu block_start %llu block_len %llu) refs %d",
				 em->start, em->len, em->block_start,
				 em->block_len, refcount_read(&em->refs));

			refcount_set(&em->refs, 1);
		}
#endif
		free_extent_map(em);
	}
	write_unlock(&em_tree->lock);

	return ret;
}

/*
 * Test scenario:
 *
 * Suppose that no extent map has been loaded into memory yet. There is a file
 * extent [0, 16K) followed by another file extent [16K, 20K), and two dio
 * reads enter btrfs_get_extent() concurrently: t1 is reading [8K, 16K) and
 * t2 is reading [0, 8K).
 *
 *     t1                                t2
 *  btrfs_get_extent()               btrfs_get_extent()
 *   -> lookup_extent_mapping()       -> lookup_extent_mapping()
 *   -> add_extent_mapping(0, 16K)
 *   -> return em
 *                                    -> add_extent_mapping(0, 16K)
 *                                    -> #handle -EEXIST
 */
static int test_case_1(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
{
	struct extent_map_tree *em_tree = &inode->extent_tree;
	struct extent_map *em;
	u64 start = 0;
	u64 len = SZ_8K;
	int ret;
	int ret2;

	em = alloc_extent_map();
	if (!em) {
		test_std_err(TEST_ALLOC_EXTENT_MAP);
		return -ENOMEM;
	}

	/* Add [0, 16K) */
	em->start = 0;
	em->len = SZ_16K;
	em->block_start = 0;
	em->block_len = SZ_16K;
	write_lock(&em_tree->lock);
	ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
	write_unlock(&em_tree->lock);
	if (ret < 0) {
		test_err("cannot add extent range [0, 16K)");
		goto out;
	}
	free_extent_map(em);

	/* Add [16K, 20K) following [0, 16K) */
	em = alloc_extent_map();
	if (!em) {
		test_std_err(TEST_ALLOC_EXTENT_MAP);
		ret = -ENOMEM;
		goto out;
	}

	em->start = SZ_16K;
	em->len = SZ_4K;
	em->block_start = SZ_32K; /* avoid merging */
	em->block_len = SZ_4K;
	write_lock(&em_tree->lock);
	ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
	write_unlock(&em_tree->lock);
	if (ret < 0) {
		test_err("cannot add extent range [16K, 20K)");
		goto out;
	}
	free_extent_map(em);

	em = alloc_extent_map();
	if (!em) {
		test_std_err(TEST_ALLOC_EXTENT_MAP);
		ret = -ENOMEM;
		goto out;
	}

	/* Add [0, 8K), should return [0, 16K) instead. */
	em->start = start;
	em->len = len;
	em->block_start = start;
	em->block_len = len;
	write_lock(&em_tree->lock);
	ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
	write_unlock(&em_tree->lock);
	if (ret) {
		test_err("case1 [%llu %llu]: ret %d", start, start + len, ret);
		goto out;
	}
	if (!em) {
		test_err("case1 [%llu %llu]: no extent map returned",
			 start, start + len);
		ret = -ENOENT;
		goto out;
	}
	if (em->start != 0 || extent_map_end(em) != SZ_16K ||
	    em->block_start != 0 || em->block_len != SZ_16K) {
		test_err(
"case1 [%llu %llu]: ret %d returned a wrong em (start %llu len %llu block_start %llu block_len %llu)",
			 start, start + len, ret, em->start, em->len,
			 em->block_start, em->block_len);
		ret = -EINVAL;
	}
	free_extent_map(em);
out:
	ret2 = free_extent_map_tree(inode);
	if (ret == 0)
		ret = ret2;

	return ret;
}

/*
 * Test scenario:
 *
 * Reading an inline extent ends up with EEXIST, i.e. read an inline extent,
 * discard the page cache, and read it again.
 */
static int test_case_2(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
{
	struct extent_map_tree *em_tree = &inode->extent_tree;
	struct extent_map *em;
	int ret;
	int ret2;

	em = alloc_extent_map();
	if (!em) {
		test_std_err(TEST_ALLOC_EXTENT_MAP);
		return -ENOMEM;
	}

	/* Add [0, 1K) */
	em->start = 0;
	em->len = SZ_1K;
	em->block_start = EXTENT_MAP_INLINE;
	em->block_len = (u64)-1;
	write_lock(&em_tree->lock);
	ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
	write_unlock(&em_tree->lock);
	if (ret < 0) {
		test_err("cannot add extent range [0, 1K)");
		goto out;
	}
	free_extent_map(em);

	/* Add [4K, 8K) following [0, 1K) */
	em = alloc_extent_map();
	if (!em) {
		test_std_err(TEST_ALLOC_EXTENT_MAP);
		ret = -ENOMEM;
		goto out;
	}

	em->start = SZ_4K;
	em->len = SZ_4K;
	em->block_start = SZ_4K;
	em->block_len = SZ_4K;
	write_lock(&em_tree->lock);
	ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
	write_unlock(&em_tree->lock);
	if (ret < 0) {
		test_err("cannot add extent range [4K, 8K)");
		goto out;
	}
	free_extent_map(em);

	em = alloc_extent_map();
	if (!em) {
		test_std_err(TEST_ALLOC_EXTENT_MAP);
		ret = -ENOMEM;
		goto out;
	}

	/* Add [0, 1K) */
	em->start = 0;
	em->len = SZ_1K;
	em->block_start = EXTENT_MAP_INLINE;
	em->block_len = (u64)-1;
	write_lock(&em_tree->lock);
	ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
	write_unlock(&em_tree->lock);
	if (ret) {
		test_err("case2 [0 1K]: ret %d", ret);
		goto out;
	}
	if (!em) {
		test_err("case2 [0 1K]: no extent map returned");
		ret = -ENOENT;
		goto out;
	}
	if (em->start != 0 || extent_map_end(em) != SZ_1K ||
	    em->block_start != EXTENT_MAP_INLINE || em->block_len != (u64)-1) {
		test_err(
"case2 [0 1K]: ret %d returned a wrong em (start %llu len %llu block_start %llu block_len %llu)",
			 ret, em->start, em->len, em->block_start,
			 em->block_len);
		ret = -EINVAL;
	}
	free_extent_map(em);
out:
	ret2 = free_extent_map_tree(inode);
	if (ret == 0)
		ret = ret2;

	return ret;
}
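
/*
 * Worker for test_case_3 below: add em [4K, 8K), then try to add em [0, 16K)
 * while asking only for the subrange [@start, @start + 4K) back.
 */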
static int __test_case_3(struct btrfs_fs_info *fs_info,
			 struct btrfs_inode *inode, u64 start)
{
	struct extent_map_tree *em_tree = &inode->extent_tree;
	struct extent_map *em;
	u64 len = SZ_4K;
	int ret;
	int ret2;

	em = alloc_extent_map();
	if (!em) {
		test_std_err(TEST_ALLOC_EXTENT_MAP);
		return -ENOMEM;
	}

	/* Add [4K, 8K) */
	em->start = SZ_4K;
	em->len = SZ_4K;
	em->block_start = SZ_4K;
	em->block_len = SZ_4K;
	write_lock(&em_tree->lock);
	ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
	write_unlock(&em_tree->lock);
	if (ret < 0) {
		test_err("cannot add extent range [4K, 8K)");
		goto out;
	}
	free_extent_map(em);

	em = alloc_extent_map();
	if (!em) {
		test_std_err(TEST_ALLOC_EXTENT_MAP);
		ret = -ENOMEM;
		goto out;
	}

	/* Add [0, 16K) */
	em->start = 0;
	em->len = SZ_16K;
	em->block_start = 0;
	em->block_len = SZ_16K;
	write_lock(&em_tree->lock);
	ret = btrfs_add_extent_mapping(inode, &em, start, len);
	write_unlock(&em_tree->lock);
	if (ret) {
		test_err("case3 [%llu %llu): ret %d",
			 start, start + len, ret);
		goto out;
	}
	if (!em) {
		test_err("case3 [%llu %llu): no extent map returned",
			 start, start + len);
		ret = -ENOENT;
		goto out;
	}
	/*
	 * Since bytes within em are contiguous, em->block_start is identical to
	 * em->start.
	 */
	if (start < em->start || start + len > extent_map_end(em) ||
	    em->start != em->block_start || em->len != em->block_len) {
		test_err(
"case3 [%llu %llu): ret %d em (start %llu len %llu block_start %llu block_len %llu)",
			 start, start + len, ret, em->start, em->len,
			 em->block_start, em->block_len);
		ret = -EINVAL;
	}
	free_extent_map(em);
out:
	ret2 = free_extent_map_tree(inode);
	if (ret == 0)
		ret = ret2;

	return ret;
}

/*
 * Test scenario:
 *
 * Suppose that no extent map has been loaded into memory yet.
 * There is a file extent [0, 16K) and two jobs running concurrently
 * against it: t1 is doing a buffered write to [4K, 8K) and t2 is doing a
 * dio read from [0, 4K), [8K, 12K) or [12K, 16K).
 *
 * t1 goes ahead of t2 and adds em [4K, 8K) into the tree.
 *
 *         t1                       t2
 *  cow_file_range()	     btrfs_get_extent()
 *                            -> lookup_extent_mapping()
 *   -> add_extent_mapping()
 *                            -> add_extent_mapping()
 */
static int test_case_3(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
{
	int ret;

	ret = __test_case_3(fs_info, inode, 0);
	if (ret)
		return ret;
	ret = __test_case_3(fs_info, inode, SZ_8K);
	if (ret)
		return ret;
	ret = __test_case_3(fs_info, inode, (12 * SZ_1K));

	return ret;
}
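
/*
 * Worker for test_case_4 below: populate the tree with [0, 8K) and [8K, 32K),
 * then try to add em [0, 32K) while asking only for the subrange
 * [@start, @start + 4K) back.
 */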
static int __test_case_4(struct btrfs_fs_info *fs_info,
			 struct btrfs_inode *inode, u64 start)
{
	struct extent_map_tree *em_tree = &inode->extent_tree;
	struct extent_map *em;
	u64 len = SZ_4K;
	int ret;
	int ret2;

	em = alloc_extent_map();
	if (!em) {
		test_std_err(TEST_ALLOC_EXTENT_MAP);
		return -ENOMEM;
	}

	/* Add [0K, 8K) */
	em->start = 0;
	em->len = SZ_8K;
	em->block_start = 0;
	em->block_len = SZ_8K;
	write_lock(&em_tree->lock);
	ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
	write_unlock(&em_tree->lock);
	if (ret < 0) {
		test_err("cannot add extent range [0, 8K)");
		goto out;
	}
	free_extent_map(em);

	em = alloc_extent_map();
	if (!em) {
		test_std_err(TEST_ALLOC_EXTENT_MAP);
		ret = -ENOMEM;
		goto out;
	}

	/* Add [8K, 32K) */
	em->start = SZ_8K;
	em->len = 24 * SZ_1K;
	em->block_start = SZ_16K; /* avoid merging */
	em->block_len = 24 * SZ_1K;
	write_lock(&em_tree->lock);
	ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
	write_unlock(&em_tree->lock);
	if (ret < 0) {
		test_err("cannot add extent range [8K, 32K)");
		goto out;
	}
	free_extent_map(em);

	em = alloc_extent_map();
	if (!em) {
		test_std_err(TEST_ALLOC_EXTENT_MAP);
		ret = -ENOMEM;
		goto out;
	}
	/* Add [0K, 32K) */
	em->start = 0;
	em->len = SZ_32K;
	em->block_start = 0;
	em->block_len = SZ_32K;
	write_lock(&em_tree->lock);
	ret = btrfs_add_extent_mapping(inode, &em, start, len);
	write_unlock(&em_tree->lock);
	if (ret) {
		test_err("case4 [%llu %llu): ret %d",
			 start, start + len, ret);
		goto out;
	}
	if (!em) {
		test_err("case4 [%llu %llu): no extent map returned",
			 start, start + len);
		ret = -ENOENT;
		goto out;
	}
	if (start < em->start || start + len > extent_map_end(em)) {
		test_err(
"case4 [%llu %llu): ret %d, added wrong em (start %llu len %llu block_start %llu block_len %llu)",
			 start, start + len, ret, em->start, em->len, em->block_start,
			 em->block_len);
		ret = -EINVAL;
	}
	free_extent_map(em);
out:
	ret2 = free_extent_map_tree(inode);
	if (ret == 0)
		ret = ret2;

	return ret;
}

/*
 * Test scenario:
 *
 * Suppose that no extent map has been loaded into memory yet.
 * There is a file extent [0, 32K) and two jobs running concurrently
 * against it: t1 is doing a dio write to [8K, 32K) and t2 is doing a dio
 * read from [0, 4K) or [4K, 8K).
 *
 * t1 goes ahead of t2 and splits em [0, 32K) into em [0K, 8K) and [8K, 32K).
 *
 *         t1                                t2
 *  btrfs_get_blocks_direct()	       btrfs_get_blocks_direct()
 *   -> btrfs_get_extent()              -> btrfs_get_extent()
 *       -> lookup_extent_mapping()
 *       -> add_extent_mapping()            -> lookup_extent_mapping()
 *          # load [0, 32K)
 *   -> btrfs_new_extent_direct()
 *       -> btrfs_drop_extent_cache()
 *          # split [0, 32K)
 *       -> add_extent_mapping()
 *          # add [8K, 32K)
 *                                          -> add_extent_mapping()
 *                                             # handle -EEXIST when adding
 *                                             # [0, 32K)
 */
static int test_case_4(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
{
	int ret;

	ret = __test_case_4(fs_info, inode, 0);
	if (ret)
		return ret;
	ret = __test_case_4(fs_info, inode, SZ_4K);

	return ret;
}
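
/*
 * Add a zlib-compressed extent map [@start, @start + @len) backed by
 * @block_start.  The compression flag keeps neighbouring maps from being
 * merged, which the drop-range tests below rely on.
 */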
static int add_compressed_extent(struct btrfs_inode *inode,
				 u64 start, u64 len, u64 block_start)
{
	struct extent_map_tree *em_tree = &inode->extent_tree;
	struct extent_map *em;
	int ret;

	em = alloc_extent_map();
	if (!em) {
		test_std_err(TEST_ALLOC_EXTENT_MAP);
		return -ENOMEM;
	}

	em->start = start;
	em->len = len;
	em->block_start = block_start;
	em->block_len = SZ_4K;
	em->flags |= EXTENT_FLAG_COMPRESS_ZLIB;
	write_lock(&em_tree->lock);
	ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
	write_unlock(&em_tree->lock);
	free_extent_map(em);
	if (ret < 0) {
		test_err("cannot add extent map [%llu, %llu)", start, start + len);
		return ret;
	}

	return 0;
}

struct extent_range {
	u64 start;
	u64 len;
};

/* The valid states of the tree after every drop, as described below. */
struct extent_range valid_ranges[][7] = {
	{
		{ .start = 0,			.len = SZ_8K },		/* [0, 8K) */
		{ .start = SZ_4K * 3,		.len = SZ_4K * 3},	/* [12k, 24k) */
		{ .start = SZ_4K * 6,		.len = SZ_4K * 3},	/* [24k, 36k) */
		{ .start = SZ_32K + SZ_4K,	.len = SZ_4K},		/* [36k, 40k) */
		{ .start = SZ_4K * 10,		.len = SZ_4K * 6},	/* [40k, 64k) */
	},
	{
		{ .start = 0,			.len = SZ_8K },		/* [0, 8K) */
		{ .start = SZ_4K * 5,		.len = SZ_4K},		/* [20k, 24k) */
		{ .start = SZ_4K * 6,		.len = SZ_4K * 3},	/* [24k, 36k) */
		{ .start = SZ_32K + SZ_4K,	.len = SZ_4K},		/* [36k, 40k) */
		{ .start = SZ_4K * 10,		.len = SZ_4K * 6},	/* [40k, 64k) */
	},
	{
		{ .start = 0,			.len = SZ_8K },		/* [0, 8K) */
		{ .start = SZ_4K * 5,		.len = SZ_4K},		/* [20k, 24k) */
		{ .start = SZ_4K * 6,		.len = SZ_4K},		/* [24k, 28k) */
		{ .start = SZ_32K,		.len = SZ_4K},		/* [32k, 36k) */
		{ .start = SZ_32K + SZ_4K,	.len = SZ_4K},		/* [36k, 40k) */
		{ .start = SZ_4K * 10,		.len = SZ_4K * 6},	/* [40k, 64k) */
	},
	{
		{ .start = 0,			.len = SZ_8K},		/* [0, 8K) */
		{ .start = SZ_4K * 5,		.len = SZ_4K},		/* [20k, 24k) */
		{ .start = SZ_4K * 6,		.len = SZ_4K},		/* [24k, 28k) */
	}
};
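
/*
 * Walk the extent map tree in order and check that it matches row @index of
 * valid_ranges above, failing on any mismatched, missing or left-over entry.
 */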
static int validate_range(struct extent_map_tree *em_tree, int index)
{
	struct rb_node *n;
	int i;

	for (i = 0, n = rb_first_cached(&em_tree->map);
	     valid_ranges[index][i].len && n;
	     i++, n = rb_next(n)) {
		struct extent_map *entry = rb_entry(n, struct extent_map, rb_node);

		if (entry->start != valid_ranges[index][i].start) {
			test_err("mapping has start %llu expected %llu",
				 entry->start, valid_ranges[index][i].start);
			return -EINVAL;
		}

		if (entry->len != valid_ranges[index][i].len) {
			test_err("mapping has len %llu expected %llu",
				 entry->len, valid_ranges[index][i].len);
			return -EINVAL;
		}
	}

	/*
	 * We exited because we don't have any more entries in the extent_map
	 * but we still expect more valid entries.
	 */
	if (valid_ranges[index][i].len) {
		test_err("missing an entry");
		return -EINVAL;
	}

	/* We exited the loop but still have entries in the extent map. */
	if (n) {
		test_err("we have a left over entry in the extent map we didn't expect");
		return -EINVAL;
	}

	return 0;
}

/*
 * Test scenario:
 *
 * Test the various edge cases of btrfs_drop_extent_map_range(). Create the
 * following ranges:
 *
 * [0, 12k)[12k, 24k)[24k, 36k)[36k, 40k)[40k, 64k)
 *
 * And then we'll drop:
 *
 * [8k, 12k) - test the single front split
 * [12k, 20k) - test the single back split
 * [28k, 32k) - test the double split
 * [32k, 64k) - test whole em dropping
 *
 * They'll have the EXTENT_FLAG_COMPRESS_ZLIB flag set to keep the em tree
 * from merging the em's.
 */
static int test_case_5(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
{
	u64 start, end;
	int ret;
	int ret2;

	test_msg("Running btrfs_drop_extent_map_range tests");

	/* [0, 12k) */
	ret = add_compressed_extent(inode, 0, SZ_4K * 3, 0);
	if (ret) {
		test_err("cannot add extent range [0, 12K)");
		goto out;
	}

	/* [12k, 24k) */
	ret = add_compressed_extent(inode, SZ_4K * 3, SZ_4K * 3, SZ_4K);
	if (ret) {
		test_err("cannot add extent range [12k, 24k)");
		goto out;
	}

	/* [24k, 36k) */
	ret = add_compressed_extent(inode, SZ_4K * 6, SZ_4K * 3, SZ_8K);
	if (ret) {
		test_err("cannot add extent range [24k, 36k)");
		goto out;
	}

	/* [36k, 40k) */
	ret = add_compressed_extent(inode, SZ_32K + SZ_4K, SZ_4K, SZ_4K * 3);
	if (ret) {
		test_err("cannot add extent range [36k, 40k)");
		goto out;
	}

	/* [40k, 64k) */
	ret = add_compressed_extent(inode, SZ_4K * 10, SZ_4K * 6, SZ_16K);
	if (ret) {
		test_err("cannot add extent range [40k, 64k)");
		goto out;
	}

	/* Drop [8k, 12k) */
	start = SZ_8K;
	end = (3 * SZ_4K) - 1;
	btrfs_drop_extent_map_range(inode, start, end, false);
	ret = validate_range(&inode->extent_tree, 0);
	if (ret)
		goto out;

	/* Drop [12k, 20k) */
	start = SZ_4K * 3;
	end = SZ_16K + SZ_4K - 1;
	btrfs_drop_extent_map_range(inode, start, end, false);
	ret = validate_range(&inode->extent_tree, 1);
	if (ret)
		goto out;

	/* Drop [28k, 32k) */
	start = SZ_32K - SZ_4K;
	end = SZ_32K - 1;
	btrfs_drop_extent_map_range(inode, start, end, false);
	ret = validate_range(&inode->extent_tree, 2);
	if (ret)
		goto out;

	/* Drop [32k, 64k) */
	start = SZ_32K;
	end = SZ_64K - 1;
	btrfs_drop_extent_map_range(inode, start, end, false);
	ret = validate_range(&inode->extent_tree, 3);
	if (ret)
		goto out;
out:
	ret2 = free_extent_map_tree(inode);
	if (ret == 0)
		ret = ret2;

	return ret;
}

/*
 * Test the btrfs_add_extent_mapping helper which will attempt to create an em
 * for areas between two existing ems. Validate it doesn't do this when there
 * are two unmerged em's side by side.
 */
static int test_case_6(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
{
	struct extent_map_tree *em_tree = &inode->extent_tree;
	struct extent_map *em = NULL;
	int ret;
	int ret2;

	ret = add_compressed_extent(inode, 0, SZ_4K, 0);
	if (ret)
		goto out;

	ret = add_compressed_extent(inode, SZ_4K, SZ_4K, 0);
	if (ret)
		goto out;

	em = alloc_extent_map();
	if (!em) {
		test_std_err(TEST_ALLOC_EXTENT_MAP);
		ret = -ENOMEM;
		goto out;
	}

	em->start = SZ_4K;
	em->len = SZ_4K;
	em->block_start = SZ_16K;
	em->block_len = SZ_16K;
	write_lock(&em_tree->lock);
	ret = btrfs_add_extent_mapping(inode, &em, 0, SZ_8K);
	write_unlock(&em_tree->lock);

	if (ret != 0) {
		test_err("got an error when adding our em: %d", ret);
		goto out;
	}

	ret = -EINVAL;
	if (em->start != 0) {
		test_err("unexpected em->start at %llu, wanted 0", em->start);
		goto out;
	}
	if (em->len != SZ_4K) {
		test_err("unexpected em->len %llu, expected 4K", em->len);
		goto out;
	}
	ret = 0;
out:
	free_extent_map(em);
	ret2 = free_extent_map_tree(inode);
	if (ret == 0)
		ret = ret2;

	return ret;
}

/*
 * Regression test for btrfs_drop_extent_map_range. Calling with skip_pinned ==
 * true would mess up the start/end calculations and subsequent splits would be
 * incorrect.
 */
static int test_case_7(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
{
	struct extent_map_tree *em_tree = &inode->extent_tree;
	struct extent_map *em;
	int ret;
	int ret2;

	test_msg("Running btrfs_drop_extent_cache with pinned");

	em = alloc_extent_map();
	if (!em) {
		test_std_err(TEST_ALLOC_EXTENT_MAP);
		return -ENOMEM;
	}

	/* [0, 16K), pinned */
	em->start = 0;
	em->len = SZ_16K;
	em->block_start = 0;
	em->block_len = SZ_4K;
	em->flags |= EXTENT_FLAG_PINNED;
	write_lock(&em_tree->lock);
	ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
	write_unlock(&em_tree->lock);
	if (ret < 0) {
		test_err("couldn't add extent map");
		goto out;
	}
	free_extent_map(em);

	em = alloc_extent_map();
	if (!em) {
		test_std_err(TEST_ALLOC_EXTENT_MAP);
		ret = -ENOMEM;
		goto out;
	}

	/* [32K, 48K), not pinned */
	em->start = SZ_32K;
	em->len = SZ_16K;
	em->block_start = SZ_32K;
	em->block_len = SZ_16K;
	write_lock(&em_tree->lock);
	ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
	write_unlock(&em_tree->lock);
	if (ret < 0) {
		test_err("couldn't add extent map");
		goto out;
	}
	free_extent_map(em);

	/*
	 * Drop [0, 36K). This should skip the pinned [0, 16K) extent and then
	 * split the [32K, 48K) extent.
	 */
	btrfs_drop_extent_map_range(inode, 0, (36 * SZ_1K) - 1, true);

	/* Make sure our extent maps look sane. */
	ret = -EINVAL;

	em = lookup_extent_mapping(em_tree, 0, SZ_16K);
	if (!em) {
		test_err("didn't find an em at 0 as expected");
		goto out;
	}

	if (em->start != 0) {
		test_err("em->start is %llu, expected 0", em->start);
		goto out;
	}

	if (em->len != SZ_16K) {
		test_err("em->len is %llu, expected 16K", em->len);
		goto out;
	}

	free_extent_map(em);

	read_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, SZ_16K, SZ_16K);
	read_unlock(&em_tree->lock);
	if (em) {
		test_err("found an em when we weren't expecting one");
		goto out;
	}

	read_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, SZ_32K, SZ_16K);
	read_unlock(&em_tree->lock);
	if (!em) {
		test_err("didn't find an em at 32K as expected");
		goto out;
	}

	if (em->start != (36 * SZ_1K)) {
		test_err("em->start is %llu, expected 36K", em->start);
		goto out;
	}

	if (em->len != (12 * SZ_1K)) {
		test_err("em->len is %llu, expected 12K", em->len);
		goto out;
	}

	if (em->block_start != SZ_32K + SZ_4K) {
		test_err("em->block_start is %llu, expected 36K", em->block_start);
		goto out;
	}

	free_extent_map(em);

	read_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, 48 * SZ_1K, (u64)-1);
	read_unlock(&em_tree->lock);
	if (em) {
		test_err("found an unexpected em above 48K");
		goto out;
	}

	ret = 0;
out:
	free_extent_map(em);
	/* Unpin our extent to prevent warning when removing it below. */
	ret2 = unpin_extent_cache(inode, 0, SZ_16K, 0);
	if (ret == 0)
		ret = ret2;
	ret2 = free_extent_map_tree(inode);
	if (ret == 0)
		ret = ret2;

	return ret;
}
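
/*
 * One logical <-> physical mapping scenario for the rmap tests: the chunk
 * geometry to build and the physical addresses that are expected (or not)
 * to map back to logical addresses.
 */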
struct rmap_test_vector {
	u64 raid_type;
	u64 physical_start;
	u64 data_stripe_size;
	u64 num_data_stripes;
	u64 num_stripes;
	/* Assume we won't have more than 5 physical stripes */
	u64 data_stripe_phys_start[5];
	bool expected_mapped_addr;
	/* Physical to logical addresses */
	u64 mapped_logical[5];
};
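
/*
 * Build a dummy chunk map from @test and add it to the fs_info's mapping
 * tree, then verify that reverse mapping physical addresses yields the
 * logical addresses the vector expects.
 */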
static int test_rmap_block(struct btrfs_fs_info *fs_info,
			   struct rmap_test_vector *test)
{
	struct btrfs_chunk_map *map;
	u64 *logical = NULL;
	int i, out_ndaddrs, out_stripe_len;
	int ret;

	map = btrfs_alloc_chunk_map(test->num_stripes, GFP_KERNEL);
	if (!map) {
		test_std_err(TEST_ALLOC_CHUNK_MAP);
		return -ENOMEM;
	}

	/* Start at 4GiB logical address */
	map->start = SZ_4G;
	map->chunk_len = test->data_stripe_size * test->num_data_stripes;
	map->stripe_size = test->data_stripe_size;
	map->num_stripes = test->num_stripes;
	map->type = test->raid_type;

	for (i = 0; i < map->num_stripes; i++) {
		struct btrfs_device *dev = btrfs_alloc_dummy_device(fs_info);

		if (IS_ERR(dev)) {
			test_err("cannot allocate device");
			ret = PTR_ERR(dev);
			goto out;
		}
		map->stripes[i].dev = dev;
		map->stripes[i].physical = test->data_stripe_phys_start[i];
	}

	ret = btrfs_add_chunk_map(fs_info, map);
	if (ret) {
|
|
|
test_err("error adding chunk map to mapping tree");
|
2019-12-10 20:00:45 +02:00
|
|
|
goto out_free;
|
|
|
|
}
|
|
|
|
|
	ret = btrfs_rmap_block(fs_info, map->start, btrfs_sb_offset(1),
			       &logical, &out_ndaddrs, &out_stripe_len);
	if (ret || (out_ndaddrs == 0 && test->expected_mapped_addr)) {
		test_err("didn't rmap anything but expected %d",
			 test->expected_mapped_addr);
		goto out;
	}

	if (out_stripe_len != BTRFS_STRIPE_LEN) {
		test_err("calculated stripe length doesn't match");
		goto out;
	}

	if (out_ndaddrs != test->expected_mapped_addr) {
		for (i = 0; i < out_ndaddrs; i++)
			test_msg("mapped %llu", logical[i]);
		test_err("unexpected number of mapped addresses: %d",
			 out_ndaddrs);
		goto out;
	}

	for (i = 0; i < out_ndaddrs; i++) {
		if (logical[i] != test->mapped_logical[i]) {
			test_err("unexpected logical address mapped");
			goto out;
		}
	}

	ret = 0;
out:
	btrfs_remove_chunk_map(fs_info, map);
out_free:
	kfree(logical);
	return ret;
}

int btrfs_test_extent_map(void)
{
	struct btrfs_fs_info *fs_info = NULL;
	struct inode *inode;
	struct btrfs_root *root = NULL;
	int ret = 0, i;
	struct rmap_test_vector rmap_tests[] = {
		{
			/*
			 * Test that a chunk with 2 data stripes, one of which
			 * intersects the physical address of the super block,
			 * is correctly recognised.
			 */
			.raid_type = BTRFS_BLOCK_GROUP_RAID1,
			.physical_start = SZ_64M - SZ_4M,
			.data_stripe_size = SZ_256M,
			.num_data_stripes = 2,
			.num_stripes = 2,
			.data_stripe_phys_start =
				{SZ_64M - SZ_4M, SZ_64M - SZ_4M + SZ_256M},
			.expected_mapped_addr = true,
			.mapped_logical = {SZ_4G + SZ_4M}
		},
		{
			/*
			 * Test that out-of-range physical addresses are
			 * ignored.
			 */

			/* SINGLE chunk type */
			.raid_type = 0,
			.physical_start = SZ_4G,
			.data_stripe_size = SZ_256M,
			.num_data_stripes = 1,
			.num_stripes = 1,
			.data_stripe_phys_start = {SZ_256M},
			.expected_mapped_addr = false,
			.mapped_logical = {0}
		}
	};

	test_msg("running extent_map tests");

	/*
	 * Note: the fs_info is not set up completely, we only need
	 * fs_info::fsid for the tracepoint.
	 */
	fs_info = btrfs_alloc_dummy_fs_info(PAGE_SIZE, PAGE_SIZE);
	if (!fs_info) {
		test_std_err(TEST_ALLOC_FS_INFO);
		return -ENOMEM;
	}

	inode = btrfs_new_test_inode();
	if (!inode) {
		test_std_err(TEST_ALLOC_INODE);
		ret = -ENOMEM;
		goto out;
	}

	root = btrfs_alloc_dummy_root(fs_info);
	if (IS_ERR(root)) {
		test_std_err(TEST_ALLOC_ROOT);
		ret = PTR_ERR(root);
		root = NULL;
		goto out;
	}

	BTRFS_I(inode)->root = root;

	ret = test_case_1(fs_info, BTRFS_I(inode));
	if (ret)
		goto out;
	ret = test_case_2(fs_info, BTRFS_I(inode));
	if (ret)
		goto out;
	ret = test_case_3(fs_info, BTRFS_I(inode));
	if (ret)
		goto out;
	ret = test_case_4(fs_info, BTRFS_I(inode));
	if (ret)
		goto out;
	ret = test_case_5(fs_info, BTRFS_I(inode));
	if (ret)
		goto out;
	ret = test_case_6(fs_info, BTRFS_I(inode));
	if (ret)
		goto out;
	ret = test_case_7(fs_info, BTRFS_I(inode));
	if (ret)
		goto out;

	test_msg("running rmap tests");
	for (i = 0; i < ARRAY_SIZE(rmap_tests); i++) {
		ret = test_rmap_block(fs_info, &rmap_tests[i]);
		if (ret)
			goto out;
	}

out:
	iput(inode);
	btrfs_free_dummy_root(root);
	btrfs_free_dummy_fs_info(fs_info);
	return ret;
}
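For context, a hedged sketch of how this entry point gets invoked: when the
kernel is built with CONFIG_BTRFS_FS_RUN_SANITY_TESTS, the btrfs module runs
its self-tests at init time from btrfs_run_sanity_tests() in
fs/btrfs/tests/btrfs-tests.c, roughly as below; the exact surrounding code
may differ.

	/* Inside btrfs_run_sanity_tests(), after the per-sectorsize tests: */
	ret = btrfs_test_extent_map();
	if (ret)
		goto out;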