mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-08-05 16:54:27 +00:00
for-6.16-rc4-tag
-----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAmhmwFQACgkQxWXV+ddt WDsVMA/+NuSth71V0AfiDnFyqjgDMqIlZL2+dqBiTYHXQQHKbqiUlKvYkWICCT6T 1YgDV+95XJYy4TDBoA49Ndd/l+CiDcMLbOYeneIfbJy13ts84jVANPkl4n03gPkF ktibCw15h0MENVctTCPc71dX2X0cV9WPf4iDmoxUZiukDA376akGTArZKwH4tVVg 4qVpzUtDdNOf848D+8DZKGd+ot/RWgEdLkFCZES27BMg/OFemxBK1MU6K8VjxiKF VoaSVJRDXuug8oVBAGNl86XpiSgd4gHyoNNA5b4mhdSWMSBMxUAaILsONT9pNQZA CFyHA1Jp2gLOIzQIzeXwWgXaAOQDtco8YWYaXhf0v0mySs89tweXjOibfj2mU9pS wPaJyeD+nyRDMwPa4VWEws64D3vXX6aKwiThUENuDmxBvrRXjrkGYH9tf0LNzDDe OKv/vOCfeyutxbjKhP+qElMhdh73BZnJ4UCxxYRRDq2v1Mg+k06swl+6uL6xenme a2KLJlwEoG6LAlkpZzV66ZEaIHDyGBZNdVYtuA/G3dDtmlt0aLXDdp1eq7NivS1j aV7cd0JMX89lAUtqKT932ZOw8RoDrUPPjsnXzCaZJ69mMVyEkxyCV+iYHTTJPDga W5Vg8Tq3d1gwxMebZHvyI6wwUhmGA0wUFG2eohYY/tcSrrUlrHQ= =Ke0p -----END PGP SIGNATURE-----

Merge tag 'for-6.16-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:

 - tree-log fixes:
    - fixes of log tracking of directories and subvolumes
    - fix iteration and error handling of inode references during log replay

 - fix free space tree rebuild (reported by syzbot)

* tag 'for-6.16-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: use btrfs_record_snapshot_destroy() during rmdir
  btrfs: propagate last_unlink_trans earlier when doing a rmdir
  btrfs: record new subvolume in parent dir earlier to avoid dir logging races
  btrfs: fix inode lookup error handling during log replay
  btrfs: fix iteration of extrefs during log replay
  btrfs: fix missing error handling when searching for inode refs during log replay
  btrfs: fix failure to rebuild free space tree using multiple transactions
This commit is contained in:
commit
4c06e63b92
5 changed files with 131 additions and 88 deletions
|
@ -83,6 +83,8 @@ enum btrfs_block_group_flags {
|
|||
BLOCK_GROUP_FLAG_ZONED_DATA_RELOC,
|
||||
/* Does the block group need to be added to the free space tree? */
|
||||
BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE,
|
||||
/* Set after we add a new block group to the free space tree. */
|
||||
BLOCK_GROUP_FLAG_FREE_SPACE_ADDED,
|
||||
/* Indicate that the block group is placed on a sequential zone */
|
||||
BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE,
|
||||
/*
|
||||
|
|
|
@ -1241,6 +1241,7 @@ static int clear_free_space_tree(struct btrfs_trans_handle *trans,
|
|||
{
|
||||
BTRFS_PATH_AUTO_FREE(path);
|
||||
struct btrfs_key key;
|
||||
struct rb_node *node;
|
||||
int nr;
|
||||
int ret;
|
||||
|
||||
|
@ -1269,6 +1270,16 @@ static int clear_free_space_tree(struct btrfs_trans_handle *trans,
|
|||
btrfs_release_path(path);
|
||||
}
|
||||
|
||||
node = rb_first_cached(&trans->fs_info->block_group_cache_tree);
|
||||
while (node) {
|
||||
struct btrfs_block_group *bg;
|
||||
|
||||
bg = rb_entry(node, struct btrfs_block_group, cache_node);
|
||||
clear_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &bg->runtime_flags);
|
||||
node = rb_next(node);
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1358,12 +1369,18 @@ int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
|
|||
|
||||
block_group = rb_entry(node, struct btrfs_block_group,
|
||||
cache_node);
|
||||
|
||||
if (test_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED,
|
||||
&block_group->runtime_flags))
|
||||
goto next;
|
||||
|
||||
ret = populate_free_space_tree(trans, block_group);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
btrfs_end_transaction(trans);
|
||||
return ret;
|
||||
}
|
||||
next:
|
||||
if (btrfs_should_end_transaction(trans)) {
|
||||
btrfs_end_transaction(trans);
|
||||
trans = btrfs_start_transaction(free_space_root, 1);
|
||||
|
@ -1390,6 +1407,29 @@ static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
|
|||
|
||||
clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags);
|
||||
|
||||
/*
|
||||
* While rebuilding the free space tree we may allocate new metadata
|
||||
* block groups while modifying the free space tree.
|
||||
*
|
||||
* Because during the rebuild (at btrfs_rebuild_free_space_tree()) we
|
||||
* can use multiple transactions, every time btrfs_end_transaction() is
|
||||
* called at btrfs_rebuild_free_space_tree() we finish the creation of
|
||||
* new block groups by calling btrfs_create_pending_block_groups(), and
|
||||
* that in turn calls us, through add_block_group_free_space(), to add
|
||||
* a free space info item and a free space extent item for the block
|
||||
* group.
|
||||
*
|
||||
* Then later btrfs_rebuild_free_space_tree() may find such new block
|
||||
* groups and processes them with populate_free_space_tree(), which can
|
||||
* fail with EEXIST since there are already items for the block group in
|
||||
* the free space tree. Notice that we say "may find" because a new
|
||||
* block group may be added to the block groups rbtree in a node before
|
||||
* or after the block group currently being processed by the rebuild
|
||||
* process. So signal the rebuild process to skip such new block groups
|
||||
* if it finds them.
|
||||
*/
|
||||
set_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &block_group->runtime_flags);
|
||||
|
||||
ret = add_new_free_space_info(trans, block_group, path);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
|
|
@ -4710,7 +4710,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
|
|||
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
|
||||
int ret = 0;
|
||||
struct btrfs_trans_handle *trans;
|
||||
u64 last_unlink_trans;
|
||||
struct fscrypt_name fname;
|
||||
|
||||
if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
|
||||
|
@ -4736,6 +4735,23 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
|
|||
goto out_notrans;
|
||||
}
|
||||
|
||||
/*
|
||||
* Propagate the last_unlink_trans value of the deleted dir to its
|
||||
* parent directory. This is to prevent an unrecoverable log tree in the
|
||||
* case we do something like this:
|
||||
* 1) create dir foo
|
||||
* 2) create snapshot under dir foo
|
||||
* 3) delete the snapshot
|
||||
* 4) rmdir foo
|
||||
* 5) mkdir foo
|
||||
* 6) fsync foo or some file inside foo
|
||||
*
|
||||
* This is because we can't unlink other roots when replaying the dir
|
||||
* deletes for directory foo.
|
||||
*/
|
||||
if (BTRFS_I(inode)->last_unlink_trans >= trans->transid)
|
||||
btrfs_record_snapshot_destroy(trans, BTRFS_I(dir));
|
||||
|
||||
if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
|
||||
ret = btrfs_unlink_subvol(trans, BTRFS_I(dir), dentry);
|
||||
goto out;
|
||||
|
@ -4745,27 +4761,11 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
|
|||
if (ret)
|
||||
goto out;
|
||||
|
||||
last_unlink_trans = BTRFS_I(inode)->last_unlink_trans;
|
||||
|
||||
/* now the directory is empty */
|
||||
ret = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)),
|
||||
&fname.disk_name);
|
||||
if (!ret) {
|
||||
if (!ret)
|
||||
btrfs_i_size_write(BTRFS_I(inode), 0);
|
||||
/*
|
||||
* Propagate the last_unlink_trans value of the deleted dir to
|
||||
* its parent directory. This is to prevent an unrecoverable
|
||||
* log tree in the case we do something like this:
|
||||
* 1) create dir foo
|
||||
* 2) create snapshot under dir foo
|
||||
* 3) delete the snapshot
|
||||
* 4) rmdir foo
|
||||
* 5) mkdir foo
|
||||
* 6) fsync foo or some file inside foo
|
||||
*/
|
||||
if (last_unlink_trans >= trans->transid)
|
||||
BTRFS_I(dir)->last_unlink_trans = last_unlink_trans;
|
||||
}
|
||||
out:
|
||||
btrfs_end_transaction(trans);
|
||||
out_notrans:
|
||||
|
|
|
@ -666,14 +666,14 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
|
|||
goto out;
|
||||
}
|
||||
|
||||
btrfs_record_new_subvolume(trans, BTRFS_I(dir));
|
||||
|
||||
ret = btrfs_create_new_inode(trans, &new_inode_args);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
btrfs_record_new_subvolume(trans, BTRFS_I(dir));
|
||||
|
||||
d_instantiate_new(dentry, new_inode_args.inode);
|
||||
new_inode_args.inode = NULL;
|
||||
|
||||
|
|
|
@ -143,6 +143,9 @@ static struct btrfs_inode *btrfs_iget_logging(u64 objectid, struct btrfs_root *r
|
|||
unsigned int nofs_flag;
|
||||
struct btrfs_inode *inode;
|
||||
|
||||
/* Only meant to be called for subvolume roots and not for log roots. */
|
||||
ASSERT(is_fstree(btrfs_root_id(root)));
|
||||
|
||||
/*
|
||||
* We're holding a transaction handle whether we are logging or
|
||||
* replaying a log tree, so we must make sure NOFS semantics apply
|
||||
|
@ -604,21 +607,6 @@ static int read_alloc_one_name(struct extent_buffer *eb, void *start, int len,
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* simple helper to read an inode off the disk from a given root
|
||||
* This can only be called for subvolume roots and not for the log
|
||||
*/
|
||||
static noinline struct btrfs_inode *read_one_inode(struct btrfs_root *root,
|
||||
u64 objectid)
|
||||
{
|
||||
struct btrfs_inode *inode;
|
||||
|
||||
inode = btrfs_iget_logging(objectid, root);
|
||||
if (IS_ERR(inode))
|
||||
return NULL;
|
||||
return inode;
|
||||
}
|
||||
|
||||
/* replays a single extent in 'eb' at 'slot' with 'key' into the
|
||||
* subvolume 'root'. path is released on entry and should be released
|
||||
* on exit.
|
||||
|
@ -674,9 +662,9 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
|
|||
return -EUCLEAN;
|
||||
}
|
||||
|
||||
inode = read_one_inode(root, key->objectid);
|
||||
if (!inode)
|
||||
return -EIO;
|
||||
inode = btrfs_iget_logging(key->objectid, root);
|
||||
if (IS_ERR(inode))
|
||||
return PTR_ERR(inode);
|
||||
|
||||
/*
|
||||
* first check to see if we already have this extent in the
|
||||
|
@ -948,9 +936,10 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
|
|||
|
||||
btrfs_release_path(path);
|
||||
|
||||
inode = read_one_inode(root, location.objectid);
|
||||
if (!inode) {
|
||||
ret = -EIO;
|
||||
inode = btrfs_iget_logging(location.objectid, root);
|
||||
if (IS_ERR(inode)) {
|
||||
ret = PTR_ERR(inode);
|
||||
inode = NULL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
|
@ -1073,7 +1062,9 @@ again:
|
|||
search_key.type = BTRFS_INODE_REF_KEY;
|
||||
search_key.offset = parent_objectid;
|
||||
ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
|
||||
if (ret == 0) {
|
||||
if (ret < 0) {
|
||||
return ret;
|
||||
} else if (ret == 0) {
|
||||
struct btrfs_inode_ref *victim_ref;
|
||||
unsigned long ptr;
|
||||
unsigned long ptr_end;
|
||||
|
@ -1146,13 +1137,13 @@ again:
|
|||
struct fscrypt_str victim_name;
|
||||
|
||||
extref = (struct btrfs_inode_extref *)(base + cur_offset);
|
||||
victim_name.len = btrfs_inode_extref_name_len(leaf, extref);
|
||||
|
||||
if (btrfs_inode_extref_parent(leaf, extref) != parent_objectid)
|
||||
goto next;
|
||||
|
||||
ret = read_alloc_one_name(leaf, &extref->name,
|
||||
btrfs_inode_extref_name_len(leaf, extref),
|
||||
&victim_name);
|
||||
victim_name.len, &victim_name);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
|
@ -1167,10 +1158,10 @@ again:
|
|||
kfree(victim_name.name);
|
||||
return ret;
|
||||
} else if (!ret) {
|
||||
ret = -ENOENT;
|
||||
victim_parent = read_one_inode(root,
|
||||
parent_objectid);
|
||||
if (victim_parent) {
|
||||
victim_parent = btrfs_iget_logging(parent_objectid, root);
|
||||
if (IS_ERR(victim_parent)) {
|
||||
ret = PTR_ERR(victim_parent);
|
||||
} else {
|
||||
inc_nlink(&inode->vfs_inode);
|
||||
btrfs_release_path(path);
|
||||
|
||||
|
@ -1315,9 +1306,9 @@ again:
|
|||
struct btrfs_inode *dir;
|
||||
|
||||
btrfs_release_path(path);
|
||||
dir = read_one_inode(root, parent_id);
|
||||
if (!dir) {
|
||||
ret = -ENOENT;
|
||||
dir = btrfs_iget_logging(parent_id, root);
|
||||
if (IS_ERR(dir)) {
|
||||
ret = PTR_ERR(dir);
|
||||
kfree(name.name);
|
||||
goto out;
|
||||
}
|
||||
|
@ -1389,15 +1380,17 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
|
|||
* copy the back ref in. The link count fixup code will take
|
||||
* care of the rest
|
||||
*/
|
||||
dir = read_one_inode(root, parent_objectid);
|
||||
if (!dir) {
|
||||
ret = -ENOENT;
|
||||
dir = btrfs_iget_logging(parent_objectid, root);
|
||||
if (IS_ERR(dir)) {
|
||||
ret = PTR_ERR(dir);
|
||||
dir = NULL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
inode = read_one_inode(root, inode_objectid);
|
||||
if (!inode) {
|
||||
ret = -EIO;
|
||||
inode = btrfs_iget_logging(inode_objectid, root);
|
||||
if (IS_ERR(inode)) {
|
||||
ret = PTR_ERR(inode);
|
||||
inode = NULL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
|
@ -1409,11 +1402,13 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
|
|||
* parent object can change from one array
|
||||
* item to another.
|
||||
*/
|
||||
if (!dir)
|
||||
dir = read_one_inode(root, parent_objectid);
|
||||
if (!dir) {
|
||||
ret = -ENOENT;
|
||||
goto out;
|
||||
dir = btrfs_iget_logging(parent_objectid, root);
|
||||
if (IS_ERR(dir)) {
|
||||
ret = PTR_ERR(dir);
|
||||
dir = NULL;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
ret = ref_get_fields(eb, ref_ptr, &name, &ref_index);
|
||||
|
@ -1682,9 +1677,9 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
|
|||
break;
|
||||
|
||||
btrfs_release_path(path);
|
||||
inode = read_one_inode(root, key.offset);
|
||||
if (!inode) {
|
||||
ret = -EIO;
|
||||
inode = btrfs_iget_logging(key.offset, root);
|
||||
if (IS_ERR(inode)) {
|
||||
ret = PTR_ERR(inode);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1720,9 +1715,9 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
|
|||
struct btrfs_inode *inode;
|
||||
struct inode *vfs_inode;
|
||||
|
||||
inode = read_one_inode(root, objectid);
|
||||
if (!inode)
|
||||
return -EIO;
|
||||
inode = btrfs_iget_logging(objectid, root);
|
||||
if (IS_ERR(inode))
|
||||
return PTR_ERR(inode);
|
||||
|
||||
vfs_inode = &inode->vfs_inode;
|
||||
key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID;
|
||||
|
@ -1761,14 +1756,14 @@ static noinline int insert_one_name(struct btrfs_trans_handle *trans,
|
|||
struct btrfs_inode *dir;
|
||||
int ret;
|
||||
|
||||
inode = read_one_inode(root, location->objectid);
|
||||
if (!inode)
|
||||
return -ENOENT;
|
||||
inode = btrfs_iget_logging(location->objectid, root);
|
||||
if (IS_ERR(inode))
|
||||
return PTR_ERR(inode);
|
||||
|
||||
dir = read_one_inode(root, dirid);
|
||||
if (!dir) {
|
||||
dir = btrfs_iget_logging(dirid, root);
|
||||
if (IS_ERR(dir)) {
|
||||
iput(&inode->vfs_inode);
|
||||
return -EIO;
|
||||
return PTR_ERR(dir);
|
||||
}
|
||||
|
||||
ret = btrfs_add_link(trans, dir, inode, name, 1, index);
|
||||
|
@ -1845,9 +1840,9 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
|
|||
bool update_size = true;
|
||||
bool name_added = false;
|
||||
|
||||
dir = read_one_inode(root, key->objectid);
|
||||
if (!dir)
|
||||
return -EIO;
|
||||
dir = btrfs_iget_logging(key->objectid, root);
|
||||
if (IS_ERR(dir))
|
||||
return PTR_ERR(dir);
|
||||
|
||||
ret = read_alloc_one_name(eb, di + 1, btrfs_dir_name_len(eb, di), &name);
|
||||
if (ret)
|
||||
|
@ -2147,9 +2142,10 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans,
|
|||
btrfs_dir_item_key_to_cpu(eb, di, &location);
|
||||
btrfs_release_path(path);
|
||||
btrfs_release_path(log_path);
|
||||
inode = read_one_inode(root, location.objectid);
|
||||
if (!inode) {
|
||||
ret = -EIO;
|
||||
inode = btrfs_iget_logging(location.objectid, root);
|
||||
if (IS_ERR(inode)) {
|
||||
ret = PTR_ERR(inode);
|
||||
inode = NULL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
|
@ -2301,14 +2297,17 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
|
|||
if (!log_path)
|
||||
return -ENOMEM;
|
||||
|
||||
dir = read_one_inode(root, dirid);
|
||||
/* it isn't an error if the inode isn't there, that can happen
|
||||
* because we replay the deletes before we copy in the inode item
|
||||
* from the log
|
||||
dir = btrfs_iget_logging(dirid, root);
|
||||
/*
|
||||
* It isn't an error if the inode isn't there, that can happen because
|
||||
* we replay the deletes before we copy in the inode item from the log.
|
||||
*/
|
||||
if (!dir) {
|
||||
if (IS_ERR(dir)) {
|
||||
btrfs_free_path(log_path);
|
||||
return 0;
|
||||
ret = PTR_ERR(dir);
|
||||
if (ret == -ENOENT)
|
||||
ret = 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
range_start = 0;
|
||||
|
@ -2467,9 +2466,9 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
|
|||
struct btrfs_inode *inode;
|
||||
u64 from;
|
||||
|
||||
inode = read_one_inode(root, key.objectid);
|
||||
if (!inode) {
|
||||
ret = -EIO;
|
||||
inode = btrfs_iget_logging(key.objectid, root);
|
||||
if (IS_ERR(inode)) {
|
||||
ret = PTR_ERR(inode);
|
||||
break;
|
||||
}
|
||||
from = ALIGN(i_size_read(&inode->vfs_inode),
|
||||
|
@ -7448,6 +7447,8 @@ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
|
|||
* full log sync.
|
||||
* Also we don't need to worry with renames, since btrfs_rename() marks the log
|
||||
* for full commit when renaming a subvolume.
|
||||
*
|
||||
* Must be called before creating the subvolume entry in its parent directory.
|
||||
*/
|
||||
void btrfs_record_new_subvolume(const struct btrfs_trans_handle *trans,
|
||||
struct btrfs_inode *dir)
|
||||
|
|
Loading…
Add table
Reference in a new issue