From b62fd63ade7cb573b114972ef8f9fa505be8d74a Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 3 Sep 2025 16:53:21 +0100
Subject: [PATCH 1/4] btrfs: fix invalid extref key setup when replaying dentry

The offset for an extref item's key is not the object ID of the parent
dir, otherwise we would not need the extref item and would use plain ref
items. Instead the offset is the result of a hash computation that uses
the object ID of the parent dir and the name associated to the entry.
So fix this by setting the key offset at replay_one_name() to be the
result of calling btrfs_extref_hash().

Fixes: 725af92a6251 ("btrfs: Open-code name_in_log_ref in replay_one_name")
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tree-log.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 7d5d90845ca9..7a63afedd01e 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1964,7 +1964,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
 
 	search_key.objectid = log_key.objectid;
 	search_key.type = BTRFS_INODE_EXTREF_KEY;
-	search_key.offset = key->objectid;
+	search_key.offset = btrfs_extref_hash(key->objectid, name.name, name.len);
 	ret = backref_in_log(root->log_root, &search_key, key->objectid, &name);
 	if (ret < 0) {
 		goto out;

From 5b8d2964754102323ca24495ba94892426284e3a Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Date: Fri, 5 Sep 2025 15:54:43 +0200
Subject: [PATCH 2/4] btrfs: zoned: fix incorrect ASSERT in
 btrfs_zoned_reserve_data_reloc_bg()

When moving a block-group to the dedicated data relocation space-info in
btrfs_zoned_reserve_data_reloc_bg() it is asserted that the newly
created block group for data relocation does not contain any
zone_unusable bytes.

But on disks with zone_capacity < zone_size, the difference between
zone_size and zone_capacity is accounted as zone_unusable.

Instead of asserting that the block-group does not contain any
zone_unusable bytes, remove them from the block-groups total size.

Reported-by: Yi Zhang <yi.zhang@redhat.com>
Link: https://lore.kernel.org/linux-block/CAHj4cs8-cS2E+-xQ-d2Bj6vMJZ+CwT_cbdWBTju4BV35LsvEYw@mail.gmail.com/
Fixes: daa0fde322350 ("btrfs: zoned: fix data relocation block group reservation")
Reviewed-by: Naohiro Aota <naohiro.aota@wdc.com>
Tested-by: Yi Zhang <yi.zhang@redhat.com>
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/zoned.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index ea662036f441..efc2a81f50e5 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -2582,9 +2582,9 @@ again:
 			spin_lock(&space_info->lock);
 			space_info->total_bytes -= bg->length;
 			space_info->disk_total -= bg->length * factor;
+			space_info->disk_total -= bg->zone_unusable;
 			/* There is no allocation ever happened. */
 			ASSERT(bg->used == 0);
-			ASSERT(bg->zone_unusable == 0);
 			/* No super block in a block group on the zoned setup. */
 			ASSERT(bg->bytes_super == 0);
 			spin_unlock(&space_info->lock);

From 8679d2687c351824d08cf1f0e86f3b65f22a00fe Mon Sep 17 00:00:00 2001
From: austinchang <austinchang@synology.com>
Date: Thu, 11 Sep 2025 06:06:29 +0000
Subject: [PATCH 3/4] btrfs: initialize inode::file_extent_tree after i_mode
 has been set

btrfs_init_file_extent_tree() uses S_ISREG() to determine if the file is
a regular file. In the beginning of btrfs_read_locked_inode(), the i_mode
hasn't been read from inode item, then file_extent_tree won't be used at
all in volumes without NO_HOLES.

Fix this by calling btrfs_init_file_extent_tree() after i_mode is
initialized in btrfs_read_locked_inode().

Fixes: 3d7db6e8bd22e6 ("btrfs: don't allocate file extent tree for non regular files")
CC: stable@vger.kernel.org # 6.12+
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: austinchang <austinchang@synology.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/delayed-inode.c |  3 ---
 fs/btrfs/inode.c         | 11 +++++------
 2 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 0f8d8e275143..c0c1ddd46b67 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1843,7 +1843,6 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
 
 int btrfs_fill_inode(struct btrfs_inode *inode, u32 *rdev)
 {
-	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	struct btrfs_delayed_node *delayed_node;
 	struct btrfs_inode_item *inode_item;
 	struct inode *vfs_inode = &inode->vfs_inode;
@@ -1864,8 +1863,6 @@ int btrfs_fill_inode(struct btrfs_inode *inode, u32 *rdev)
 	i_uid_write(vfs_inode, btrfs_stack_inode_uid(inode_item));
 	i_gid_write(vfs_inode, btrfs_stack_inode_gid(inode_item));
 	btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item));
-	btrfs_inode_set_file_extent_range(inode, 0,
-			round_up(i_size_read(vfs_inode), fs_info->sectorsize));
 	vfs_inode->i_mode = btrfs_stack_inode_mode(inode_item);
 	set_nlink(vfs_inode, btrfs_stack_inode_nlink(inode_item));
 	inode_set_bytes(vfs_inode, btrfs_stack_inode_nbytes(inode_item));
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index e7218e78bff4..18db1053cdf0 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3885,10 +3885,6 @@ static int btrfs_read_locked_inode(struct btrfs_inode *inode, struct btrfs_path
 	bool filled = false;
 	int first_xattr_slot;
 
-	ret = btrfs_init_file_extent_tree(inode);
-	if (ret)
-		goto out;
-
 	ret = btrfs_fill_inode(inode, &rdev);
 	if (!ret)
 		filled = true;
@@ -3920,8 +3916,6 @@ static int btrfs_read_locked_inode(struct btrfs_inode *inode, struct btrfs_path
 	i_uid_write(vfs_inode, btrfs_inode_uid(leaf, inode_item));
 	i_gid_write(vfs_inode, btrfs_inode_gid(leaf, inode_item));
 	btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item));
-	btrfs_inode_set_file_extent_range(inode, 0,
-			round_up(i_size_read(vfs_inode), fs_info->sectorsize));
 
 	inode_set_atime(vfs_inode, btrfs_timespec_sec(leaf, &inode_item->atime),
 			btrfs_timespec_nsec(leaf, &inode_item->atime));
@@ -3953,6 +3947,11 @@ static int btrfs_read_locked_inode(struct btrfs_inode *inode, struct btrfs_path
 	btrfs_set_inode_mapping_order(inode);
 
 cache_index:
+	ret = btrfs_init_file_extent_tree(inode);
+	if (ret)
+		goto out;
+	btrfs_inode_set_file_extent_range(inode, 0,
+			round_up(i_size_read(vfs_inode), fs_info->sectorsize));
 	/*
 	 * If we were modified in the current generation and evicted from memory
 	 * and then re-read we need to do a full sync since we don't have any

From 80eb65ccf6f72dc37b972583fe71cd8a50ff7e51 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Mon, 8 Sep 2025 12:51:11 +0100
Subject: [PATCH 4/4] btrfs: annotate block group access with data_race() when
 sorting for reclaim

When sorting the block group list for reclaim we are using a block group's
used bytes counter without taking the block group's spinlock, so we can
race with a concurrent task updating it (at btrfs_update_block_group()),
which makes tools like KCSAN unhappy and report a race.

Since the sorting is not strictly needed from a functional perspective
and such races should rarely cause any ordering changes (only load/store
tearing could cause them), not to mention that after the sorting the
ordering may no longer be accurate due to concurrent allocations and
deallocations of extents in a block group, annotate the accesses to the
used counter with data_race() to silence KCSAN and similar tools.

Reviewed-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/block-group.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 9bf282d2453c..499a9edf0ca3 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1795,7 +1795,14 @@ static int reclaim_bgs_cmp(void *unused, const struct list_head *a,
 	bg1 = list_entry(a, struct btrfs_block_group, bg_list);
 	bg2 = list_entry(b, struct btrfs_block_group, bg_list);
 
-	return bg1->used > bg2->used;
+	/*
+	 * Some other task may be updating the ->used field concurrently, but it
+	 * is not serious if we get a stale value or load/store tearing issues,
+	 * as sorting the list of block groups to reclaim is not critical and an
+	 * occasional imperfect order is ok. So silence KCSAN and avoid the
+	 * overhead of locking or any other synchronization.
+	 */
+	return data_race(bg1->used > bg2->used);
 }
 
 static inline bool btrfs_should_reclaim(const struct btrfs_fs_info *fs_info)