mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-08-05 00:34:52 +00:00
bcachefs: Unwritten extents support
- bch2_extent_merge checks unwritten bit
- read path returns 0s for unwritten extents without actually reading
- reflink path skips over unwritten extents
- bch2_bkey_ptrs_invalid() checks for extents with both written and unwritten ptrs, and non-normal extents (stripes, btree ptrs) with unwritten ptrs
- fiemap checks for unwritten extents and returns FIEMAP_EXTENT_UNWRITTEN

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
2f1f7fe98d
commit
792031116b
8 changed files with 69 additions and 16 deletions
|
@ -582,7 +582,7 @@ struct bch_extent_ptr {
|
|||
__u64 type:1,
|
||||
cached:1,
|
||||
unused:1,
|
||||
reservation:1,
|
||||
unwritten:1,
|
||||
offset:44, /* 8 petabytes */
|
||||
dev:8,
|
||||
gen:8;
|
||||
|
@ -590,7 +590,7 @@ struct bch_extent_ptr {
|
|||
__u64 gen:8,
|
||||
dev:8,
|
||||
offset:44,
|
||||
reservation:1,
|
||||
unwritten:1,
|
||||
unused:1,
|
||||
cached:1,
|
||||
type:1;
|
||||
|
|
|
@ -116,6 +116,13 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
|
|||
return -EIO;
|
||||
|
||||
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
|
||||
/*
|
||||
* Unwritten extent: no need to actually read, treat it as a
|
||||
* hole and return 0s:
|
||||
*/
|
||||
if (p.ptr.unwritten)
|
||||
return 0;
|
||||
|
||||
ca = bch_dev_bkey_exists(c, p.ptr.dev);
|
||||
|
||||
/*
|
||||
|
@ -269,6 +276,7 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r)
|
|||
rp.ptr.offset + rp.crc.offset ||
|
||||
lp.ptr.dev != rp.ptr.dev ||
|
||||
lp.ptr.gen != rp.ptr.gen ||
|
||||
lp.ptr.unwritten != rp.ptr.unwritten ||
|
||||
lp.has_ec != rp.has_ec)
|
||||
return false;
|
||||
|
||||
|
@ -904,6 +912,9 @@ bool bch2_extents_match(struct bkey_s_c k1, struct bkey_s_c k2)
|
|||
const union bch_extent_entry *entry1, *entry2;
|
||||
struct extent_ptr_decoded p1, p2;
|
||||
|
||||
if (bkey_extent_is_unwritten(k1) != bkey_extent_is_unwritten(k2))
|
||||
return false;
|
||||
|
||||
bkey_for_each_ptr_decode(k1.k, ptrs1, p1, entry1)
|
||||
bkey_for_each_ptr_decode(k2.k, ptrs2, p2, entry2)
|
||||
if (p1.ptr.dev == p2.ptr.dev &&
|
||||
|
@ -981,10 +992,12 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
|
|||
u32 offset;
|
||||
u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset);
|
||||
|
||||
prt_printf(out, "ptr: %u:%llu:%u gen %u%s", ptr->dev,
|
||||
b, offset, ptr->gen,
|
||||
ptr->cached ? " cached" : "");
|
||||
|
||||
prt_printf(out, "ptr: %u:%llu:%u gen %u",
|
||||
ptr->dev, b, offset, ptr->gen);
|
||||
if (ptr->cached)
|
||||
prt_str(out, " cached");
|
||||
if (ptr->unwritten)
|
||||
prt_str(out, " unwritten");
|
||||
if (ca && ptr_stale(ca, ptr))
|
||||
prt_printf(out, " stale");
|
||||
}
|
||||
|
@ -1073,6 +1086,7 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k,
|
|||
unsigned size_ondisk = k.k->size;
|
||||
unsigned nonce = UINT_MAX;
|
||||
unsigned nr_ptrs = 0;
|
||||
bool unwritten = false;
|
||||
int ret;
|
||||
|
||||
if (bkey_is_btree_ptr(k.k))
|
||||
|
@ -1097,6 +1111,18 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k,
|
|||
false, err);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (nr_ptrs && unwritten != entry->ptr.unwritten) {
|
||||
prt_printf(err, "extent with unwritten and written ptrs");
|
||||
return -BCH_ERR_invalid_bkey;
|
||||
}
|
||||
|
||||
if (k.k->type != KEY_TYPE_extent && entry->ptr.unwritten) {
|
||||
prt_printf(err, "has unwritten ptrs");
|
||||
return -BCH_ERR_invalid_bkey;
|
||||
}
|
||||
|
||||
unwritten = entry->ptr.unwritten;
|
||||
nr_ptrs++;
|
||||
break;
|
||||
case BCH_EXTENT_ENTRY_crc32:
|
||||
|
|
|
@ -510,6 +510,23 @@ static inline bool bkey_extent_is_allocation(const struct bkey *k)
|
|||
}
|
||||
}
|
||||
|
||||
/*
 * bkey_extent_is_unwritten - test whether a key carries an unwritten pointer.
 * @k: key to inspect
 *
 * Walks the pointers obtained from bch2_bkey_ptrs_c(k) and returns true as
 * soon as one with its 'unwritten' bit set is found. Returns false when no
 * visited pointer has the bit set.
 */
static inline bool bkey_extent_is_unwritten(struct bkey_s_c k)
|
||||
{
|
||||
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
|
||||
const struct bch_extent_ptr *ptr;
|
||||
|
||||
bkey_for_each_ptr(ptrs, ptr)
|
||||
if (ptr->unwritten)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
 * bkey_extent_is_reservation - test whether a key counts as a reservation.
 * @k: key to inspect
 *
 * Returns true when the key's type is KEY_TYPE_reservation, or when
 * bkey_extent_is_unwritten(k) reports an unwritten pointer; false otherwise.
 */
static inline bool bkey_extent_is_reservation(struct bkey_s_c k)
|
||||
{
|
||||
return k.k->type == KEY_TYPE_reservation ||
|
||||
bkey_extent_is_unwritten(k);
|
||||
}
|
||||
|
||||
static inline struct bch_devs_list bch2_bkey_devs(struct bkey_s_c k)
|
||||
{
|
||||
struct bch_devs_list ret = (struct bch_devs_list) { 0 };
|
||||
|
|
|
@ -341,11 +341,11 @@ static struct bch_page_state *bch2_page_state_create(struct page *page,
|
|||
return bch2_page_state(page) ?: __bch2_page_state_create(page, gfp);
|
||||
}
|
||||
|
||||
static unsigned bkey_to_sector_state(const struct bkey *k)
|
||||
static unsigned bkey_to_sector_state(struct bkey_s_c k)
|
||||
{
|
||||
if (k->type == KEY_TYPE_reservation)
|
||||
if (bkey_extent_is_reservation(k))
|
||||
return SECTOR_RESERVED;
|
||||
if (bkey_extent_is_allocation(k))
|
||||
if (bkey_extent_is_allocation(k.k))
|
||||
return SECTOR_ALLOCATED;
|
||||
return SECTOR_UNALLOCATED;
|
||||
}
|
||||
|
@ -396,7 +396,7 @@ retry:
|
|||
SPOS(inum.inum, offset, snapshot),
|
||||
BTREE_ITER_SLOTS, k, ret) {
|
||||
unsigned nr_ptrs = bch2_bkey_nr_ptrs_fully_allocated(k);
|
||||
unsigned state = bkey_to_sector_state(k.k);
|
||||
unsigned state = bkey_to_sector_state(k);
|
||||
|
||||
while (pg_idx < nr_pages) {
|
||||
struct page *page = pages[pg_idx];
|
||||
|
@ -436,7 +436,7 @@ static void bch2_bio_page_state_set(struct bio *bio, struct bkey_s_c k)
|
|||
struct bio_vec bv;
|
||||
unsigned nr_ptrs = k.k->type == KEY_TYPE_reflink_v
|
||||
? 0 : bch2_bkey_nr_ptrs_fully_allocated(k);
|
||||
unsigned state = bkey_to_sector_state(k.k);
|
||||
unsigned state = bkey_to_sector_state(k);
|
||||
|
||||
bio_for_each_segment(bv, bio, iter)
|
||||
__bch2_page_state_set(bv.bv_page, bv.bv_offset >> 9,
|
||||
|
@ -3093,8 +3093,8 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
|
|||
goto bkey_err;
|
||||
|
||||
/* already reserved */
|
||||
if (k.k->type == KEY_TYPE_reservation &&
|
||||
bkey_s_c_to_reservation(k).v->nr_replicas >= opts.data_replicas) {
|
||||
if (bkey_extent_is_reservation(k) &&
|
||||
bch2_bkey_nr_ptrs_fully_allocated(k) >= opts.data_replicas) {
|
||||
bch2_btree_iter_advance(&iter);
|
||||
continue;
|
||||
}
|
||||
|
|
|
@ -811,6 +811,9 @@ static int bch2_fill_extent(struct bch_fs *c,
|
|||
int flags2 = 0;
|
||||
u64 offset = p.ptr.offset;
|
||||
|
||||
if (p.ptr.unwritten)
|
||||
flags2 |= FIEMAP_EXTENT_UNWRITTEN;
|
||||
|
||||
if (p.crc.compression_type)
|
||||
flags2 |= FIEMAP_EXTENT_ENCODED;
|
||||
else
|
||||
|
|
|
@ -1251,8 +1251,8 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
|
|||
continue;
|
||||
|
||||
if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
|
||||
k.k->type != KEY_TYPE_reservation &&
|
||||
k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9, c,
|
||||
k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 &&
|
||||
!bkey_extent_is_reservation(k), c,
|
||||
"extent type past end of inode %llu:%u, i_size %llu\n %s",
|
||||
i->inode.bi_inum, i->snapshot, i->inode.bi_size,
|
||||
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
|
||||
|
|
|
@ -1481,6 +1481,9 @@ static inline bool should_promote(struct bch_fs *c, struct bkey_s_c k,
|
|||
if (bch2_bkey_has_target(c, k, opts.promote_target))
|
||||
return false;
|
||||
|
||||
if (bkey_extent_is_unwritten(k))
|
||||
return false;
|
||||
|
||||
if (bch2_target_congested(c, opts.promote_target)) {
|
||||
/* XXX trace this */
|
||||
return false;
|
||||
|
|
|
@ -251,9 +251,13 @@ static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end)
|
|||
struct bkey_s_c k;
|
||||
int ret;
|
||||
|
||||
for_each_btree_key_upto_continue_norestart(*iter, end, 0, k, ret)
|
||||
for_each_btree_key_upto_continue_norestart(*iter, end, 0, k, ret) {
|
||||
if (bkey_extent_is_unwritten(k))
|
||||
continue;
|
||||
|
||||
if (bkey_extent_is_data(k.k))
|
||||
return k;
|
||||
}
|
||||
|
||||
if (bkey_ge(iter->pos, end))
|
||||
bch2_btree_iter_set_pos(iter, end);
|
||||
|
|
Loading…
Add table
Reference in a new issue