Mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
Synced 2025-08-05 16:54:27 +00:00
bcachefs: Unwritten extents support
- bch2_extent_merge checks the unwritten bit
- read path returns 0s for unwritten extents without actually reading
- reflink path skips over unwritten extents
- bch2_bkey_ptrs_invalid() checks for extents with both written and unwritten ptrs, and for non-normal extents (stripes, btree ptrs) with unwritten ptrs
- fiemap checks for unwritten extents and returns FIEMAP_EXTENT_UNWRITTEN

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
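For readers unfamiliar with the term, here is a minimal standalone sketch (not bcachefs code; toy_extent and toy_read are hypothetical names) of the read-path rule the commit message describes: an extent whose pointer carries the unwritten bit has space allocated but no valid data on disk, so reading it must return zeroes without issuing any I/O.

/*
 * Illustrative sketch only -- not bcachefs code.
 */
#include <stdio.h>
#include <string.h>
#include <stdbool.h>

struct toy_extent {
	bool		unwritten;	/* space allocated but never written */
	unsigned char	data[16];	/* backing bytes, valid only if written */
};

static void toy_read(const struct toy_extent *e, unsigned char *buf, size_t len)
{
	if (e->unwritten) {
		/* treat as a hole: skip the read and zero-fill the buffer */
		memset(buf, 0, len);
		return;
	}
	memcpy(buf, e->data, len < sizeof(e->data) ? len : sizeof(e->data));
}

int main(void)
{
	struct toy_extent written   = { .unwritten = false, .data = "hello" };
	struct toy_extent unwritten = { .unwritten = true,  .data = "stale" };
	unsigned char buf[16];

	toy_read(&written, buf, sizeof(buf));
	printf("written:   \"%s\"\n", buf);

	toy_read(&unwritten, buf, sizeof(buf));
	printf("unwritten: \"%s\" (zero-filled, no I/O)\n", buf);
	return 0;
}

This is the same behavior fallocate'd-but-unwritten ranges have in other filesystems: the allocation exists, but readers never see stale device contents.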
parent 2f1f7fe98d
commit 792031116b
8 changed files with 69 additions and 16 deletions
@@ -582,7 +582,7 @@ struct bch_extent_ptr {
 	__u64			type:1,
 				cached:1,
 				unused:1,
-				reservation:1,
+				unwritten:1,
 				offset:44, /* 8 petabytes */
 				dev:8,
 				gen:8;
@@ -590,7 +590,7 @@ struct bch_extent_ptr {
 	__u64			gen:8,
 				dev:8,
 				offset:44,
-				reservation:1,
+				unwritten:1,
 				unused:1,
 				cached:1,
 				type:1;
@@ -116,6 +116,13 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
 		return -EIO;
 
 	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+		/*
+		 * Unwritten extent: no need to actually read, treat it as a
+		 * hole and return 0s:
+		 */
+		if (p.ptr.unwritten)
+			return 0;
+
 		ca = bch_dev_bkey_exists(c, p.ptr.dev);
 
 		/*
@@ -269,6 +276,7 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r)
 		    rp.ptr.offset + rp.crc.offset ||
 		    lp.ptr.dev != rp.ptr.dev ||
 		    lp.ptr.gen != rp.ptr.gen ||
+		    lp.ptr.unwritten != rp.ptr.unwritten ||
 		    lp.has_ec != rp.has_ec)
 			return false;
 
@@ -904,6 +912,9 @@ bool bch2_extents_match(struct bkey_s_c k1, struct bkey_s_c k2)
 	const union bch_extent_entry *entry1, *entry2;
 	struct extent_ptr_decoded p1, p2;
 
+	if (bkey_extent_is_unwritten(k1) != bkey_extent_is_unwritten(k2))
+		return false;
+
 	bkey_for_each_ptr_decode(k1.k, ptrs1, p1, entry1)
 		bkey_for_each_ptr_decode(k2.k, ptrs2, p2, entry2)
 			if (p1.ptr.dev == p2.ptr.dev &&
@@ -981,10 +992,12 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
 			u32 offset;
 			u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset);
 
-			prt_printf(out, "ptr: %u:%llu:%u gen %u%s", ptr->dev,
-				   b, offset, ptr->gen,
-				   ptr->cached ? " cached" : "");
+			prt_printf(out, "ptr: %u:%llu:%u gen %u",
+				   ptr->dev, b, offset, ptr->gen);
+			if (ptr->cached)
+				prt_str(out, " cached");
+			if (ptr->unwritten)
+				prt_str(out, " unwritten");
 			if (ca && ptr_stale(ca, ptr))
 				prt_printf(out, " stale");
 		}
@@ -1073,6 +1086,7 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k,
 	unsigned size_ondisk = k.k->size;
 	unsigned nonce = UINT_MAX;
 	unsigned nr_ptrs = 0;
+	bool unwritten = false;
 	int ret;
 
 	if (bkey_is_btree_ptr(k.k))
@@ -1097,6 +1111,18 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k,
 						 false, err);
 			if (ret)
 				return ret;
+
+			if (nr_ptrs && unwritten != entry->ptr.unwritten) {
+				prt_printf(err, "extent with unwritten and written ptrs");
+				return -BCH_ERR_invalid_bkey;
+			}
+
+			if (k.k->type != KEY_TYPE_extent && entry->ptr.unwritten) {
+				prt_printf(err, "has unwritten ptrs");
+				return -BCH_ERR_invalid_bkey;
+			}
+
+			unwritten = entry->ptr.unwritten;
 			nr_ptrs++;
 			break;
 		case BCH_EXTENT_ENTRY_crc32:
@@ -510,6 +510,23 @@ static inline bool bkey_extent_is_allocation(const struct bkey *k)
 	}
 }
 
+static inline bool bkey_extent_is_unwritten(struct bkey_s_c k)
+{
+	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+	const struct bch_extent_ptr *ptr;
+
+	bkey_for_each_ptr(ptrs, ptr)
+		if (ptr->unwritten)
+			return true;
+	return false;
+}
+
+static inline bool bkey_extent_is_reservation(struct bkey_s_c k)
+{
+	return k.k->type == KEY_TYPE_reservation ||
+		bkey_extent_is_unwritten(k);
+}
+
 static inline struct bch_devs_list bch2_bkey_devs(struct bkey_s_c k)
 {
 	struct bch_devs_list ret = (struct bch_devs_list) { 0 };
@@ -341,11 +341,11 @@ static struct bch_page_state *bch2_page_state_create(struct page *page,
 	return bch2_page_state(page) ?: __bch2_page_state_create(page, gfp);
 }
 
-static unsigned bkey_to_sector_state(const struct bkey *k)
+static unsigned bkey_to_sector_state(struct bkey_s_c k)
 {
-	if (k->type == KEY_TYPE_reservation)
+	if (bkey_extent_is_reservation(k))
 		return SECTOR_RESERVED;
-	if (bkey_extent_is_allocation(k))
+	if (bkey_extent_is_allocation(k.k))
 		return SECTOR_ALLOCATED;
 	return SECTOR_UNALLOCATED;
 }
@@ -396,7 +396,7 @@ retry:
 			   SPOS(inum.inum, offset, snapshot),
 			   BTREE_ITER_SLOTS, k, ret) {
 		unsigned nr_ptrs = bch2_bkey_nr_ptrs_fully_allocated(k);
-		unsigned state = bkey_to_sector_state(k.k);
+		unsigned state = bkey_to_sector_state(k);
 
 		while (pg_idx < nr_pages) {
 			struct page *page = pages[pg_idx];
@@ -436,7 +436,7 @@ static void bch2_bio_page_state_set(struct bio *bio, struct bkey_s_c k)
 	struct bio_vec bv;
 	unsigned nr_ptrs = k.k->type == KEY_TYPE_reflink_v
 		? 0 : bch2_bkey_nr_ptrs_fully_allocated(k);
-	unsigned state = bkey_to_sector_state(k.k);
+	unsigned state = bkey_to_sector_state(k);
 
 	bio_for_each_segment(bv, bio, iter)
 		__bch2_page_state_set(bv.bv_page, bv.bv_offset >> 9,
@@ -3093,8 +3093,8 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
 			goto bkey_err;
 
 		/* already reserved */
-		if (k.k->type == KEY_TYPE_reservation &&
-		    bkey_s_c_to_reservation(k).v->nr_replicas >= opts.data_replicas) {
+		if (bkey_extent_is_reservation(k) &&
+		    bch2_bkey_nr_ptrs_fully_allocated(k) >= opts.data_replicas) {
 			bch2_btree_iter_advance(&iter);
 			continue;
 		}
@@ -811,6 +811,9 @@ static int bch2_fill_extent(struct bch_fs *c,
 		int flags2 = 0;
 		u64 offset = p.ptr.offset;
 
+		if (p.ptr.unwritten)
+			flags2 |= FIEMAP_EXTENT_UNWRITTEN;
+
 		if (p.crc.compression_type)
 			flags2 |= FIEMAP_EXTENT_ENCODED;
 		else
@@ -1251,8 +1251,8 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
 			continue;
 
 		if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
-				k.k->type != KEY_TYPE_reservation &&
-				k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9, c,
+				k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 &&
+				!bkey_extent_is_reservation(k), c,
 				"extent type past end of inode %llu:%u, i_size %llu\n %s",
 				i->inode.bi_inum, i->snapshot, i->inode.bi_size,
 				(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
@@ -1481,6 +1481,9 @@ static inline bool should_promote(struct bch_fs *c, struct bkey_s_c k,
 	if (bch2_bkey_has_target(c, k, opts.promote_target))
 		return false;
 
+	if (bkey_extent_is_unwritten(k))
+		return false;
+
 	if (bch2_target_congested(c, opts.promote_target)) {
 		/* XXX trace this */
 		return false;
@@ -251,9 +251,13 @@ static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end)
 	struct bkey_s_c k;
 	int ret;
 
-	for_each_btree_key_upto_continue_norestart(*iter, end, 0, k, ret)
+	for_each_btree_key_upto_continue_norestart(*iter, end, 0, k, ret) {
+		if (bkey_extent_is_unwritten(k))
+			continue;
+
 		if (bkey_extent_is_data(k.k))
 			return k;
+	}
 
 	if (bkey_ge(iter->pos, end))
 		bch2_btree_iter_set_pos(iter, end);