Mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git (synced 2025-09-18 22:14:16 +00:00)
bcachefs: Self healing on read IO error
This repurposes the promote path, which already knows how to call data_update() after a read: we now automatically rewrite bad data when we get a read error and then successfully retry from a different replica.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent b1d63b06e8
commit a2cb8a6236
3 changed files with 53 additions and 26 deletions
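
The shape of the mechanism is worth spelling out before the diff: remember which replicas failed while reading, and once a retry from another replica succeeds, rewrite the data on every device that failed. Below is a minimal standalone sketch of that flow, not bcachefs code — read_replica() and rewrite_replica() are hypothetical stand-ins for the real read path and the data_update() machinery, and the failure bitmask plays the role that rewrite_ptrs plays in the patch.

/*
 * Toy model of self-healing reads: try replicas in order, remember
 * which devices failed, and after a successful retry repair each one.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_REPLICAS 4

/* pretend replica 1 returns bad data; the others are healthy */
static bool read_replica(unsigned dev, char *buf)
{
	if (dev == 1)
		return false;	/* simulated IO/checksum error */
	snprintf(buf, 16, "data@dev%u", dev);
	return true;
}

static void rewrite_replica(unsigned dev, const char *buf)
{
	printf("self-heal: rewriting dev %u with \"%s\"\n", dev, buf);
}

int main(void)
{
	uint32_t failed = 0;	/* bitmask of failed devices */
	char buf[16];
	int good = -1;

	for (unsigned dev = 0; dev < MAX_REPLICAS; dev++) {
		if (read_replica(dev, buf)) {
			good = dev;
			break;
		}
		failed |= 1U << dev;	/* mark failure, try the next replica */
	}

	if (good < 0)
		return 1;	/* all replicas bad: surface the error */

	/* retry succeeded: repair every replica that failed along the way */
	for (unsigned dev = 0; dev < MAX_REPLICAS; dev++)
		if (failed & (1U << dev))
			rewrite_replica(dev, buf);
	return 0;
}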
fs/bcachefs/extents.c
@@ -37,8 +37,8 @@ static void bch2_extent_crc_pack(union bch_extent_crc *,
 				       struct bch_extent_crc_unpacked,
 				       enum bch_extent_entry_type);
 
-static struct bch_dev_io_failures *dev_io_failures(struct bch_io_failures *f,
-						   unsigned dev)
+struct bch_dev_io_failures *bch2_dev_io_failures(struct bch_io_failures *f,
+						 unsigned dev)
 {
 	struct bch_dev_io_failures *i;
 
@@ -52,7 +52,7 @@ static struct bch_dev_io_failures *dev_io_failures(struct bch_io_failures *f,
 void bch2_mark_io_failure(struct bch_io_failures *failed,
 			  struct extent_ptr_decoded *p)
 {
-	struct bch_dev_io_failures *f = dev_io_failures(failed, p->ptr.dev);
+	struct bch_dev_io_failures *f = bch2_dev_io_failures(failed, p->ptr.dev);
 
 	if (!f) {
 		BUG_ON(failed->nr >= ARRAY_SIZE(failed->devs));
@@ -140,7 +140,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
 		if (p.ptr.cached && (!ca || dev_ptr_stale_rcu(ca, &p.ptr)))
 			continue;
 
-		f = failed ? dev_io_failures(failed, p.ptr.dev) : NULL;
+		f = failed ? bch2_dev_io_failures(failed, p.ptr.dev) : NULL;
 		if (f)
 			p.idx = f->nr_failed < f->nr_retries
 				? f->idx
fs/bcachefs/extents.h
@@ -399,6 +399,8 @@ out: \
 
 /* utility code common to all keys with pointers: */
 
+struct bch_dev_io_failures *bch2_dev_io_failures(struct bch_io_failures *,
+						 unsigned);
 void bch2_mark_io_failure(struct bch_io_failures *,
 			  struct extent_ptr_decoded *);
 int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c,
fs/bcachefs/io_read.c
@@ -93,21 +93,24 @@ static const struct rhashtable_params bch_promote_params = {
 static inline int should_promote(struct bch_fs *c, struct bkey_s_c k,
 				  struct bpos pos,
 				  struct bch_io_opts opts,
-				  unsigned flags)
+				  unsigned flags,
+				  struct bch_io_failures *failed)
 {
-	BUG_ON(!opts.promote_target);
+	if (!failed) {
+		BUG_ON(!opts.promote_target);
 
-	if (!(flags & BCH_READ_MAY_PROMOTE))
-		return -BCH_ERR_nopromote_may_not;
+		if (!(flags & BCH_READ_MAY_PROMOTE))
+			return -BCH_ERR_nopromote_may_not;
 
-	if (bch2_bkey_has_target(c, k, opts.promote_target))
-		return -BCH_ERR_nopromote_already_promoted;
+		if (bch2_bkey_has_target(c, k, opts.promote_target))
+			return -BCH_ERR_nopromote_already_promoted;
 
-	if (bkey_extent_is_unwritten(k))
-		return -BCH_ERR_nopromote_unwritten;
+		if (bkey_extent_is_unwritten(k))
+			return -BCH_ERR_nopromote_unwritten;
 
-	if (bch2_target_congested(c, opts.promote_target))
-		return -BCH_ERR_nopromote_congested;
+		if (bch2_target_congested(c, opts.promote_target))
+			return -BCH_ERR_nopromote_congested;
+	}
 
 	if (rhashtable_lookup_fast(&c->promote_table, &pos,
 				   bch_promote_params))
@@ -164,7 +167,8 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans,
 					  struct extent_ptr_decoded *pick,
 					  struct bch_io_opts opts,
 					  unsigned sectors,
-					  struct bch_read_bio **rbio)
+					  struct bch_read_bio **rbio,
+					  struct bch_io_failures *failed)
 {
 	struct bch_fs *c = trans->c;
 	struct promote_op *op = NULL;
@@ -217,14 +221,28 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans,
 	bio = &op->write.op.wbio.bio;
 	bio_init(bio, NULL, bio->bi_inline_vecs, pages, 0);
 
+	struct data_update_opts update_opts = {};
+
+	if (!failed) {
+		update_opts.target = opts.promote_target;
+		update_opts.extra_replicas = 1;
+		update_opts.write_flags = BCH_WRITE_ALLOC_NOWAIT|BCH_WRITE_CACHED;
+	} else {
+		update_opts.target = opts.foreground_target;
+
+		struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+		unsigned i = 0;
+		bkey_for_each_ptr(ptrs, ptr) {
+			if (bch2_dev_io_failures(failed, ptr->dev))
+				update_opts.rewrite_ptrs |= BIT(i);
+			i++;
+		}
+	}
+
 	ret = bch2_data_update_init(trans, NULL, NULL, &op->write,
 			writepoint_hashed((unsigned long) current),
 			opts,
-			(struct data_update_opts) {
-				.target		= opts.promote_target,
-				.extra_replicas	= 1,
-				.write_flags	= BCH_WRITE_ALLOC_NOWAIT|BCH_WRITE_CACHED,
-			},
+			update_opts,
 			btree_id, k);
 	/*
 	 * possible errors: -BCH_ERR_nocow_lock_blocked,
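
The else branch above is the heart of the self-healing case: it walks the extent's pointers in order and sets bit i of rewrite_ptrs for each pointer whose device appears in the failure list, so the data update rewrites exactly the bad replicas. A minimal standalone sketch of that bitmask construction, assuming a plain array of device IDs in place of bkey_for_each_ptr(), with dev_has_failed() as a hypothetical stand-in for bch2_dev_io_failures():

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* hypothetical stand-in for bch2_dev_io_failures(): linear scan of failures */
static bool dev_has_failed(const unsigned *failed_devs, unsigned nr,
			   unsigned dev)
{
	for (unsigned i = 0; i < nr; i++)
		if (failed_devs[i] == dev)
			return true;
	return false;
}

/* set bit i for each pointer whose backing device failed */
static uint32_t rewrite_mask(const unsigned *ptr_devs, unsigned nr_ptrs,
			     const unsigned *failed_devs, unsigned nr_failed)
{
	uint32_t mask = 0;

	for (unsigned i = 0; i < nr_ptrs; i++)
		if (dev_has_failed(failed_devs, nr_failed, ptr_devs[i]))
			mask |= 1U << i;	/* BIT(i) in kernel terms */
	return mask;
}

int main(void)
{
	unsigned ptr_devs[] = { 0, 1, 2 };	/* devices backing each pointer */
	unsigned failed_devs[] = { 1 };		/* device 1 had a read error */

	/* prints 0x2: only the pointer on device 1 gets rewritten */
	printf("rewrite mask: 0x%x\n",
	       rewrite_mask(ptr_devs, 3, failed_devs, 1));
	return 0;
}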
@@ -258,10 +276,17 @@ static struct promote_op *promote_alloc(struct btree_trans *trans,
 					unsigned flags,
 					struct bch_read_bio **rbio,
 					bool *bounce,
-					bool *read_full)
+					bool *read_full,
+					struct bch_io_failures *failed)
 {
 	struct bch_fs *c = trans->c;
-	bool promote_full = *read_full || READ_ONCE(c->promote_whole_extents);
+	/*
+	 * if failed != NULL we're not actually doing a promote, we're
+	 * recovering from an io/checksum error
+	 */
+	bool promote_full = (failed ||
+			     *read_full ||
+			     READ_ONCE(c->promote_whole_extents));
 	/* data might have to be decompressed in the write path: */
 	unsigned sectors = promote_full
 		? max(pick->crc.compressed_size, pick->crc.live_size)
@@ -272,7 +297,7 @@ static struct promote_op *promote_alloc(struct btree_trans *trans,
 	struct promote_op *promote;
 	int ret;
 
-	ret = should_promote(c, k, pos, opts, flags);
+	ret = should_promote(c, k, pos, opts, flags, failed);
 	if (ret)
 		goto nopromote;
 
@@ -280,7 +305,7 @@ static struct promote_op *promote_alloc(struct btree_trans *trans,
 				  k.k->type == KEY_TYPE_reflink_v
 				  ? BTREE_ID_reflink
 				  : BTREE_ID_extents,
-				  k, pos, pick, opts, sectors, rbio);
+				  k, pos, pick, opts, sectors, rbio, failed);
 	ret = PTR_ERR_OR_ZERO(promote);
 	if (ret)
 		goto nopromote;
@@ -910,9 +935,9 @@ retry_pick:
 		bounce = true;
 	}
 
-	if (orig->opts.promote_target)
+	if (orig->opts.promote_target)// || failed)
 		promote = promote_alloc(trans, iter, k, &pick, orig->opts, flags,
-					&rbio, &bounce, &read_full);
+					&rbio, &bounce, &read_full, failed);
 
 	if (!read_full) {
 		EBUG_ON(crc_is_compressed(pick.crc));