mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-05-24 10:39:52 +00:00
btrfs: repair all known bad mirrors
When there is more than a single level of redundancy there can also be multiple bad mirrors, and the current read repair code only repairs the last bad one.

Restructure btrfs_repair_one_sector so that it records the originally failed mirror and the number of copies, and then repair all known bad copies until we reach the originally failed copy in clean_io_failure. Note that this also means the read repair reads will always start from the next bad mirror and not mirror 0.

This fixes btrfs/265 in xfstests.

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
parent
d28beb3e81
commit
c144c63fd3
2 changed files with 61 additions and 66 deletions
|
@ -2434,6 +2434,20 @@ int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int next_mirror(const struct io_failure_record *failrec, int cur_mirror)
|
||||||
|
{
|
||||||
|
if (cur_mirror == failrec->num_copies)
|
||||||
|
return cur_mirror + 1 - failrec->num_copies;
|
||||||
|
return cur_mirror + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int prev_mirror(const struct io_failure_record *failrec, int cur_mirror)
|
||||||
|
{
|
||||||
|
if (cur_mirror == 1)
|
||||||
|
return failrec->num_copies;
|
||||||
|
return cur_mirror - 1;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* each time an IO finishes, we do a fast check in the IO failure tree
|
* each time an IO finishes, we do a fast check in the IO failure tree
|
||||||
* to see if we need to process or clean up an io_failure_record
|
* to see if we need to process or clean up an io_failure_record
|
||||||
|
@ -2446,7 +2460,7 @@ int clean_io_failure(struct btrfs_fs_info *fs_info,
|
||||||
u64 private;
|
u64 private;
|
||||||
struct io_failure_record *failrec;
|
struct io_failure_record *failrec;
|
||||||
struct extent_state *state;
|
struct extent_state *state;
|
||||||
int num_copies;
|
int mirror;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
private = 0;
|
private = 0;
|
||||||
|
@ -2470,20 +2484,19 @@ int clean_io_failure(struct btrfs_fs_info *fs_info,
|
||||||
EXTENT_LOCKED);
|
EXTENT_LOCKED);
|
||||||
spin_unlock(&io_tree->lock);
|
spin_unlock(&io_tree->lock);
|
||||||
|
|
||||||
if (state && state->start <= failrec->start &&
|
if (!state || state->start > failrec->start ||
|
||||||
state->end >= failrec->start + failrec->len - 1) {
|
state->end < failrec->start + failrec->len - 1)
|
||||||
num_copies = btrfs_num_copies(fs_info, failrec->logical,
|
goto out;
|
||||||
failrec->len);
|
|
||||||
if (num_copies > 1) {
|
mirror = failrec->this_mirror;
|
||||||
|
do {
|
||||||
|
mirror = prev_mirror(failrec, mirror);
|
||||||
repair_io_failure(fs_info, ino, start, failrec->len,
|
repair_io_failure(fs_info, ino, start, failrec->len,
|
||||||
failrec->logical, page, pg_offset,
|
failrec->logical, page, pg_offset, mirror);
|
||||||
failrec->failed_mirror);
|
} while (mirror != failrec->failed_mirror);
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
out:
|
out:
|
||||||
free_io_failure(failure_tree, io_tree, failrec);
|
free_io_failure(failure_tree, io_tree, failrec);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2522,7 +2535,8 @@ void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end)
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode,
|
static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode,
|
||||||
u64 start)
|
u64 start,
|
||||||
|
int failed_mirror)
|
||||||
{
|
{
|
||||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||||
struct io_failure_record *failrec;
|
struct io_failure_record *failrec;
|
||||||
|
@ -2544,7 +2558,8 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
|
||||||
* (e.g. with a list for failed_mirror) to make
|
* (e.g. with a list for failed_mirror) to make
|
||||||
* clean_io_failure() clean all those errors at once.
|
* clean_io_failure() clean all those errors at once.
|
||||||
*/
|
*/
|
||||||
|
ASSERT(failrec->this_mirror == failed_mirror);
|
||||||
|
ASSERT(failrec->len == fs_info->sectorsize);
|
||||||
return failrec;
|
return failrec;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2554,7 +2569,8 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
|
||||||
|
|
||||||
failrec->start = start;
|
failrec->start = start;
|
||||||
failrec->len = sectorsize;
|
failrec->len = sectorsize;
|
||||||
failrec->this_mirror = 0;
|
failrec->failed_mirror = failed_mirror;
|
||||||
|
failrec->this_mirror = failed_mirror;
|
||||||
failrec->compress_type = BTRFS_COMPRESS_NONE;
|
failrec->compress_type = BTRFS_COMPRESS_NONE;
|
||||||
|
|
||||||
read_lock(&em_tree->lock);
|
read_lock(&em_tree->lock);
|
||||||
|
@ -2589,6 +2605,20 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
|
||||||
failrec->logical = logical;
|
failrec->logical = logical;
|
||||||
free_extent_map(em);
|
free_extent_map(em);
|
||||||
|
|
||||||
|
failrec->num_copies = btrfs_num_copies(fs_info, logical, sectorsize);
|
||||||
|
if (failrec->num_copies == 1) {
|
||||||
|
/*
|
||||||
|
* We only have a single copy of the data, so don't bother with
|
||||||
|
* all the retry and error correction code that follows. No
|
||||||
|
* matter what the error is, it is very likely to persist.
|
||||||
|
*/
|
||||||
|
btrfs_debug(fs_info,
|
||||||
|
"cannot repair logical %llu num_copies %d",
|
||||||
|
failrec->logical, failrec->num_copies);
|
||||||
|
kfree(failrec);
|
||||||
|
return ERR_PTR(-EIO);
|
||||||
|
}
|
||||||
|
|
||||||
/* Set the bits in the private failure tree */
|
/* Set the bits in the private failure tree */
|
||||||
ret = set_extent_bits(failure_tree, start, start + sectorsize - 1,
|
ret = set_extent_bits(failure_tree, start, start + sectorsize - 1,
|
||||||
EXTENT_LOCKED | EXTENT_DIRTY);
|
EXTENT_LOCKED | EXTENT_DIRTY);
|
||||||
|
@ -2605,54 +2635,6 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
|
||||||
return failrec;
|
return failrec;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool btrfs_check_repairable(struct inode *inode,
|
|
||||||
struct io_failure_record *failrec,
|
|
||||||
int failed_mirror)
|
|
||||||
{
|
|
||||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
|
||||||
int num_copies;
|
|
||||||
|
|
||||||
num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len);
|
|
||||||
if (num_copies == 1) {
|
|
||||||
/*
|
|
||||||
* we only have a single copy of the data, so don't bother with
|
|
||||||
* all the retry and error correction code that follows. no
|
|
||||||
* matter what the error is, it is very likely to persist.
|
|
||||||
*/
|
|
||||||
btrfs_debug(fs_info,
|
|
||||||
"Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d",
|
|
||||||
num_copies, failrec->this_mirror, failed_mirror);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* The failure record should only contain one sector */
|
|
||||||
ASSERT(failrec->len == fs_info->sectorsize);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* There are two premises:
|
|
||||||
* a) deliver good data to the caller
|
|
||||||
* b) correct the bad sectors on disk
|
|
||||||
*
|
|
||||||
* Since we're only doing repair for one sector, we only need to get
|
|
||||||
* a good copy of the failed sector and if we succeed, we have setup
|
|
||||||
* everything for repair_io_failure to do the rest for us.
|
|
||||||
*/
|
|
||||||
ASSERT(failed_mirror);
|
|
||||||
failrec->failed_mirror = failed_mirror;
|
|
||||||
failrec->this_mirror++;
|
|
||||||
if (failrec->this_mirror == failed_mirror)
|
|
||||||
failrec->this_mirror++;
|
|
||||||
|
|
||||||
if (failrec->this_mirror > num_copies) {
|
|
||||||
btrfs_debug(fs_info,
|
|
||||||
"Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d",
|
|
||||||
num_copies, failrec->this_mirror, failed_mirror);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
int btrfs_repair_one_sector(struct inode *inode,
|
int btrfs_repair_one_sector(struct inode *inode,
|
||||||
struct bio *failed_bio, u32 bio_offset,
|
struct bio *failed_bio, u32 bio_offset,
|
||||||
struct page *page, unsigned int pgoff,
|
struct page *page, unsigned int pgoff,
|
||||||
|
@ -2673,12 +2655,24 @@ int btrfs_repair_one_sector(struct inode *inode,
|
||||||
|
|
||||||
BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
|
BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
|
||||||
|
|
||||||
failrec = btrfs_get_io_failure_record(inode, start);
|
failrec = btrfs_get_io_failure_record(inode, start, failed_mirror);
|
||||||
if (IS_ERR(failrec))
|
if (IS_ERR(failrec))
|
||||||
return PTR_ERR(failrec);
|
return PTR_ERR(failrec);
|
||||||
|
|
||||||
|
/*
|
||||||
if (!btrfs_check_repairable(inode, failrec, failed_mirror)) {
|
* There are two premises:
|
||||||
|
* a) deliver good data to the caller
|
||||||
|
* b) correct the bad sectors on disk
|
||||||
|
*
|
||||||
|
* Since we're only doing repair for one sector, we only need to get
|
||||||
|
* a good copy of the failed sector and if we succeed, we have setup
|
||||||
|
* everything for repair_io_failure to do the rest for us.
|
||||||
|
*/
|
||||||
|
failrec->this_mirror = next_mirror(failrec, failrec->this_mirror);
|
||||||
|
if (failrec->this_mirror == failrec->failed_mirror) {
|
||||||
|
btrfs_debug(fs_info,
|
||||||
|
"failed to repair num_copies %d this_mirror %d failed_mirror %d",
|
||||||
|
failrec->num_copies, failrec->this_mirror, failrec->failed_mirror);
|
||||||
free_io_failure(failure_tree, tree, failrec);
|
free_io_failure(failure_tree, tree, failrec);
|
||||||
return -EIO;
|
return -EIO;
|
||||||
}
|
}
|
||||||
|
|
|
@ -263,6 +263,7 @@ struct io_failure_record {
|
||||||
enum btrfs_compression_type compress_type;
|
enum btrfs_compression_type compress_type;
|
||||||
int this_mirror;
|
int this_mirror;
|
||||||
int failed_mirror;
|
int failed_mirror;
|
||||||
|
int num_copies;
|
||||||
};
|
};
|
||||||
|
|
||||||
int btrfs_repair_one_sector(struct inode *inode,
|
int btrfs_repair_one_sector(struct inode *inode,
|
||||||
|
|
Loading…
Add table
Reference in a new issue