mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-09-18 22:14:16 +00:00
md/raid5: remove rcu protection to access rdev from conf
RCU protection is removed because it is safe to access rdev from conf directly: - If any spinlock is held, because synchronize_rcu() from md_kick_rdev_from_array() will prevent 'rdev' from being freed until the spinlock is released; - If 'reconfig_mutex' is held, because rdev can't be added to or removed from the array; - If there is normal IO inflight, because mddev_suspend() will prevent rdev from being added to or removed from the array; - If there is sync IO inflight, because 'MD_RECOVERY_RUNNING' is checked in remove_and_add_spares(). Together these cover all the scenarios in raid456. Signed-off-by: Yu Kuai <yukuai3@huawei.com> Signed-off-by: Song Liu <song@kernel.org> Link: https://lore.kernel.org/r/20231125081604.3939938-5-yukuai1@huaweicloud.com
This commit is contained in:
parent
2d32777d60
commit
ad8606702f
4 changed files with 69 additions and 144 deletions
|
@ -1890,28 +1890,22 @@ r5l_recovery_replay_one_stripe(struct r5conf *conf,
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
/* in case device is broken */
|
/* in case device is broken */
|
||||||
rcu_read_lock();
|
rdev = conf->disks[disk_index].rdev;
|
||||||
rdev = rcu_dereference(conf->disks[disk_index].rdev);
|
|
||||||
if (rdev) {
|
if (rdev) {
|
||||||
atomic_inc(&rdev->nr_pending);
|
atomic_inc(&rdev->nr_pending);
|
||||||
rcu_read_unlock();
|
|
||||||
sync_page_io(rdev, sh->sector, PAGE_SIZE,
|
sync_page_io(rdev, sh->sector, PAGE_SIZE,
|
||||||
sh->dev[disk_index].page, REQ_OP_WRITE,
|
sh->dev[disk_index].page, REQ_OP_WRITE,
|
||||||
false);
|
false);
|
||||||
rdev_dec_pending(rdev, rdev->mddev);
|
rdev_dec_pending(rdev, rdev->mddev);
|
||||||
rcu_read_lock();
|
|
||||||
}
|
}
|
||||||
rrdev = rcu_dereference(conf->disks[disk_index].replacement);
|
rrdev = conf->disks[disk_index].replacement;
|
||||||
if (rrdev) {
|
if (rrdev) {
|
||||||
atomic_inc(&rrdev->nr_pending);
|
atomic_inc(&rrdev->nr_pending);
|
||||||
rcu_read_unlock();
|
|
||||||
sync_page_io(rrdev, sh->sector, PAGE_SIZE,
|
sync_page_io(rrdev, sh->sector, PAGE_SIZE,
|
||||||
sh->dev[disk_index].page, REQ_OP_WRITE,
|
sh->dev[disk_index].page, REQ_OP_WRITE,
|
||||||
false);
|
false);
|
||||||
rdev_dec_pending(rrdev, rrdev->mddev);
|
rdev_dec_pending(rrdev, rrdev->mddev);
|
||||||
rcu_read_lock();
|
|
||||||
}
|
}
|
||||||
rcu_read_unlock();
|
|
||||||
}
|
}
|
||||||
ctx->data_parity_stripes++;
|
ctx->data_parity_stripes++;
|
||||||
out:
|
out:
|
||||||
|
@ -2948,7 +2942,6 @@ bool r5c_big_stripe_cached(struct r5conf *conf, sector_t sect)
|
||||||
if (!log)
|
if (!log)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
WARN_ON_ONCE(!rcu_read_lock_held());
|
|
||||||
tree_index = r5c_tree_index(conf, sect);
|
tree_index = r5c_tree_index(conf, sect);
|
||||||
slot = radix_tree_lookup(&log->big_stripe_tree, tree_index);
|
slot = radix_tree_lookup(&log->big_stripe_tree, tree_index);
|
||||||
return slot != NULL;
|
return slot != NULL;
|
||||||
|
|
|
@ -620,11 +620,9 @@ static void ppl_do_flush(struct ppl_io_unit *io)
|
||||||
struct md_rdev *rdev;
|
struct md_rdev *rdev;
|
||||||
struct block_device *bdev = NULL;
|
struct block_device *bdev = NULL;
|
||||||
|
|
||||||
rcu_read_lock();
|
rdev = conf->disks[i].rdev;
|
||||||
rdev = rcu_dereference(conf->disks[i].rdev);
|
|
||||||
if (rdev && !test_bit(Faulty, &rdev->flags))
|
if (rdev && !test_bit(Faulty, &rdev->flags))
|
||||||
bdev = rdev->bdev;
|
bdev = rdev->bdev;
|
||||||
rcu_read_unlock();
|
|
||||||
|
|
||||||
if (bdev) {
|
if (bdev) {
|
||||||
struct bio *bio;
|
struct bio *bio;
|
||||||
|
@ -882,9 +880,7 @@ static int ppl_recover_entry(struct ppl_log *log, struct ppl_header_entry *e,
|
||||||
(unsigned long long)r_sector, dd_idx,
|
(unsigned long long)r_sector, dd_idx,
|
||||||
(unsigned long long)sector);
|
(unsigned long long)sector);
|
||||||
|
|
||||||
/* Array has not started so rcu dereference is safe */
|
rdev = conf->disks[dd_idx].rdev;
|
||||||
rdev = rcu_dereference_protected(
|
|
||||||
conf->disks[dd_idx].rdev, 1);
|
|
||||||
if (!rdev || (!test_bit(In_sync, &rdev->flags) &&
|
if (!rdev || (!test_bit(In_sync, &rdev->flags) &&
|
||||||
sector >= rdev->recovery_offset)) {
|
sector >= rdev->recovery_offset)) {
|
||||||
pr_debug("%s:%*s data member disk %d missing\n",
|
pr_debug("%s:%*s data member disk %d missing\n",
|
||||||
|
@ -936,9 +932,7 @@ static int ppl_recover_entry(struct ppl_log *log, struct ppl_header_entry *e,
|
||||||
0, &disk, &sh);
|
0, &disk, &sh);
|
||||||
BUG_ON(sh.pd_idx != le32_to_cpu(e->parity_disk));
|
BUG_ON(sh.pd_idx != le32_to_cpu(e->parity_disk));
|
||||||
|
|
||||||
/* Array has not started so rcu dereference is safe */
|
parity_rdev = conf->disks[sh.pd_idx].rdev;
|
||||||
parity_rdev = rcu_dereference_protected(
|
|
||||||
conf->disks[sh.pd_idx].rdev, 1);
|
|
||||||
|
|
||||||
BUG_ON(parity_rdev->bdev->bd_dev != log->rdev->bdev->bd_dev);
|
BUG_ON(parity_rdev->bdev->bd_dev != log->rdev->bdev->bd_dev);
|
||||||
pr_debug("%s:%*s write parity at sector %llu, disk %pg\n",
|
pr_debug("%s:%*s write parity at sector %llu, disk %pg\n",
|
||||||
|
@ -1404,9 +1398,7 @@ int ppl_init_log(struct r5conf *conf)
|
||||||
|
|
||||||
for (i = 0; i < ppl_conf->count; i++) {
|
for (i = 0; i < ppl_conf->count; i++) {
|
||||||
struct ppl_log *log = &ppl_conf->child_logs[i];
|
struct ppl_log *log = &ppl_conf->child_logs[i];
|
||||||
/* Array has not started so rcu dereference is safe */
|
struct md_rdev *rdev = conf->disks[i].rdev;
|
||||||
struct md_rdev *rdev =
|
|
||||||
rcu_dereference_protected(conf->disks[i].rdev, 1);
|
|
||||||
|
|
||||||
mutex_init(&log->io_mutex);
|
mutex_init(&log->io_mutex);
|
||||||
spin_lock_init(&log->io_list_lock);
|
spin_lock_init(&log->io_list_lock);
|
||||||
|
|
|
@ -693,12 +693,12 @@ int raid5_calc_degraded(struct r5conf *conf)
|
||||||
int degraded, degraded2;
|
int degraded, degraded2;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
rcu_read_lock();
|
|
||||||
degraded = 0;
|
degraded = 0;
|
||||||
for (i = 0; i < conf->previous_raid_disks; i++) {
|
for (i = 0; i < conf->previous_raid_disks; i++) {
|
||||||
struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev);
|
struct md_rdev *rdev = READ_ONCE(conf->disks[i].rdev);
|
||||||
|
|
||||||
if (rdev && test_bit(Faulty, &rdev->flags))
|
if (rdev && test_bit(Faulty, &rdev->flags))
|
||||||
rdev = rcu_dereference(conf->disks[i].replacement);
|
rdev = READ_ONCE(conf->disks[i].replacement);
|
||||||
if (!rdev || test_bit(Faulty, &rdev->flags))
|
if (!rdev || test_bit(Faulty, &rdev->flags))
|
||||||
degraded++;
|
degraded++;
|
||||||
else if (test_bit(In_sync, &rdev->flags))
|
else if (test_bit(In_sync, &rdev->flags))
|
||||||
|
@ -716,15 +716,14 @@ int raid5_calc_degraded(struct r5conf *conf)
|
||||||
if (conf->raid_disks >= conf->previous_raid_disks)
|
if (conf->raid_disks >= conf->previous_raid_disks)
|
||||||
degraded++;
|
degraded++;
|
||||||
}
|
}
|
||||||
rcu_read_unlock();
|
|
||||||
if (conf->raid_disks == conf->previous_raid_disks)
|
if (conf->raid_disks == conf->previous_raid_disks)
|
||||||
return degraded;
|
return degraded;
|
||||||
rcu_read_lock();
|
|
||||||
degraded2 = 0;
|
degraded2 = 0;
|
||||||
for (i = 0; i < conf->raid_disks; i++) {
|
for (i = 0; i < conf->raid_disks; i++) {
|
||||||
struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev);
|
struct md_rdev *rdev = READ_ONCE(conf->disks[i].rdev);
|
||||||
|
|
||||||
if (rdev && test_bit(Faulty, &rdev->flags))
|
if (rdev && test_bit(Faulty, &rdev->flags))
|
||||||
rdev = rcu_dereference(conf->disks[i].replacement);
|
rdev = READ_ONCE(conf->disks[i].replacement);
|
||||||
if (!rdev || test_bit(Faulty, &rdev->flags))
|
if (!rdev || test_bit(Faulty, &rdev->flags))
|
||||||
degraded2++;
|
degraded2++;
|
||||||
else if (test_bit(In_sync, &rdev->flags))
|
else if (test_bit(In_sync, &rdev->flags))
|
||||||
|
@ -738,7 +737,6 @@ int raid5_calc_degraded(struct r5conf *conf)
|
||||||
if (conf->raid_disks <= conf->previous_raid_disks)
|
if (conf->raid_disks <= conf->previous_raid_disks)
|
||||||
degraded2++;
|
degraded2++;
|
||||||
}
|
}
|
||||||
rcu_read_unlock();
|
|
||||||
if (degraded2 > degraded)
|
if (degraded2 > degraded)
|
||||||
return degraded2;
|
return degraded2;
|
||||||
return degraded;
|
return degraded;
|
||||||
|
@ -1183,14 +1181,8 @@ again:
|
||||||
bi = &dev->req;
|
bi = &dev->req;
|
||||||
rbi = &dev->rreq; /* For writing to replacement */
|
rbi = &dev->rreq; /* For writing to replacement */
|
||||||
|
|
||||||
rcu_read_lock();
|
rdev = conf->disks[i].rdev;
|
||||||
rrdev = rcu_dereference(conf->disks[i].replacement);
|
rrdev = conf->disks[i].replacement;
|
||||||
smp_mb(); /* Ensure that if rrdev is NULL, rdev won't be */
|
|
||||||
rdev = rcu_dereference(conf->disks[i].rdev);
|
|
||||||
if (!rdev) {
|
|
||||||
rdev = rrdev;
|
|
||||||
rrdev = NULL;
|
|
||||||
}
|
|
||||||
if (op_is_write(op)) {
|
if (op_is_write(op)) {
|
||||||
if (replace_only)
|
if (replace_only)
|
||||||
rdev = NULL;
|
rdev = NULL;
|
||||||
|
@ -1211,7 +1203,6 @@ again:
|
||||||
rrdev = NULL;
|
rrdev = NULL;
|
||||||
if (rrdev)
|
if (rrdev)
|
||||||
atomic_inc(&rrdev->nr_pending);
|
atomic_inc(&rrdev->nr_pending);
|
||||||
rcu_read_unlock();
|
|
||||||
|
|
||||||
/* We have already checked bad blocks for reads. Now
|
/* We have already checked bad blocks for reads. Now
|
||||||
* need to check for writes. We never accept write errors
|
* need to check for writes. We never accept write errors
|
||||||
|
@ -2730,28 +2721,6 @@ static void shrink_stripes(struct r5conf *conf)
|
||||||
conf->slab_cache = NULL;
|
conf->slab_cache = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* This helper wraps rcu_dereference_protected() and can be used when
|
|
||||||
* it is known that the nr_pending of the rdev is elevated.
|
|
||||||
*/
|
|
||||||
static struct md_rdev *rdev_pend_deref(struct md_rdev __rcu *rdev)
|
|
||||||
{
|
|
||||||
return rcu_dereference_protected(rdev,
|
|
||||||
atomic_read(&rcu_access_pointer(rdev)->nr_pending));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* This helper wraps rcu_dereference_protected() and should be used
|
|
||||||
* when it is known that the mddev_lock() is held. This is safe
|
|
||||||
* seeing raid5_remove_disk() has the same lock held.
|
|
||||||
*/
|
|
||||||
static struct md_rdev *rdev_mdlock_deref(struct mddev *mddev,
|
|
||||||
struct md_rdev __rcu *rdev)
|
|
||||||
{
|
|
||||||
return rcu_dereference_protected(rdev,
|
|
||||||
lockdep_is_held(&mddev->reconfig_mutex));
|
|
||||||
}
|
|
||||||
|
|
||||||
static void raid5_end_read_request(struct bio * bi)
|
static void raid5_end_read_request(struct bio * bi)
|
||||||
{
|
{
|
||||||
struct stripe_head *sh = bi->bi_private;
|
struct stripe_head *sh = bi->bi_private;
|
||||||
|
@ -2777,9 +2746,9 @@ static void raid5_end_read_request(struct bio * bi)
|
||||||
* In that case it moved down to 'rdev'.
|
* In that case it moved down to 'rdev'.
|
||||||
* rdev is not removed until all requests are finished.
|
* rdev is not removed until all requests are finished.
|
||||||
*/
|
*/
|
||||||
rdev = rdev_pend_deref(conf->disks[i].replacement);
|
rdev = conf->disks[i].replacement;
|
||||||
if (!rdev)
|
if (!rdev)
|
||||||
rdev = rdev_pend_deref(conf->disks[i].rdev);
|
rdev = conf->disks[i].rdev;
|
||||||
|
|
||||||
if (use_new_offset(conf, sh))
|
if (use_new_offset(conf, sh))
|
||||||
s = sh->sector + rdev->new_data_offset;
|
s = sh->sector + rdev->new_data_offset;
|
||||||
|
@ -2892,11 +2861,11 @@ static void raid5_end_write_request(struct bio *bi)
|
||||||
|
|
||||||
for (i = 0 ; i < disks; i++) {
|
for (i = 0 ; i < disks; i++) {
|
||||||
if (bi == &sh->dev[i].req) {
|
if (bi == &sh->dev[i].req) {
|
||||||
rdev = rdev_pend_deref(conf->disks[i].rdev);
|
rdev = conf->disks[i].rdev;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (bi == &sh->dev[i].rreq) {
|
if (bi == &sh->dev[i].rreq) {
|
||||||
rdev = rdev_pend_deref(conf->disks[i].replacement);
|
rdev = conf->disks[i].replacement;
|
||||||
if (rdev)
|
if (rdev)
|
||||||
replacement = 1;
|
replacement = 1;
|
||||||
else
|
else
|
||||||
|
@ -2904,7 +2873,7 @@ static void raid5_end_write_request(struct bio *bi)
|
||||||
* replaced it. rdev is not removed
|
* replaced it. rdev is not removed
|
||||||
* until all requests are finished.
|
* until all requests are finished.
|
||||||
*/
|
*/
|
||||||
rdev = rdev_pend_deref(conf->disks[i].rdev);
|
rdev = conf->disks[i].rdev;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -3666,15 +3635,13 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
|
||||||
int bitmap_end = 0;
|
int bitmap_end = 0;
|
||||||
|
|
||||||
if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
|
if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
|
||||||
struct md_rdev *rdev;
|
struct md_rdev *rdev = conf->disks[i].rdev;
|
||||||
rcu_read_lock();
|
|
||||||
rdev = rcu_dereference(conf->disks[i].rdev);
|
|
||||||
if (rdev && test_bit(In_sync, &rdev->flags) &&
|
if (rdev && test_bit(In_sync, &rdev->flags) &&
|
||||||
!test_bit(Faulty, &rdev->flags))
|
!test_bit(Faulty, &rdev->flags))
|
||||||
atomic_inc(&rdev->nr_pending);
|
atomic_inc(&rdev->nr_pending);
|
||||||
else
|
else
|
||||||
rdev = NULL;
|
rdev = NULL;
|
||||||
rcu_read_unlock();
|
|
||||||
if (rdev) {
|
if (rdev) {
|
||||||
if (!rdev_set_badblocks(
|
if (!rdev_set_badblocks(
|
||||||
rdev,
|
rdev,
|
||||||
|
@ -3792,16 +3759,17 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
|
||||||
/* During recovery devices cannot be removed, so
|
/* During recovery devices cannot be removed, so
|
||||||
* locking and refcounting of rdevs is not needed
|
* locking and refcounting of rdevs is not needed
|
||||||
*/
|
*/
|
||||||
rcu_read_lock();
|
|
||||||
for (i = 0; i < conf->raid_disks; i++) {
|
for (i = 0; i < conf->raid_disks; i++) {
|
||||||
struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev);
|
struct md_rdev *rdev = conf->disks[i].rdev;
|
||||||
|
|
||||||
if (rdev
|
if (rdev
|
||||||
&& !test_bit(Faulty, &rdev->flags)
|
&& !test_bit(Faulty, &rdev->flags)
|
||||||
&& !test_bit(In_sync, &rdev->flags)
|
&& !test_bit(In_sync, &rdev->flags)
|
||||||
&& !rdev_set_badblocks(rdev, sh->sector,
|
&& !rdev_set_badblocks(rdev, sh->sector,
|
||||||
RAID5_STRIPE_SECTORS(conf), 0))
|
RAID5_STRIPE_SECTORS(conf), 0))
|
||||||
abort = 1;
|
abort = 1;
|
||||||
rdev = rcu_dereference(conf->disks[i].replacement);
|
rdev = conf->disks[i].replacement;
|
||||||
|
|
||||||
if (rdev
|
if (rdev
|
||||||
&& !test_bit(Faulty, &rdev->flags)
|
&& !test_bit(Faulty, &rdev->flags)
|
||||||
&& !test_bit(In_sync, &rdev->flags)
|
&& !test_bit(In_sync, &rdev->flags)
|
||||||
|
@ -3809,7 +3777,6 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
|
||||||
RAID5_STRIPE_SECTORS(conf), 0))
|
RAID5_STRIPE_SECTORS(conf), 0))
|
||||||
abort = 1;
|
abort = 1;
|
||||||
}
|
}
|
||||||
rcu_read_unlock();
|
|
||||||
if (abort)
|
if (abort)
|
||||||
conf->recovery_disabled =
|
conf->recovery_disabled =
|
||||||
conf->mddev->recovery_disabled;
|
conf->mddev->recovery_disabled;
|
||||||
|
@ -3822,15 +3789,13 @@ static int want_replace(struct stripe_head *sh, int disk_idx)
|
||||||
struct md_rdev *rdev;
|
struct md_rdev *rdev;
|
||||||
int rv = 0;
|
int rv = 0;
|
||||||
|
|
||||||
rcu_read_lock();
|
rdev = sh->raid_conf->disks[disk_idx].replacement;
|
||||||
rdev = rcu_dereference(sh->raid_conf->disks[disk_idx].replacement);
|
|
||||||
if (rdev
|
if (rdev
|
||||||
&& !test_bit(Faulty, &rdev->flags)
|
&& !test_bit(Faulty, &rdev->flags)
|
||||||
&& !test_bit(In_sync, &rdev->flags)
|
&& !test_bit(In_sync, &rdev->flags)
|
||||||
&& (rdev->recovery_offset <= sh->sector
|
&& (rdev->recovery_offset <= sh->sector
|
||||||
|| rdev->mddev->recovery_cp <= sh->sector))
|
|| rdev->mddev->recovery_cp <= sh->sector))
|
||||||
rv = 1;
|
rv = 1;
|
||||||
rcu_read_unlock();
|
|
||||||
return rv;
|
return rv;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4707,7 +4672,6 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
|
||||||
s->log_failed = r5l_log_disk_error(conf);
|
s->log_failed = r5l_log_disk_error(conf);
|
||||||
|
|
||||||
/* Now to look around and see what can be done */
|
/* Now to look around and see what can be done */
|
||||||
rcu_read_lock();
|
|
||||||
for (i=disks; i--; ) {
|
for (i=disks; i--; ) {
|
||||||
struct md_rdev *rdev;
|
struct md_rdev *rdev;
|
||||||
sector_t first_bad;
|
sector_t first_bad;
|
||||||
|
@ -4752,7 +4716,7 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
|
||||||
/* Prefer to use the replacement for reads, but only
|
/* Prefer to use the replacement for reads, but only
|
||||||
* if it is recovered enough and has no bad blocks.
|
* if it is recovered enough and has no bad blocks.
|
||||||
*/
|
*/
|
||||||
rdev = rcu_dereference(conf->disks[i].replacement);
|
rdev = conf->disks[i].replacement;
|
||||||
if (rdev && !test_bit(Faulty, &rdev->flags) &&
|
if (rdev && !test_bit(Faulty, &rdev->flags) &&
|
||||||
rdev->recovery_offset >= sh->sector + RAID5_STRIPE_SECTORS(conf) &&
|
rdev->recovery_offset >= sh->sector + RAID5_STRIPE_SECTORS(conf) &&
|
||||||
!is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf),
|
!is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf),
|
||||||
|
@ -4763,7 +4727,7 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
|
||||||
set_bit(R5_NeedReplace, &dev->flags);
|
set_bit(R5_NeedReplace, &dev->flags);
|
||||||
else
|
else
|
||||||
clear_bit(R5_NeedReplace, &dev->flags);
|
clear_bit(R5_NeedReplace, &dev->flags);
|
||||||
rdev = rcu_dereference(conf->disks[i].rdev);
|
rdev = conf->disks[i].rdev;
|
||||||
clear_bit(R5_ReadRepl, &dev->flags);
|
clear_bit(R5_ReadRepl, &dev->flags);
|
||||||
}
|
}
|
||||||
if (rdev && test_bit(Faulty, &rdev->flags))
|
if (rdev && test_bit(Faulty, &rdev->flags))
|
||||||
|
@ -4810,8 +4774,8 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
|
||||||
if (test_bit(R5_WriteError, &dev->flags)) {
|
if (test_bit(R5_WriteError, &dev->flags)) {
|
||||||
/* This flag does not apply to '.replacement'
|
/* This flag does not apply to '.replacement'
|
||||||
* only to .rdev, so make sure to check that*/
|
* only to .rdev, so make sure to check that*/
|
||||||
struct md_rdev *rdev2 = rcu_dereference(
|
struct md_rdev *rdev2 = conf->disks[i].rdev;
|
||||||
conf->disks[i].rdev);
|
|
||||||
if (rdev2 == rdev)
|
if (rdev2 == rdev)
|
||||||
clear_bit(R5_Insync, &dev->flags);
|
clear_bit(R5_Insync, &dev->flags);
|
||||||
if (rdev2 && !test_bit(Faulty, &rdev2->flags)) {
|
if (rdev2 && !test_bit(Faulty, &rdev2->flags)) {
|
||||||
|
@ -4823,8 +4787,8 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
|
||||||
if (test_bit(R5_MadeGood, &dev->flags)) {
|
if (test_bit(R5_MadeGood, &dev->flags)) {
|
||||||
/* This flag does not apply to '.replacement'
|
/* This flag does not apply to '.replacement'
|
||||||
* only to .rdev, so make sure to check that*/
|
* only to .rdev, so make sure to check that*/
|
||||||
struct md_rdev *rdev2 = rcu_dereference(
|
struct md_rdev *rdev2 = conf->disks[i].rdev;
|
||||||
conf->disks[i].rdev);
|
|
||||||
if (rdev2 && !test_bit(Faulty, &rdev2->flags)) {
|
if (rdev2 && !test_bit(Faulty, &rdev2->flags)) {
|
||||||
s->handle_bad_blocks = 1;
|
s->handle_bad_blocks = 1;
|
||||||
atomic_inc(&rdev2->nr_pending);
|
atomic_inc(&rdev2->nr_pending);
|
||||||
|
@ -4832,8 +4796,8 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
|
||||||
clear_bit(R5_MadeGood, &dev->flags);
|
clear_bit(R5_MadeGood, &dev->flags);
|
||||||
}
|
}
|
||||||
if (test_bit(R5_MadeGoodRepl, &dev->flags)) {
|
if (test_bit(R5_MadeGoodRepl, &dev->flags)) {
|
||||||
struct md_rdev *rdev2 = rcu_dereference(
|
struct md_rdev *rdev2 = conf->disks[i].replacement;
|
||||||
conf->disks[i].replacement);
|
|
||||||
if (rdev2 && !test_bit(Faulty, &rdev2->flags)) {
|
if (rdev2 && !test_bit(Faulty, &rdev2->flags)) {
|
||||||
s->handle_bad_blocks = 1;
|
s->handle_bad_blocks = 1;
|
||||||
atomic_inc(&rdev2->nr_pending);
|
atomic_inc(&rdev2->nr_pending);
|
||||||
|
@ -4854,8 +4818,7 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
|
||||||
if (rdev && !test_bit(Faulty, &rdev->flags))
|
if (rdev && !test_bit(Faulty, &rdev->flags))
|
||||||
do_recovery = 1;
|
do_recovery = 1;
|
||||||
else if (!rdev) {
|
else if (!rdev) {
|
||||||
rdev = rcu_dereference(
|
rdev = conf->disks[i].replacement;
|
||||||
conf->disks[i].replacement);
|
|
||||||
if (rdev && !test_bit(Faulty, &rdev->flags))
|
if (rdev && !test_bit(Faulty, &rdev->flags))
|
||||||
do_recovery = 1;
|
do_recovery = 1;
|
||||||
}
|
}
|
||||||
|
@ -4882,7 +4845,6 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
|
||||||
else
|
else
|
||||||
s->replacing = 1;
|
s->replacing = 1;
|
||||||
}
|
}
|
||||||
rcu_read_unlock();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -5339,23 +5301,23 @@ finish:
|
||||||
struct r5dev *dev = &sh->dev[i];
|
struct r5dev *dev = &sh->dev[i];
|
||||||
if (test_and_clear_bit(R5_WriteError, &dev->flags)) {
|
if (test_and_clear_bit(R5_WriteError, &dev->flags)) {
|
||||||
/* We own a safe reference to the rdev */
|
/* We own a safe reference to the rdev */
|
||||||
rdev = rdev_pend_deref(conf->disks[i].rdev);
|
rdev = conf->disks[i].rdev;
|
||||||
if (!rdev_set_badblocks(rdev, sh->sector,
|
if (!rdev_set_badblocks(rdev, sh->sector,
|
||||||
RAID5_STRIPE_SECTORS(conf), 0))
|
RAID5_STRIPE_SECTORS(conf), 0))
|
||||||
md_error(conf->mddev, rdev);
|
md_error(conf->mddev, rdev);
|
||||||
rdev_dec_pending(rdev, conf->mddev);
|
rdev_dec_pending(rdev, conf->mddev);
|
||||||
}
|
}
|
||||||
if (test_and_clear_bit(R5_MadeGood, &dev->flags)) {
|
if (test_and_clear_bit(R5_MadeGood, &dev->flags)) {
|
||||||
rdev = rdev_pend_deref(conf->disks[i].rdev);
|
rdev = conf->disks[i].rdev;
|
||||||
rdev_clear_badblocks(rdev, sh->sector,
|
rdev_clear_badblocks(rdev, sh->sector,
|
||||||
RAID5_STRIPE_SECTORS(conf), 0);
|
RAID5_STRIPE_SECTORS(conf), 0);
|
||||||
rdev_dec_pending(rdev, conf->mddev);
|
rdev_dec_pending(rdev, conf->mddev);
|
||||||
}
|
}
|
||||||
if (test_and_clear_bit(R5_MadeGoodRepl, &dev->flags)) {
|
if (test_and_clear_bit(R5_MadeGoodRepl, &dev->flags)) {
|
||||||
rdev = rdev_pend_deref(conf->disks[i].replacement);
|
rdev = conf->disks[i].replacement;
|
||||||
if (!rdev)
|
if (!rdev)
|
||||||
/* rdev have been moved down */
|
/* rdev have been moved down */
|
||||||
rdev = rdev_pend_deref(conf->disks[i].rdev);
|
rdev = conf->disks[i].rdev;
|
||||||
rdev_clear_badblocks(rdev, sh->sector,
|
rdev_clear_badblocks(rdev, sh->sector,
|
||||||
RAID5_STRIPE_SECTORS(conf), 0);
|
RAID5_STRIPE_SECTORS(conf), 0);
|
||||||
rdev_dec_pending(rdev, conf->mddev);
|
rdev_dec_pending(rdev, conf->mddev);
|
||||||
|
@ -5514,24 +5476,22 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
|
||||||
&dd_idx, NULL);
|
&dd_idx, NULL);
|
||||||
end_sector = sector + bio_sectors(raid_bio);
|
end_sector = sector + bio_sectors(raid_bio);
|
||||||
|
|
||||||
rcu_read_lock();
|
|
||||||
if (r5c_big_stripe_cached(conf, sector))
|
if (r5c_big_stripe_cached(conf, sector))
|
||||||
goto out_rcu_unlock;
|
return 0;
|
||||||
|
|
||||||
rdev = rcu_dereference(conf->disks[dd_idx].replacement);
|
rdev = conf->disks[dd_idx].replacement;
|
||||||
if (!rdev || test_bit(Faulty, &rdev->flags) ||
|
if (!rdev || test_bit(Faulty, &rdev->flags) ||
|
||||||
rdev->recovery_offset < end_sector) {
|
rdev->recovery_offset < end_sector) {
|
||||||
rdev = rcu_dereference(conf->disks[dd_idx].rdev);
|
rdev = conf->disks[dd_idx].rdev;
|
||||||
if (!rdev)
|
if (!rdev)
|
||||||
goto out_rcu_unlock;
|
return 0;
|
||||||
if (test_bit(Faulty, &rdev->flags) ||
|
if (test_bit(Faulty, &rdev->flags) ||
|
||||||
!(test_bit(In_sync, &rdev->flags) ||
|
!(test_bit(In_sync, &rdev->flags) ||
|
||||||
rdev->recovery_offset >= end_sector))
|
rdev->recovery_offset >= end_sector))
|
||||||
goto out_rcu_unlock;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
atomic_inc(&rdev->nr_pending);
|
atomic_inc(&rdev->nr_pending);
|
||||||
rcu_read_unlock();
|
|
||||||
|
|
||||||
if (is_badblock(rdev, sector, bio_sectors(raid_bio), &first_bad,
|
if (is_badblock(rdev, sector, bio_sectors(raid_bio), &first_bad,
|
||||||
&bad_sectors)) {
|
&bad_sectors)) {
|
||||||
|
@ -5575,10 +5535,6 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
|
||||||
raid_bio->bi_iter.bi_sector);
|
raid_bio->bi_iter.bi_sector);
|
||||||
submit_bio_noacct(align_bio);
|
submit_bio_noacct(align_bio);
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
out_rcu_unlock:
|
|
||||||
rcu_read_unlock();
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct bio *chunk_aligned_read(struct mddev *mddev, struct bio *raid_bio)
|
static struct bio *chunk_aligned_read(struct mddev *mddev, struct bio *raid_bio)
|
||||||
|
@ -6581,14 +6537,12 @@ static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_n
|
||||||
* Note in case of > 1 drive failures it's possible we're rebuilding
|
* Note in case of > 1 drive failures it's possible we're rebuilding
|
||||||
* one drive while leaving another faulty drive in array.
|
* one drive while leaving another faulty drive in array.
|
||||||
*/
|
*/
|
||||||
rcu_read_lock();
|
|
||||||
for (i = 0; i < conf->raid_disks; i++) {
|
for (i = 0; i < conf->raid_disks; i++) {
|
||||||
struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev);
|
struct md_rdev *rdev = conf->disks[i].rdev;
|
||||||
|
|
||||||
if (rdev == NULL || test_bit(Faulty, &rdev->flags))
|
if (rdev == NULL || test_bit(Faulty, &rdev->flags))
|
||||||
still_degraded = 1;
|
still_degraded = 1;
|
||||||
}
|
}
|
||||||
rcu_read_unlock();
|
|
||||||
|
|
||||||
md_bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, still_degraded);
|
md_bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, still_degraded);
|
||||||
|
|
||||||
|
@ -7899,18 +7853,10 @@ static int raid5_run(struct mddev *mddev)
|
||||||
|
|
||||||
for (i = 0; i < conf->raid_disks && conf->previous_raid_disks;
|
for (i = 0; i < conf->raid_disks && conf->previous_raid_disks;
|
||||||
i++) {
|
i++) {
|
||||||
rdev = rdev_mdlock_deref(mddev, conf->disks[i].rdev);
|
rdev = conf->disks[i].rdev;
|
||||||
if (!rdev && conf->disks[i].replacement) {
|
|
||||||
/* The replacement is all we have yet */
|
|
||||||
rdev = rdev_mdlock_deref(mddev,
|
|
||||||
conf->disks[i].replacement);
|
|
||||||
conf->disks[i].replacement = NULL;
|
|
||||||
clear_bit(Replacement, &rdev->flags);
|
|
||||||
rcu_assign_pointer(conf->disks[i].rdev, rdev);
|
|
||||||
}
|
|
||||||
if (!rdev)
|
if (!rdev)
|
||||||
continue;
|
continue;
|
||||||
if (rcu_access_pointer(conf->disks[i].replacement) &&
|
if (conf->disks[i].replacement &&
|
||||||
conf->reshape_progress != MaxSector) {
|
conf->reshape_progress != MaxSector) {
|
||||||
/* replacements and reshape simply do not mix. */
|
/* replacements and reshape simply do not mix. */
|
||||||
pr_warn("md: cannot handle concurrent replacement and reshape.\n");
|
pr_warn("md: cannot handle concurrent replacement and reshape.\n");
|
||||||
|
@ -8094,15 +8040,16 @@ static void raid5_status(struct seq_file *seq, struct mddev *mddev)
|
||||||
struct r5conf *conf = mddev->private;
|
struct r5conf *conf = mddev->private;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
|
lockdep_assert_held(&mddev->lock);
|
||||||
|
|
||||||
seq_printf(seq, " level %d, %dk chunk, algorithm %d", mddev->level,
|
seq_printf(seq, " level %d, %dk chunk, algorithm %d", mddev->level,
|
||||||
conf->chunk_sectors / 2, mddev->layout);
|
conf->chunk_sectors / 2, mddev->layout);
|
||||||
seq_printf (seq, " [%d/%d] [", conf->raid_disks, conf->raid_disks - mddev->degraded);
|
seq_printf (seq, " [%d/%d] [", conf->raid_disks, conf->raid_disks - mddev->degraded);
|
||||||
rcu_read_lock();
|
|
||||||
for (i = 0; i < conf->raid_disks; i++) {
|
for (i = 0; i < conf->raid_disks; i++) {
|
||||||
struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev);
|
struct md_rdev *rdev = READ_ONCE(conf->disks[i].rdev);
|
||||||
|
|
||||||
seq_printf (seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
|
seq_printf (seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
|
||||||
}
|
}
|
||||||
rcu_read_unlock();
|
|
||||||
seq_printf (seq, "]");
|
seq_printf (seq, "]");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -8140,9 +8087,8 @@ static int raid5_spare_active(struct mddev *mddev)
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
|
|
||||||
for (i = 0; i < conf->raid_disks; i++) {
|
for (i = 0; i < conf->raid_disks; i++) {
|
||||||
rdev = rdev_mdlock_deref(mddev, conf->disks[i].rdev);
|
rdev = conf->disks[i].rdev;
|
||||||
replacement = rdev_mdlock_deref(mddev,
|
replacement = conf->disks[i].replacement;
|
||||||
conf->disks[i].replacement);
|
|
||||||
if (replacement
|
if (replacement
|
||||||
&& replacement->recovery_offset == MaxSector
|
&& replacement->recovery_offset == MaxSector
|
||||||
&& !test_bit(Faulty, &replacement->flags)
|
&& !test_bit(Faulty, &replacement->flags)
|
||||||
|
@ -8181,7 +8127,7 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||||
struct r5conf *conf = mddev->private;
|
struct r5conf *conf = mddev->private;
|
||||||
int err = 0;
|
int err = 0;
|
||||||
int number = rdev->raid_disk;
|
int number = rdev->raid_disk;
|
||||||
struct md_rdev __rcu **rdevp;
|
struct md_rdev **rdevp;
|
||||||
struct disk_info *p;
|
struct disk_info *p;
|
||||||
struct md_rdev *tmp;
|
struct md_rdev *tmp;
|
||||||
|
|
||||||
|
@ -8204,9 +8150,9 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||||
if (unlikely(number >= conf->pool_size))
|
if (unlikely(number >= conf->pool_size))
|
||||||
return 0;
|
return 0;
|
||||||
p = conf->disks + number;
|
p = conf->disks + number;
|
||||||
if (rdev == rcu_access_pointer(p->rdev))
|
if (rdev == p->rdev)
|
||||||
rdevp = &p->rdev;
|
rdevp = &p->rdev;
|
||||||
else if (rdev == rcu_access_pointer(p->replacement))
|
else if (rdev == p->replacement)
|
||||||
rdevp = &p->replacement;
|
rdevp = &p->replacement;
|
||||||
else
|
else
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -8226,28 +8172,24 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||||
if (!test_bit(Faulty, &rdev->flags) &&
|
if (!test_bit(Faulty, &rdev->flags) &&
|
||||||
mddev->recovery_disabled != conf->recovery_disabled &&
|
mddev->recovery_disabled != conf->recovery_disabled &&
|
||||||
!has_failed(conf) &&
|
!has_failed(conf) &&
|
||||||
(!rcu_access_pointer(p->replacement) ||
|
(!p->replacement || p->replacement == rdev) &&
|
||||||
rcu_access_pointer(p->replacement) == rdev) &&
|
|
||||||
number < conf->raid_disks) {
|
number < conf->raid_disks) {
|
||||||
err = -EBUSY;
|
err = -EBUSY;
|
||||||
goto abort;
|
goto abort;
|
||||||
}
|
}
|
||||||
*rdevp = NULL;
|
WRITE_ONCE(*rdevp, NULL);
|
||||||
if (!err) {
|
if (!err) {
|
||||||
err = log_modify(conf, rdev, false);
|
err = log_modify(conf, rdev, false);
|
||||||
if (err)
|
if (err)
|
||||||
goto abort;
|
goto abort;
|
||||||
}
|
}
|
||||||
|
|
||||||
tmp = rcu_access_pointer(p->replacement);
|
tmp = p->replacement;
|
||||||
if (tmp) {
|
if (tmp) {
|
||||||
/* We must have just cleared 'rdev' */
|
/* We must have just cleared 'rdev' */
|
||||||
rcu_assign_pointer(p->rdev, tmp);
|
WRITE_ONCE(p->rdev, tmp);
|
||||||
clear_bit(Replacement, &tmp->flags);
|
clear_bit(Replacement, &tmp->flags);
|
||||||
smp_mb(); /* Make sure other CPUs may see both as identical
|
WRITE_ONCE(p->replacement, NULL);
|
||||||
* but will never see neither - if they are careful
|
|
||||||
*/
|
|
||||||
rcu_assign_pointer(p->replacement, NULL);
|
|
||||||
|
|
||||||
if (!err)
|
if (!err)
|
||||||
err = log_modify(conf, tmp, true);
|
err = log_modify(conf, tmp, true);
|
||||||
|
@ -8315,7 +8257,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||||
rdev->raid_disk = disk;
|
rdev->raid_disk = disk;
|
||||||
if (rdev->saved_raid_disk != disk)
|
if (rdev->saved_raid_disk != disk)
|
||||||
conf->fullsync = 1;
|
conf->fullsync = 1;
|
||||||
rcu_assign_pointer(p->rdev, rdev);
|
WRITE_ONCE(p->rdev, rdev);
|
||||||
|
|
||||||
err = log_modify(conf, rdev, true);
|
err = log_modify(conf, rdev, true);
|
||||||
|
|
||||||
|
@ -8324,7 +8266,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||||
}
|
}
|
||||||
for (disk = first; disk <= last; disk++) {
|
for (disk = first; disk <= last; disk++) {
|
||||||
p = conf->disks + disk;
|
p = conf->disks + disk;
|
||||||
tmp = rdev_mdlock_deref(mddev, p->rdev);
|
tmp = p->rdev;
|
||||||
if (test_bit(WantReplacement, &tmp->flags) &&
|
if (test_bit(WantReplacement, &tmp->flags) &&
|
||||||
mddev->reshape_position == MaxSector &&
|
mddev->reshape_position == MaxSector &&
|
||||||
p->replacement == NULL) {
|
p->replacement == NULL) {
|
||||||
|
@ -8333,7 +8275,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||||
rdev->raid_disk = disk;
|
rdev->raid_disk = disk;
|
||||||
err = 0;
|
err = 0;
|
||||||
conf->fullsync = 1;
|
conf->fullsync = 1;
|
||||||
rcu_assign_pointer(p->replacement, rdev);
|
WRITE_ONCE(p->replacement, rdev);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -8466,7 +8408,7 @@ static int raid5_start_reshape(struct mddev *mddev)
|
||||||
if (mddev->recovery_cp < MaxSector)
|
if (mddev->recovery_cp < MaxSector)
|
||||||
return -EBUSY;
|
return -EBUSY;
|
||||||
for (i = 0; i < conf->raid_disks; i++)
|
for (i = 0; i < conf->raid_disks; i++)
|
||||||
if (rdev_mdlock_deref(mddev, conf->disks[i].replacement))
|
if (conf->disks[i].replacement)
|
||||||
return -EBUSY;
|
return -EBUSY;
|
||||||
|
|
||||||
rdev_for_each(rdev, mddev) {
|
rdev_for_each(rdev, mddev) {
|
||||||
|
@ -8637,12 +8579,10 @@ static void raid5_finish_reshape(struct mddev *mddev)
|
||||||
for (d = conf->raid_disks ;
|
for (d = conf->raid_disks ;
|
||||||
d < conf->raid_disks - mddev->delta_disks;
|
d < conf->raid_disks - mddev->delta_disks;
|
||||||
d++) {
|
d++) {
|
||||||
rdev = rdev_mdlock_deref(mddev,
|
rdev = conf->disks[d].rdev;
|
||||||
conf->disks[d].rdev);
|
|
||||||
if (rdev)
|
if (rdev)
|
||||||
clear_bit(In_sync, &rdev->flags);
|
clear_bit(In_sync, &rdev->flags);
|
||||||
rdev = rdev_mdlock_deref(mddev,
|
rdev = conf->disks[d].replacement;
|
||||||
conf->disks[d].replacement);
|
|
||||||
if (rdev)
|
if (rdev)
|
||||||
clear_bit(In_sync, &rdev->flags);
|
clear_bit(In_sync, &rdev->flags);
|
||||||
}
|
}
|
||||||
|
|
|
@ -473,8 +473,8 @@ enum {
|
||||||
*/
|
*/
|
||||||
|
|
||||||
struct disk_info {
|
struct disk_info {
|
||||||
struct md_rdev __rcu *rdev;
|
struct md_rdev *rdev;
|
||||||
struct md_rdev __rcu *replacement;
|
struct md_rdev *replacement;
|
||||||
struct page *extra_page; /* extra page to use in prexor */
|
struct page *extra_page; /* extra page to use in prexor */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue