mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
bcachefs: Fixes for going RO
Now that interior btree updates are fully transactional, we don't need to
write out alloc info in a loop. However, interior btree updates do put more
things in the journal, so we still need a loop in the RO sequence.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent baeed3c3c0
commit 039fc4c522
5 changed files with 64 additions and 37 deletions
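
Before the per-file changes, here is a small standalone sketch of the "clean passes" pattern the commit message describes (illustrative only, not bcachefs code; every helper name in it is made up): shutdown keeps flushing until two consecutive passes find nothing left to do, because flushing journal pins can itself generate more btree and journal updates.

/*
 * Illustrative sketch of a "clean passes" shutdown loop (hypothetical
 * helpers, not the real bcachefs API): keep flushing until two passes in
 * a row do no work, since flushing can queue more work.
 */
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for the real flush operations: */
static bool flush_journal_pins(int *pending_pins)
{
	bool did_work = *pending_pins > 0;

	if (did_work)
		(*pending_pins)--;
	return did_work;
}

static bool wait_on_interior_updates(int *pending_updates)
{
	bool did_work = *pending_updates > 0;

	*pending_updates = 0;
	return did_work;
}

int main(void)
{
	int journal_pins = 3, interior_updates = 1;
	unsigned clean_passes = 0;

	do {
		clean_passes++;

		/* Any pass that does work resets the counter: */
		if (flush_journal_pins(&journal_pins))
			clean_passes = 0;
		if (wait_on_interior_updates(&interior_updates))
			clean_passes = 0;
	} while (clean_passes < 2);

	printf("shutdown flush settled: no work left for two passes\n");
	return 0;
}

The real loop in __bch2_fs_read_only below has the same shape: journal_flush_pins() now reports whether it did work, and any pass that flushes pins or finds in-flight interior btree updates resets the clean-pass counter.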
fs/bcachefs/alloc_background.c

@@ -869,6 +869,15 @@ static int bch2_invalidate_one_bucket2(struct btree_trans *trans,
 	if (!invalidating_cached_data)
 		goto out;
 
+	/*
+	 * If the read-only path is trying to shut down, we can't be generating
+	 * new btree updates:
+	 */
+	if (test_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags)) {
+		ret = 1;
+		goto out;
+	}
+
 	BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8);
 
 	bch2_btree_iter_set_pos(iter, POS(ca->dev_idx, b));
@@ -956,7 +965,7 @@ out:
 		percpu_up_read(&c->mark_lock);
 	}
 
-	return ret;
+	return ret < 0 ? ret : 0;
 }
 
 static bool bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
fs/bcachefs/bcachefs.h

@@ -482,6 +482,7 @@ enum {
 	BCH_FS_ALLOC_CLEAN,
 	BCH_FS_ALLOCATOR_STARTED,
 	BCH_FS_ALLOCATOR_RUNNING,
+	BCH_FS_ALLOCATOR_STOPPING,
 	BCH_FS_INITIAL_GC_DONE,
 	BCH_FS_FSCK_DONE,
 	BCH_FS_STARTED,
fs/bcachefs/journal_reclaim.c

@@ -413,10 +413,12 @@ journal_get_next_pin(struct journal *j, u64 max_seq, u64 *seq)
 	return ret;
 }
 
-static void journal_flush_pins(struct journal *j, u64 seq_to_flush,
+/* returns true if we did work */
+static bool journal_flush_pins(struct journal *j, u64 seq_to_flush,
 			       unsigned min_nr)
 {
 	struct journal_entry_pin *pin;
+	bool ret = false;
 	u64 seq;
 
 	lockdep_assert_held(&j->reclaim_lock);
@@ -431,7 +433,10 @@ static void journal_flush_pins(struct journal *j, u64 seq_to_flush,
 		BUG_ON(j->flush_in_progress != pin);
 		j->flush_in_progress = NULL;
 		wake_up(&j->pin_flush_wait);
+		ret = true;
 	}
+
+	return ret;
 }
 
 /**
@@ -523,7 +528,8 @@ void bch2_journal_reclaim_work(struct work_struct *work)
 	mutex_unlock(&j->reclaim_lock);
 }
 
-static int journal_flush_done(struct journal *j, u64 seq_to_flush)
+static int journal_flush_done(struct journal *j, u64 seq_to_flush,
+			      bool *did_work)
 {
 	int ret;
 
@@ -533,7 +539,7 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush)
 
 	mutex_lock(&j->reclaim_lock);
 
-	journal_flush_pins(j, seq_to_flush, 0);
+	*did_work = journal_flush_pins(j, seq_to_flush, 0);
 
 	spin_lock(&j->lock);
 	/*
@@ -551,12 +557,17 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush)
 	return ret;
 }
 
-void bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush)
+bool bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush)
 {
-	if (!test_bit(JOURNAL_STARTED, &j->flags))
-		return;
+	bool did_work = false;
 
-	closure_wait_event(&j->async_wait, journal_flush_done(j, seq_to_flush));
+	if (!test_bit(JOURNAL_STARTED, &j->flags))
+		return false;
+
+	closure_wait_event(&j->async_wait,
+			   journal_flush_done(j, seq_to_flush, &did_work));
+
+	return did_work;
 }
 
 int bch2_journal_flush_device_pins(struct journal *j, int dev_idx)
fs/bcachefs/journal_reclaim.h

@@ -53,11 +53,11 @@ void bch2_journal_do_discards(struct journal *);
 void bch2_journal_reclaim(struct journal *);
 void bch2_journal_reclaim_work(struct work_struct *);
 
-void bch2_journal_flush_pins(struct journal *, u64);
+bool bch2_journal_flush_pins(struct journal *, u64);
 
-static inline void bch2_journal_flush_all_pins(struct journal *j)
+static inline bool bch2_journal_flush_all_pins(struct journal *j)
 {
-	bch2_journal_flush_pins(j, U64_MAX);
+	return bch2_journal_flush_pins(j, U64_MAX);
 }
 
 int bch2_journal_flush_device_pins(struct journal *, int);
fs/bcachefs/super.c

@@ -175,7 +175,7 @@ struct bch_fs *bch2_uuid_to_fs(__uuid_t uuid)
 static void __bch2_fs_read_only(struct bch_fs *c)
 {
 	struct bch_dev *ca;
-	bool wrote;
+	bool wrote = false;
 	unsigned i, clean_passes = 0;
 	int ret;
 
@@ -200,39 +200,46 @@ static void __bch2_fs_read_only(struct bch_fs *c)
 		goto nowrote_alloc;
 
 	bch_verbose(c, "writing alloc info");
+	/*
+	 * This should normally just be writing the bucket read/write clocks:
+	 */
+	ret = bch2_stripes_write(c, BTREE_INSERT_NOCHECK_RW, &wrote) ?:
+		bch2_alloc_write(c, BTREE_INSERT_NOCHECK_RW, &wrote);
+	bch_verbose(c, "writing alloc info complete");
+
+	if (ret && !test_bit(BCH_FS_EMERGENCY_RO, &c->flags))
+		bch2_fs_inconsistent(c, "error writing out alloc info %i", ret);
+
+	if (ret)
+		goto nowrote_alloc;
+
+	bch_verbose(c, "flushing journal and stopping allocators");
+
+	bch2_journal_flush_all_pins(&c->journal);
+	set_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags);
 
 	do {
-		wrote = false;
+		clean_passes++;
 
-		ret = bch2_stripes_write(c, BTREE_INSERT_NOCHECK_RW, &wrote) ?:
-			bch2_alloc_write(c, BTREE_INSERT_NOCHECK_RW, &wrote);
-
-		if (ret && !test_bit(BCH_FS_EMERGENCY_RO, &c->flags))
-			bch2_fs_inconsistent(c, "error writing out alloc info %i", ret);
-
-		if (ret)
-			goto nowrote_alloc;
-
-		for_each_member_device(ca, c, i)
-			bch2_dev_allocator_quiesce(c, ca);
-
-		bch2_journal_flush_all_pins(&c->journal);
+		if (bch2_journal_flush_all_pins(&c->journal))
+			clean_passes = 0;
 
 		/*
-		 * We need to explicitly wait on btree interior updates to complete
-		 * before stopping the journal, flushing all journal pins isn't
-		 * sufficient, because in the BTREE_INTERIOR_UPDATING_ROOT case btree
-		 * interior updates have to drop their journal pin before they're
-		 * fully complete:
+		 * In flight interior btree updates will generate more journal
+		 * updates and btree updates (alloc btree):
 		 */
-		closure_wait_event(&c->btree_interior_update_wait,
-				   !bch2_btree_interior_updates_nr_pending(c));
+		if (bch2_btree_interior_updates_nr_pending(c)) {
+			closure_wait_event(&c->btree_interior_update_wait,
					   !bch2_btree_interior_updates_nr_pending(c));
+			clean_passes = 0;
+		}
 		flush_work(&c->btree_interior_update_work);
 
-		clean_passes = wrote ? 0 : clean_passes + 1;
+		if (bch2_journal_flush_all_pins(&c->journal))
+			clean_passes = 0;
 	} while (clean_passes < 2);
+	bch_verbose(c, "flushing journal and stopping allocators complete");
 
-	bch_verbose(c, "writing alloc info complete");
 	set_bit(BCH_FS_ALLOC_CLEAN, &c->flags);
 nowrote_alloc:
 	closure_wait_event(&c->btree_interior_update_wait,
@@ -243,11 +250,10 @@ nowrote_alloc:
 		bch2_dev_allocator_stop(ca);
 
 	clear_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags);
+	clear_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags);
 
 	bch2_fs_journal_stop(&c->journal);
 
-	/* XXX: mark super that alloc info is persistent */
-
 	/*
 	 * the journal kicks off btree writes via reclaim - wait for in flight
 	 * writes after stopping journal: