// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "alloc_background.h"
#include "backpointers.h"
#include "btree_gc.h"
#include "btree_node_scan.h"
#include "disk_accounting.h"
#include "ec.h"
#include "fsck.h"
#include "inode.h"
#include "journal.h"
#include "lru.h"
#include "logged_ops.h"
#include "movinggc.h"
#include "rebalance.h"
#include "recovery.h"
#include "recovery_passes.h"
#include "snapshot.h"
#include "subvolume.h"
#include "super.h"
#include "super-io.h"

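/* Name of each recovery pass, for log messages and prt_bitflags(): */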
const char * const bch2_recovery_passes[] = {
#define x(_fn, ...)	#_fn,
	BCH_RECOVERY_PASSES()
#undef x
	NULL
};

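/*
 * Tables mapping between the in-memory recovery pass enum (ordered by the
 * order passes run in) and the stable numbering stored in the superblock,
 * which must not change:
 */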
static const u8 passes_to_stable_map[] = {
#define x(n, id, ...)	[BCH_RECOVERY_PASS_##n] = BCH_RECOVERY_PASS_STABLE_##n,
	BCH_RECOVERY_PASSES()
#undef x
};

static const u8 passes_from_stable_map[] = {
#define x(n, id, ...)	[BCH_RECOVERY_PASS_STABLE_##n] = BCH_RECOVERY_PASS_##n,
	BCH_RECOVERY_PASSES()
#undef x
};

static enum bch_recovery_pass_stable bch2_recovery_pass_to_stable(enum bch_recovery_pass pass)
{
	return passes_to_stable_map[pass];
}

u64 bch2_recovery_passes_to_stable(u64 v)
{
	u64 ret = 0;
	for (unsigned i = 0; i < ARRAY_SIZE(passes_to_stable_map); i++)
		if (v & BIT_ULL(i))
			ret |= BIT_ULL(passes_to_stable_map[i]);
	return ret;
}

static enum bch_recovery_pass bch2_recovery_pass_from_stable(enum bch_recovery_pass_stable pass)
{
	return pass < ARRAY_SIZE(passes_from_stable_map)
		? passes_from_stable_map[pass]
		: 0;
}

u64 bch2_recovery_passes_from_stable(u64 v)
{
	u64 ret = 0;
	for (unsigned i = 0; i < ARRAY_SIZE(passes_from_stable_map); i++)
		if (v & BIT_ULL(i))
			ret |= BIT_ULL(passes_from_stable_map[i]);
	return ret;
}

static int bch2_sb_recovery_passes_validate(struct bch_sb *sb, struct bch_sb_field *f,
					    enum bch_validate_flags flags, struct printbuf *err)
{
	return 0;
}

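/*
 * Print the recovery_passes superblock section: when each pass last ran and
 * how long it took:
 */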
static void bch2_sb_recovery_passes_to_text(struct printbuf *out,
					    struct bch_sb *sb,
					    struct bch_sb_field *f)
{
	struct bch_sb_field_recovery_passes *r =
		field_to_type(f, recovery_passes);
	unsigned nr = recovery_passes_nr_entries(r);

	if (out->nr_tabstops < 1)
		printbuf_tabstop_push(out, 32);
	if (out->nr_tabstops < 2)
		printbuf_tabstop_push(out, 16);

	prt_printf(out, "Pass\tLast run\tLast runtime\n");

	for (struct recovery_pass_entry *i = r->start; i < r->start + nr; i++) {
		if (!i->last_run)
			continue;

		unsigned idx = i - r->start;

		prt_printf(out, "%s\t", bch2_recovery_passes[bch2_recovery_pass_from_stable(idx)]);

		bch2_prt_datetime(out, le64_to_cpu(i->last_run));
		prt_tab(out);

		bch2_pr_time_units(out, le32_to_cpu(i->last_runtime) * NSEC_PER_SEC);

		if (BCH_RECOVERY_PASS_NO_RATELIMIT(i))
			prt_str(out, " (no ratelimit)");

		prt_newline(out);
	}
}

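/*
 * Get the superblock entry for @pass, growing the recovery_passes section if
 * it's not yet big enough; returns NULL if resizing fails:
 */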
static struct recovery_pass_entry *bch2_sb_recovery_pass_entry(struct bch_fs *c,
							       enum bch_recovery_pass pass)
{
	enum bch_recovery_pass_stable stable = bch2_recovery_pass_to_stable(pass);

	lockdep_assert_held(&c->sb_lock);

	struct bch_sb_field_recovery_passes *r =
		bch2_sb_field_get(c->disk_sb.sb, recovery_passes);

	if (stable >= recovery_passes_nr_entries(r)) {
		unsigned u64s = struct_size(r, start, stable + 1) / sizeof(u64);

		r = bch2_sb_field_resize(&c->disk_sb, recovery_passes, u64s);
		if (!r) {
			bch_err(c, "error creating recovery_passes sb section");
			return NULL;
		}
	}

	return r->start + stable;
}

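/*
 * Record in the superblock that @pass completed: clear it from
 * recovery_passes_required and update its last run time and runtime:
 */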
static void bch2_sb_recovery_pass_complete(struct bch_fs *c,
					   enum bch_recovery_pass pass,
					   s64 start_time)
{
	guard(mutex)(&c->sb_lock);
	struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
	__clear_bit_le64(bch2_recovery_pass_to_stable(pass),
			 ext->recovery_passes_required);

	struct recovery_pass_entry *e = bch2_sb_recovery_pass_entry(c, pass);
	if (e) {
		s64 end_time = ktime_get_real_seconds();
		e->last_run	= cpu_to_le64(end_time);
		e->last_runtime	= cpu_to_le32(max(0, end_time - start_time));
		SET_BCH_RECOVERY_PASS_NO_RATELIMIT(e, false);
	}

	bch2_write_super(c);
}

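/* Persistently mark @pass so that future runs of it are never ratelimited: */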
void bch2_recovery_pass_set_no_ratelimit(struct bch_fs *c,
					 enum bch_recovery_pass pass)
{
	guard(mutex)(&c->sb_lock);

	struct recovery_pass_entry *e = bch2_sb_recovery_pass_entry(c, pass);
	if (e && !BCH_RECOVERY_PASS_NO_RATELIMIT(e)) {
		SET_BCH_RECOVERY_PASS_NO_RATELIMIT(e, true);
		bch2_write_super(c);
	}
}

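/* Should an explicitly requested run of @pass be deferred and ratelimited? */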
static bool bch2_recovery_pass_want_ratelimit(struct bch_fs *c, enum bch_recovery_pass pass)
{
	enum bch_recovery_pass_stable stable = bch2_recovery_pass_to_stable(pass);
	bool ret = false;

	lockdep_assert_held(&c->sb_lock);

	struct bch_sb_field_recovery_passes *r =
		bch2_sb_field_get(c->disk_sb.sb, recovery_passes);

	if (stable < recovery_passes_nr_entries(r)) {
		struct recovery_pass_entry *i = r->start + stable;

		/*
		 * Ratelimit if the last runtime was more than 1% of the time
		 * since we last ran
		 */
		ret = (u64) le32_to_cpu(i->last_runtime) * 100 >
			ktime_get_real_seconds() - le64_to_cpu(i->last_run);

		if (BCH_RECOVERY_PASS_NO_RATELIMIT(i))
			ret = false;
	}

	return ret;
}

const struct bch_sb_field_ops bch_sb_field_ops_recovery_passes = {
	.validate	= bch2_sb_recovery_passes_validate,
	.to_text	= bch2_sb_recovery_passes_to_text
};

/* Fake recovery pass, so that scan_for_btree_nodes isn't 0: */
static int bch2_recovery_pass_empty(struct bch_fs *c)
{
	return 0;
}

static int bch2_set_may_go_rw(struct bch_fs *c)
{
	struct journal_keys *keys = &c->journal_keys;

	/*
	 * After we go RW, the journal keys buffer can't be modified (except for
	 * setting journal_key->overwritten): it will be accessed by multiple
	 * threads.
	 */
	move_gap(keys, keys->nr);

	set_bit(BCH_FS_may_go_rw, &c->flags);

	if (go_rw_in_recovery(c)) {
		if (c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info)) {
			bch_info(c, "mounting a filesystem with no alloc info read-write; will recreate");
			bch2_reconstruct_alloc(c);
		}

		return bch2_fs_read_write_early(c);
	}
	return 0;
}

/*
 * Make sure root inode is readable while we're still in recovery and can rewind
 * for repair:
 */
static int bch2_lookup_root_inode(struct bch_fs *c)
{
	subvol_inum inum = BCACHEFS_ROOT_SUBVOL_INUM;
	struct bch_inode_unpacked inode_u;
	struct bch_subvolume subvol;

	return bch2_trans_do(c,
		bch2_subvolume_get(trans, inum.subvol, true, &subvol) ?:
		bch2_inode_find_by_inum_trans(trans, inum, &inode_u));
}

struct recovery_pass_fn {
	int		(*fn)(struct bch_fs *);
	unsigned	when;
};

static struct recovery_pass_fn recovery_pass_fns[] = {
#define x(_fn, _id, _when)	{ .fn = bch2_##_fn, .when = _when },
	BCH_RECOVERY_PASSES()
#undef x
};

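/* Bitmask of the passes whose 'when' flags match @flags: */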
static u64 bch2_recovery_passes_match(unsigned flags)
{
	u64 ret = 0;

	for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++)
		if (recovery_pass_fns[i].when & flags)
			ret |= BIT_ULL(i);
	return ret;
}

u64 bch2_fsck_recovery_passes(void)
{
	return bch2_recovery_passes_match(PASS_FSCK);
}

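/*
 * Queue the work item that runs scheduled online recovery passes in the
 * background:
 */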
static void bch2_run_async_recovery_passes(struct bch_fs *c)
{
	if (!down_trylock(&c->recovery.run_lock))
		return;

	if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_async_recovery_passes))
		goto unlock;

	if (queue_work(system_long_wq, &c->recovery.work))
		return;

	enumerated_ref_put(&c->writes, BCH_WRITE_REF_async_recovery_passes);
unlock:
	up(&c->recovery.run_lock);
}

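/*
 * Check whether @pass still needs to be scheduled, adjusting *flags first
 * (e.g. dropping the ratelimit flag if @pass shouldn't be ratelimited):
 */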
static bool recovery_pass_needs_set(struct bch_fs *c,
				    enum bch_recovery_pass pass,
				    enum bch_run_recovery_pass_flags *flags)
{
	struct bch_fs_recovery *r = &c->recovery;

	/*
	 * Never run scan_for_btree_nodes persistently: check_topology will run
	 * it if required
	 */
	if (pass == BCH_RECOVERY_PASS_scan_for_btree_nodes)
		*flags |= RUN_RECOVERY_PASS_nopersistent;

	if ((*flags & RUN_RECOVERY_PASS_ratelimit) &&
	    !bch2_recovery_pass_want_ratelimit(c, pass))
		*flags &= ~RUN_RECOVERY_PASS_ratelimit;

	/*
	 * If RUN_RECOVERY_PASS_nopersistent is set, we don't want to do
	 * anything if the pass has already run: it means we need a prior pass
	 * to run before we continue repairing; we don't expect that pass to fix
	 * the damage we encountered.
	 *
	 * Otherwise, we run run_explicit_recovery_pass when we find damage, so
	 * it should run again even if it's already run:
	 */
	bool in_recovery = test_bit(BCH_FS_in_recovery, &c->flags);
	bool persistent = !in_recovery || !(*flags & RUN_RECOVERY_PASS_nopersistent);
	bool rewind = in_recovery &&
		r->curr_pass > pass &&
		!(r->passes_complete & BIT_ULL(pass));

	if (persistent
	    ? !(c->sb.recovery_passes_required & BIT_ULL(pass))
	    : !((r->passes_to_run|r->passes_complete) & BIT_ULL(pass)))
		return true;

	if (!(*flags & RUN_RECOVERY_PASS_ratelimit) &&
	    (r->passes_ratelimiting & BIT_ULL(pass)))
		return true;

	if (rewind)
		return true;

	return false;
}

/*
 * For when we need to rewind recovery passes and run a pass we skipped:
 */
int __bch2_run_explicit_recovery_pass(struct bch_fs *c,
				      struct printbuf *out,
				      enum bch_recovery_pass pass,
				      enum bch_run_recovery_pass_flags flags)
{
	struct bch_fs_recovery *r = &c->recovery;
	int ret = 0;

	lockdep_assert_held(&c->sb_lock);

	bch2_printbuf_make_room(out, 1024);
	out->atomic++;

	unsigned long lockflags;
	spin_lock_irqsave(&r->lock, lockflags);

	if (!recovery_pass_needs_set(c, pass, &flags))
		goto out;

	bool in_recovery = test_bit(BCH_FS_in_recovery, &c->flags);
	bool rewind = in_recovery &&
		r->curr_pass > pass &&
		!(r->passes_complete & BIT_ULL(pass));
	bool ratelimit = flags & RUN_RECOVERY_PASS_ratelimit;

	if (!(flags & RUN_RECOVERY_PASS_nopersistent)) {
		struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
		__set_bit_le64(bch2_recovery_pass_to_stable(pass), ext->recovery_passes_required);
	}

	if (pass < BCH_RECOVERY_PASS_set_may_go_rw &&
	    (!in_recovery || r->curr_pass >= BCH_RECOVERY_PASS_set_may_go_rw)) {
		prt_printf(out, "need recovery pass %s (%u), but already rw\n",
			   bch2_recovery_passes[pass], pass);
		ret = bch_err_throw(c, cannot_rewind_recovery);
		goto out;
	}

	if (ratelimit)
		r->passes_ratelimiting |= BIT_ULL(pass);
	else
		r->passes_ratelimiting &= ~BIT_ULL(pass);

	if (in_recovery && !ratelimit) {
		prt_printf(out, "running recovery pass %s (%u), currently at %s (%u)%s\n",
			   bch2_recovery_passes[pass], pass,
			   bch2_recovery_passes[r->curr_pass], r->curr_pass,
			   rewind ? " - rewinding" : "");

		r->passes_to_run |= BIT_ULL(pass);

		if (rewind) {
			r->next_pass = pass;
			r->passes_complete &= (1ULL << pass) >> 1;
			ret = bch_err_throw(c, restart_recovery);
		}
	} else {
		prt_printf(out, "scheduling recovery pass %s (%u)%s\n",
			   bch2_recovery_passes[pass], pass,
			   ratelimit ? " - ratelimiting" : "");

		struct recovery_pass_fn *p = recovery_pass_fns + pass;
		if (p->when & PASS_ONLINE)
			bch2_run_async_recovery_passes(c);
	}
out:
	spin_unlock_irqrestore(&r->lock, lockflags);
	--out->atomic;
	return ret;
}

int bch2_run_explicit_recovery_pass(struct bch_fs *c,
				    struct printbuf *out,
				    enum bch_recovery_pass pass,
				    enum bch_run_recovery_pass_flags flags)
{
	int ret = 0;

	if (recovery_pass_needs_set(c, pass, &flags)) {
		guard(mutex)(&c->sb_lock);
		ret = __bch2_run_explicit_recovery_pass(c, out, pass, flags);
		bch2_write_super(c);
	}

	return ret;
}

/*
 * Returns 0 if @pass has run recently, otherwise one of
 * -BCH_ERR_restart_recovery
 * -BCH_ERR_recovery_pass_will_run
 */
int bch2_require_recovery_pass(struct bch_fs *c,
			       struct printbuf *out,
			       enum bch_recovery_pass pass)
{
	if (test_bit(BCH_FS_in_recovery, &c->flags) &&
	    c->recovery.passes_complete & BIT_ULL(pass))
		return 0;

	guard(mutex)(&c->sb_lock);

	if (bch2_recovery_pass_want_ratelimit(c, pass))
		return 0;

	enum bch_run_recovery_pass_flags flags = 0;
	int ret = 0;

	if (recovery_pass_needs_set(c, pass, &flags)) {
		ret = __bch2_run_explicit_recovery_pass(c, out, pass, flags);
		bch2_write_super(c);
	}

	return ret ?: bch_err_throw(c, recovery_pass_will_run);
}

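/*
 * Request a nonpersistent run of @pass, printing the log messages that
 * scheduling it generates:
 */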
int bch2_run_print_explicit_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
{
	enum bch_run_recovery_pass_flags flags = 0;

	if (!recovery_pass_needs_set(c, pass, &flags))
		return 0;

	struct printbuf buf = PRINTBUF;
	bch2_log_msg_start(c, &buf);

	mutex_lock(&c->sb_lock);
	int ret = __bch2_run_explicit_recovery_pass(c, &buf, pass,
						RUN_RECOVERY_PASS_nopersistent);
	mutex_unlock(&c->sb_lock);

	bch2_print_str(c, KERN_NOTICE, buf.buf);
	printbuf_exit(&buf);
	return ret;
}

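/*
 * Run a single pass, updating passes_to_run/passes_failing and recording
 * completion in the superblock on success:
 */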
static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
{
	struct bch_fs_recovery *r = &c->recovery;
	struct recovery_pass_fn *p = recovery_pass_fns + pass;

	if (!(p->when & PASS_SILENT))
		bch2_print(c, KERN_INFO bch2_log_msg(c, "%s..."),
			   bch2_recovery_passes[pass]);

	s64 start_time = ktime_get_real_seconds();
	int ret = p->fn(c);

	r->passes_to_run &= ~BIT_ULL(pass);

	if (ret) {
		r->passes_failing |= BIT_ULL(pass);
		return ret;
	}

	r->passes_failing = 0;

	if (!test_bit(BCH_FS_error, &c->flags))
		bch2_sb_recovery_pass_complete(c, pass, start_time);

	if (!(p->when & PASS_SILENT))
		bch2_print(c, KERN_CONT " done\n");

	return 0;
}

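/*
 * Main loop: run each pass in @orig_passes_to_run in order, handling rewinds
 * requested via __bch2_run_explicit_recovery_pass():
 */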
static int __bch2_run_recovery_passes(struct bch_fs *c, u64 orig_passes_to_run,
				      bool online)
{
	struct bch_fs_recovery *r = &c->recovery;
	int ret = 0;

	spin_lock_irq(&r->lock);

	if (online)
		orig_passes_to_run &= bch2_recovery_passes_match(PASS_ONLINE);

	if (c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info))
		orig_passes_to_run &= ~bch2_recovery_passes_match(PASS_ALLOC);

	/*
	 * A failed recovery pass will be retried after another pass succeeds -
	 * but not this iteration.
	 *
	 * This is because some passes depend on repair done by other passes: we
	 * may want to retry, but we don't want to loop on failing passes.
	 */
	orig_passes_to_run &= ~r->passes_failing;

	r->passes_to_run = orig_passes_to_run;

	while (r->passes_to_run) {
		unsigned prev_done = r->pass_done;
		unsigned pass = __ffs64(r->passes_to_run);
		r->curr_pass = pass;
		r->next_pass = r->curr_pass + 1;
		r->passes_to_run &= ~BIT_ULL(pass);

		spin_unlock_irq(&r->lock);

		int ret2 = bch2_run_recovery_pass(c, pass) ?:
			bch2_journal_flush(&c->journal);

		spin_lock_irq(&r->lock);

		if (r->next_pass < r->curr_pass) {
			/* Rewind: */
			r->passes_to_run |= orig_passes_to_run & (~0ULL << r->next_pass);
		} else if (!ret2) {
			r->pass_done = max(r->pass_done, pass);
			r->passes_complete |= BIT_ULL(pass);
		} else {
			ret = ret2;
		}

		if (ret && !online)
			break;

		if (prev_done <= BCH_RECOVERY_PASS_check_snapshots &&
		    r->pass_done > BCH_RECOVERY_PASS_check_snapshots) {
			bch2_copygc_wakeup(c);
			bch2_rebalance_wakeup(c);
		}
	}

	clear_bit(BCH_FS_in_recovery, &c->flags);
	spin_unlock_irq(&r->lock);

	return ret;
}

static void bch2_async_recovery_passes_work(struct work_struct *work)
{
	struct bch_fs *c = container_of(work, struct bch_fs, recovery.work);
	struct bch_fs_recovery *r = &c->recovery;

	__bch2_run_recovery_passes(c,
		c->sb.recovery_passes_required & ~r->passes_ratelimiting,
		true);

	up(&r->run_lock);
	enumerated_ref_put(&c->writes, BCH_WRITE_REF_async_recovery_passes);
}

int bch2_run_online_recovery_passes(struct bch_fs *c, u64 passes)
{
	return __bch2_run_recovery_passes(c, c->sb.recovery_passes_required|passes, true);
}

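/*
 * Run the passes needed at mount time: always-run passes, unclean/fsck passes
 * as appropriate, plus anything requested via options or recorded in the
 * superblock, starting from @from:
 */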
int bch2_run_recovery_passes(struct bch_fs *c, enum bch_recovery_pass from)
{
	u64 passes =
		bch2_recovery_passes_match(PASS_ALWAYS) |
		(!c->sb.clean ? bch2_recovery_passes_match(PASS_UNCLEAN) : 0) |
		(c->opts.fsck ? bch2_recovery_passes_match(PASS_FSCK) : 0) |
		c->opts.recovery_passes |
		c->sb.recovery_passes_required;

	if (c->opts.recovery_pass_last)
		passes &= BIT_ULL(c->opts.recovery_pass_last + 1) - 1;

	/*
	 * We can't allow set_may_go_rw to be excluded; that would cause us to
	 * use the journal replay keys for updates where it's not expected.
	 */
	c->opts.recovery_passes_exclude &= ~BCH_RECOVERY_PASS_set_may_go_rw;
	passes &= ~c->opts.recovery_passes_exclude;

	passes &= ~(BIT_ULL(from) - 1);

	down(&c->recovery.run_lock);
	int ret = __bch2_run_recovery_passes(c, passes, false);
	up(&c->recovery.run_lock);

	return ret;
}

static void prt_passes(struct printbuf *out, const char *msg, u64 passes)
{
	prt_printf(out, "%s:\t", msg);
	prt_bitflags(out, bch2_recovery_passes, passes);
	prt_newline(out);
}

void bch2_recovery_pass_status_to_text(struct printbuf *out, struct bch_fs *c)
{
	struct bch_fs_recovery *r = &c->recovery;

	printbuf_tabstop_push(out, 32);
	prt_passes(out, "Scheduled passes", c->sb.recovery_passes_required);
	prt_passes(out, "Scheduled online passes", c->sb.recovery_passes_required &
		   bch2_recovery_passes_match(PASS_ONLINE));
	prt_passes(out, "Complete passes", r->passes_complete);
	prt_passes(out, "Failing passes", r->passes_failing);

	if (r->curr_pass) {
		prt_printf(out, "Current pass:\t%s\n", bch2_recovery_passes[r->curr_pass]);
		prt_passes(out, "Current passes", r->passes_to_run);
	}
}

void bch2_fs_recovery_passes_init(struct bch_fs *c)
{
	spin_lock_init(&c->recovery.lock);
	sema_init(&c->recovery.run_lock, 1);

	INIT_WORK(&c->recovery.work, bch2_async_recovery_passes_work);
}