
XFS stopped using current->journal_info in commit f2e812c152 ("xfs:
don't use current->journal_info"), so there is no point in saving and
restoring it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2020-2024 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_buf_mem.h"
#include "xfs_btree_mem.h"
#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_alloc.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
#include "xfs_rtrmap_btree.h"
#include "xfs_inode.h"
#include "xfs_icache.h"
#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
#include "xfs_quota.h"
#include "xfs_rtalloc.h"
#include "xfs_ag.h"
#include "xfs_rtgroup.h"
#include "xfs_refcount.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/bitmap.h"
#include "scrub/fsb_bitmap.h"
#include "scrub/rgb_bitmap.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/iscan.h"
#include "scrub/newbt.h"
#include "scrub/reap.h"
/*
 * Realtime Reverse Mapping Btree Repair
 * =====================================
 *
 * This isn't quite as difficult as repairing the rmap btree on the data
 * device, since we only store the data fork extents of realtime files on the
 * realtime device. We still have to freeze the filesystem and stop the
 * background threads like we do for the rmap repair, but we only have to scan
 * realtime inodes.
 *
 * Collecting entries for the new realtime rmap btree is easy -- all we have
 * to do is generate rtrmap entries from the data fork mappings of all realtime
 * files in the filesystem. We then scan the rmap btrees of the data device
 * looking for extents belonging to the old btree and note them in a bitmap.
 *
 * To rebuild the realtime rmap btree, we bulk-load the collected mappings into
 * a new btree cursor and atomically swap that into the realtime inode. Then
 * we can free the blocks from the old btree.
 *
 * We use the 'xrep_rtrmap' prefix for all the rmap functions.
 */
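
/*
 * Illustrative example (numbers invented for this note): a realtime file
 * with inode number 131 whose data fork maps file offset 0 to rtgroup
 * block 28 for 16 blocks would contribute this record to the new btree:
 *
 *	(rm_startblock = 28, rm_blockcount = 16, rm_owner = 131,
 *	 rm_offset = 0, rm_flags = 0)
 *
 * with XFS_RMAP_UNWRITTEN set in rm_flags if the extent is unwritten.
 */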
/* Context for collecting rmaps */
struct xrep_rtrmap {
	/* new rtrmapbt information */
	struct xrep_newbt	new_btree;

	/* lock for the xfbtree and xfile */
	struct mutex		lock;

	/* rmap records generated from primary metadata */
	struct xfbtree		rtrmap_btree;

	struct xfs_scrub	*sc;

	/* bitmap of old rtrmapbt blocks */
	struct xfsb_bitmap	old_rtrmapbt_blocks;

	/* Hooks into rtrmap update code. */
	struct xfs_rmap_hook	rhook;

	/* inode scan cursor */
	struct xchk_iscan	iscan;

	/* in-memory btree cursor for the ->get_records walk */
	struct xfs_btree_cur	*mcur;

	/* Number of records we're staging in the new btree. */
	uint64_t		nr_records;
};
/* Set us up to repair rt reverse mapping btrees. */
int
xrep_setup_rtrmapbt(
	struct xfs_scrub	*sc)
{
	struct xrep_rtrmap	*rr;
	char			*descr;
	int			error;

	xchk_fsgates_enable(sc, XCHK_FSGATES_RMAP);

	descr = xchk_xfile_rtgroup_descr(sc, "reverse mapping records");
	error = xrep_setup_xfbtree(sc, descr);
	kfree(descr);
	if (error)
		return error;

	rr = kzalloc(sizeof(struct xrep_rtrmap), XCHK_GFP_FLAGS);
	if (!rr)
		return -ENOMEM;

	rr->sc = sc;
	sc->buf = rr;
	return 0;
}
/* Make sure there's nothing funny about this mapping. */
STATIC int
xrep_rtrmap_check_mapping(
	struct xfs_scrub	*sc,
	const struct xfs_rmap_irec *rec)
{
	if (xfs_rtrmap_check_irec(sc->sr.rtg, rec) != NULL)
		return -EFSCORRUPTED;

	/* Make sure this isn't free space. */
	return xrep_require_rtext_inuse(sc, rec->rm_startblock,
			rec->rm_blockcount);
}
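
/*
 * Note on the function below: each stashed record is inserted into the
 * in-memory (xfile-backed) shadow btree under rr->lock and committed
 * immediately, because records arrive both from our own scan and from
 * the live-update hook; presumably the per-record xfbtree commit also
 * keeps the shadow btree's dirty buffers out of the scrub transaction.
 */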
/* Store a reverse-mapping record. */
static inline int
xrep_rtrmap_stash(
	struct xrep_rtrmap	*rr,
	xfs_rgblock_t		startblock,
	xfs_extlen_t		blockcount,
	uint64_t		owner,
	uint64_t		offset,
	unsigned int		flags)
{
	struct xfs_rmap_irec	rmap = {
		.rm_startblock	= startblock,
		.rm_blockcount	= blockcount,
		.rm_owner	= owner,
		.rm_offset	= offset,
		.rm_flags	= flags,
	};
	struct xfs_scrub	*sc = rr->sc;
	struct xfs_btree_cur	*mcur;
	int			error = 0;

	if (xchk_should_terminate(sc, &error))
		return error;

	if (xchk_iscan_aborted(&rr->iscan))
		return -EFSCORRUPTED;

	trace_xrep_rtrmap_found(sc->mp, &rmap);

	/* Add entry to in-memory btree. */
	mutex_lock(&rr->lock);
	mcur = xfs_rtrmapbt_mem_cursor(sc->sr.rtg, sc->tp, &rr->rtrmap_btree);
	error = xfs_rmap_map_raw(mcur, &rmap);
	xfs_btree_del_cursor(mcur, error);
	if (error)
		goto out_cancel;

	error = xfbtree_trans_commit(&rr->rtrmap_btree, sc->tp);
	if (error)
		goto out_abort;

	mutex_unlock(&rr->lock);
	return 0;

out_cancel:
	xfbtree_trans_cancel(&rr->rtrmap_btree, sc->tp);
out_abort:
	xchk_iscan_abort(&rr->iscan);
	mutex_unlock(&rr->lock);
	return error;
}
/* Finding all file and bmbt extents. */

/* Context for accumulating rmaps for an inode fork. */
struct xrep_rtrmap_ifork {
	/*
	 * Accumulate rmap data here to turn multiple adjacent bmaps into a
	 * single rmap.
	 */
	struct xfs_rmap_irec	accum;

	struct xrep_rtrmap	*rr;
};

/* Stash an rmap that we accumulated while walking an inode fork. */
STATIC int
xrep_rtrmap_stash_accumulated(
	struct xrep_rtrmap_ifork *rf)
{
	if (rf->accum.rm_blockcount == 0)
		return 0;

	return xrep_rtrmap_stash(rf->rr, rf->accum.rm_startblock,
			rf->accum.rm_blockcount, rf->accum.rm_owner,
			rf->accum.rm_offset, rf->accum.rm_flags);
}
/* Accumulate a bmbt record. */
STATIC int
xrep_rtrmap_visit_bmbt(
	struct xfs_btree_cur	*cur,
	struct xfs_bmbt_irec	*rec,
	void			*priv)
{
	struct xrep_rtrmap_ifork *rf = priv;
	struct xfs_rmap_irec	*accum = &rf->accum;
	struct xfs_mount	*mp = rf->rr->sc->mp;
	xfs_rgblock_t		rgbno;
	unsigned int		rmap_flags = 0;
	int			error;

	if (xfs_rtb_to_rgno(mp, rec->br_startblock) !=
	    rtg_rgno(rf->rr->sc->sr.rtg))
		return 0;

	if (rec->br_state == XFS_EXT_UNWRITTEN)
		rmap_flags |= XFS_RMAP_UNWRITTEN;

	/* If this bmap is adjacent to the previous one, just add it. */
	rgbno = xfs_rtb_to_rgbno(mp, rec->br_startblock);
	if (accum->rm_blockcount > 0 &&
	    rec->br_startoff == accum->rm_offset + accum->rm_blockcount &&
	    rgbno == accum->rm_startblock + accum->rm_blockcount &&
	    rmap_flags == accum->rm_flags) {
		accum->rm_blockcount += rec->br_blockcount;
		return 0;
	}

	/* Otherwise stash the old rmap and start accumulating a new one. */
	error = xrep_rtrmap_stash_accumulated(rf);
	if (error)
		return error;

	accum->rm_startblock = rgbno;
	accum->rm_blockcount = rec->br_blockcount;
	accum->rm_offset = rec->br_startoff;
	accum->rm_flags = rmap_flags;
	return 0;
}
/*
 * Iterate the block mapping btree to collect rmap records for anything in this
 * fork that maps to the rt volume. Sets @mappings_done to true if we've
 * scanned the block mappings in this fork.
 */
STATIC int
xrep_rtrmap_scan_bmbt(
	struct xrep_rtrmap_ifork *rf,
	struct xfs_inode	*ip,
	bool			*mappings_done)
{
	struct xrep_rtrmap	*rr = rf->rr;
	struct xfs_btree_cur	*cur;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
	int			error = 0;

	*mappings_done = false;

	/*
	 * If the incore extent cache is already loaded, we'll just use the
	 * incore extent scanner to record mappings. Don't bother walking the
	 * ondisk extent tree.
	 */
	if (!xfs_need_iread_extents(ifp))
		return 0;

	/* Accumulate all the mappings in the bmap btree. */
	cur = xfs_bmbt_init_cursor(rr->sc->mp, rr->sc->tp, ip, XFS_DATA_FORK);
	error = xfs_bmap_query_all(cur, xrep_rtrmap_visit_bmbt, rf);
	xfs_btree_del_cursor(cur, error);
	if (error)
		return error;

	/* Stash any remaining accumulated rmaps and exit. */
	*mappings_done = true;
	return xrep_rtrmap_stash_accumulated(rf);
}
/*
 * Iterate the in-core extent cache to collect rmap records for anything in
 * this fork that matches this rt group.
 */
STATIC int
xrep_rtrmap_scan_iext(
	struct xrep_rtrmap_ifork *rf,
	struct xfs_ifork	*ifp)
{
	struct xfs_bmbt_irec	rec;
	struct xfs_iext_cursor	icur;
	int			error;

	for_each_xfs_iext(ifp, &icur, &rec) {
		if (isnullstartblock(rec.br_startblock))
			continue;
		error = xrep_rtrmap_visit_bmbt(NULL, &rec, rf);
		if (error)
			return error;
	}

	return xrep_rtrmap_stash_accumulated(rf);
}
/* Find all the extents on the realtime device mapped by an inode fork. */
STATIC int
xrep_rtrmap_scan_dfork(
	struct xrep_rtrmap	*rr,
	struct xfs_inode	*ip)
{
	struct xrep_rtrmap_ifork rf = {
		.accum		= { .rm_owner = ip->i_ino, },
		.rr		= rr,
	};
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
	int			error = 0;

	if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
		bool		mappings_done;

		/*
		 * Scan the bmbt for mappings. If the incore extent tree is
		 * loaded, we want to scan the cached mappings since that's
		 * faster when the extent counts are very high.
		 */
		error = xrep_rtrmap_scan_bmbt(&rf, ip, &mappings_done);
		if (error || mappings_done)
			return error;
	} else if (ifp->if_format != XFS_DINODE_FMT_EXTENTS) {
		/* realtime data forks should only be extents or btree */
		return -EFSCORRUPTED;
	}

	/* Scan incore extent cache. */
	return xrep_rtrmap_scan_iext(&rf, ifp);
}
/* Record reverse mappings for a file. */
STATIC int
xrep_rtrmap_scan_inode(
	struct xrep_rtrmap	*rr,
	struct xfs_inode	*ip)
{
	unsigned int		lock_mode;
	int			error = 0;

	/* Skip the rt rmap btree inode. */
	if (rr->sc->ip == ip)
		return 0;

	lock_mode = xfs_ilock_data_map_shared(ip);

	/* Check the data fork if it's on the realtime device. */
	if (XFS_IS_REALTIME_INODE(ip)) {
		error = xrep_rtrmap_scan_dfork(rr, ip);
		if (error)
			goto out_unlock;
	}

	xchk_iscan_mark_visited(&rr->iscan, ip);
out_unlock:
	xfs_iunlock(ip, lock_mode);
	return error;
}
/* Record extents that belong to the realtime rmap inode. */
STATIC int
xrep_rtrmap_walk_rmap(
	struct xfs_btree_cur	*cur,
	const struct xfs_rmap_irec *rec,
	void			*priv)
{
	struct xrep_rtrmap	*rr = priv;
	int			error = 0;

	if (xchk_should_terminate(rr->sc, &error))
		return error;

	/* Skip extents which are not owned by this inode and fork. */
	if (rec->rm_owner != rr->sc->ip->i_ino)
		return 0;

	error = xrep_check_ino_btree_mapping(rr->sc, rec);
	if (error)
		return error;

	return xfsb_bitmap_set(&rr->old_rtrmapbt_blocks,
			xfs_gbno_to_fsb(cur->bc_group, rec->rm_startblock),
			rec->rm_blockcount);
}
/* Scan one AG for reverse mappings for the realtime rmap btree. */
STATIC int
xrep_rtrmap_scan_ag(
	struct xrep_rtrmap	*rr,
	struct xfs_perag	*pag)
{
	struct xfs_scrub	*sc = rr->sc;
	int			error;

	error = xrep_ag_init(sc, pag, &sc->sa);
	if (error)
		return error;

	error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_rtrmap_walk_rmap, rr);
	xchk_ag_free(sc, &sc->sa);
	return error;
}
struct xrep_rtrmap_stash_run {
	struct xrep_rtrmap	*rr;
	uint64_t		owner;
};

static int
xrep_rtrmap_stash_run(
	uint32_t		start,
	uint32_t		len,
	void			*priv)
{
	struct xrep_rtrmap_stash_run *rsr = priv;
	struct xrep_rtrmap	*rr = rsr->rr;
	xfs_rgblock_t		rgbno = start;

	return xrep_rtrmap_stash(rr, rgbno, len, rsr->owner, 0, 0);
}
/*
 * Emit rmaps for every extent of bits set in the bitmap. Caller must ensure
 * that the ranges are in units of FS blocks.
 */
STATIC int
xrep_rtrmap_stash_bitmap(
	struct xrep_rtrmap	*rr,
	struct xrgb_bitmap	*bitmap,
	const struct xfs_owner_info *oinfo)
{
	struct xrep_rtrmap_stash_run rsr = {
		.rr		= rr,
		.owner		= oinfo->oi_owner,
	};

	return xrgb_bitmap_walk(bitmap, xrep_rtrmap_stash_run, &rsr);
}
/* Record a CoW staging extent. */
STATIC int
xrep_rtrmap_walk_cowblocks(
	struct xfs_btree_cur	*cur,
	const struct xfs_refcount_irec *irec,
	void			*priv)
{
	struct xrgb_bitmap	*bitmap = priv;

	if (!xfs_refcount_check_domain(irec) ||
	    irec->rc_domain != XFS_REFC_DOMAIN_COW)
		return -EFSCORRUPTED;

	return xrgb_bitmap_set(bitmap, irec->rc_startblock,
			irec->rc_blockcount);
}
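
/*
 * Note: the low/high query keys in the function below run from
 * rc_startblock 0 through -1U within XFS_REFC_DOMAIN_COW, which should
 * cover every CoW staging record in this rtgroup's refcount btree.
 */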
/*
 * Collect rmaps for the blocks containing the refcount btree, and all CoW
 * staging extents.
 */
STATIC int
xrep_rtrmap_find_refcount_rmaps(
	struct xrep_rtrmap	*rr)
{
	struct xrgb_bitmap	cow_blocks;		/* COWBIT */
	struct xfs_refcount_irec low = {
		.rc_startblock	= 0,
		.rc_domain	= XFS_REFC_DOMAIN_COW,
	};
	struct xfs_refcount_irec high = {
		.rc_startblock	= -1U,
		.rc_domain	= XFS_REFC_DOMAIN_COW,
	};
	struct xfs_scrub	*sc = rr->sc;
	int			error;

	if (!xfs_has_rtreflink(sc->mp))
		return 0;

	xrgb_bitmap_init(&cow_blocks);

	/* Collect rmaps for CoW staging extents. */
	error = xfs_refcount_query_range(sc->sr.refc_cur, &low, &high,
			xrep_rtrmap_walk_cowblocks, &cow_blocks);
	if (error)
		goto out_bitmap;

	/* Generate rmaps for everything. */
	error = xrep_rtrmap_stash_bitmap(rr, &cow_blocks, &XFS_RMAP_OINFO_COW);
	if (error)
		goto out_bitmap;

out_bitmap:
	xrgb_bitmap_destroy(&cow_blocks);
	return error;
}
/* Count and check all collected records. */
STATIC int
xrep_rtrmap_check_record(
	struct xfs_btree_cur	*cur,
	const struct xfs_rmap_irec *rec,
	void			*priv)
{
	struct xrep_rtrmap	*rr = priv;
	int			error;

	error = xrep_rtrmap_check_mapping(rr->sc, rec);
	if (error)
		return error;

	rr->nr_records++;
	return 0;
}
/* Generate all the reverse-mappings for the realtime device. */
STATIC int
xrep_rtrmap_find_rmaps(
	struct xrep_rtrmap	*rr)
{
	struct xfs_scrub	*sc = rr->sc;
	struct xfs_perag	*pag = NULL;
	struct xfs_inode	*ip;
	struct xfs_btree_cur	*mcur;
	int			error;

	/* Generate rmaps for the realtime superblock */
	if (xfs_has_rtsb(sc->mp) && rtg_rgno(rr->sc->sr.rtg) == 0) {
		error = xrep_rtrmap_stash(rr, 0, sc->mp->m_sb.sb_rextsize,
				XFS_RMAP_OWN_FS, 0, 0);
		if (error)
			return error;
	}

	/* Find CoW staging extents. */
	xrep_rtgroup_btcur_init(sc, &sc->sr);
	error = xrep_rtrmap_find_refcount_rmaps(rr);
	xchk_rtgroup_btcur_free(&sc->sr);
	if (error)
		return error;

	/*
	 * Set up for a potentially lengthy filesystem scan by reducing our
	 * transaction resource usage for the duration. Specifically:
	 *
	 * Unlock the realtime metadata inodes and cancel the transaction to
	 * release the log grant space while we scan the filesystem.
	 *
	 * Create a new empty transaction to eliminate the possibility of the
	 * inode scan deadlocking on cyclical metadata.
	 *
	 * We pass the empty transaction to the file scanning function to avoid
	 * repeatedly cycling empty transactions. This can be done even though
	 * we take the IOLOCK to quiesce the file because empty transactions
	 * do not take sb_internal.
	 */
	xchk_trans_cancel(sc);
	xchk_rtgroup_unlock(&sc->sr);
	xchk_trans_alloc_empty(sc);

	while ((error = xchk_iscan_iter(&rr->iscan, &ip)) == 1) {
		error = xrep_rtrmap_scan_inode(rr, ip);
		xchk_irele(sc, ip);
		if (error)
			break;

		if (xchk_should_terminate(sc, &error))
			break;
	}
	xchk_iscan_iter_finish(&rr->iscan);
	if (error)
		return error;

	/*
	 * Switch out for a real transaction and lock the RT metadata in
	 * preparation for building a new tree.
	 */
	xchk_trans_cancel(sc);
	error = xchk_setup_rt(sc);
	if (error)
		return error;
	error = xchk_rtgroup_lock(sc, &sc->sr, XCHK_RTGLOCK_ALL);
	if (error)
		return error;

	/*
	 * If a hook failed to update the in-memory btree, we lack the data to
	 * continue the repair.
	 */
	if (xchk_iscan_aborted(&rr->iscan))
		return -EFSCORRUPTED;

	/* Scan for old rtrmap blocks. */
	while ((pag = xfs_perag_next(sc->mp, pag))) {
		error = xrep_rtrmap_scan_ag(rr, pag);
		if (error) {
			xfs_perag_rele(pag);
			return error;
		}
	}

	/*
	 * Now that we have everything locked again, we need to count the
	 * number of rmap records stashed in the btree. This should reflect
	 * all actively-owned rt files in the filesystem. At the same time,
	 * check all our records before we start building a new btree, which
	 * requires the rtbitmap lock.
	 */
	mcur = xfs_rtrmapbt_mem_cursor(rr->sc->sr.rtg, NULL, &rr->rtrmap_btree);
	rr->nr_records = 0;
	error = xfs_rmap_query_all(mcur, xrep_rtrmap_check_record, rr);
	xfs_btree_del_cursor(mcur, error);

	return error;
}
/* Building the new rtrmap btree. */
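
/*
 * Note: xfs_btree_bload() drives the two callbacks below: ->get_records
 * fills each new btree block with records pulled in order from the
 * in-memory btree cursor, and ->claim_block hands out the blocks that
 * were reserved for the new tree.
 */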
/* Retrieve rtrmapbt data for bulk load. */
STATIC int
xrep_rtrmap_get_records(
	struct xfs_btree_cur	*cur,
	unsigned int		idx,
	struct xfs_btree_block	*block,
	unsigned int		nr_wanted,
	void			*priv)
{
	struct xrep_rtrmap	*rr = priv;
	union xfs_btree_rec	*block_rec;
	unsigned int		loaded;
	int			error;

	for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
		int		stat = 0;

		error = xfs_btree_increment(rr->mcur, 0, &stat);
		if (error)
			return error;
		if (!stat)
			return -EFSCORRUPTED;

		error = xfs_rmap_get_rec(rr->mcur, &cur->bc_rec.r, &stat);
		if (error)
			return error;
		if (!stat)
			return -EFSCORRUPTED;

		block_rec = xfs_btree_rec_addr(cur, idx, block);
		cur->bc_ops->init_rec_from_cur(cur, block_rec);
	}

	return loaded;
}
/* Feed one of the new btree blocks to the bulk loader. */
STATIC int
xrep_rtrmap_claim_block(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	void			*priv)
{
	struct xrep_rtrmap	*rr = priv;

	return xrep_newbt_claim_block(cur, &rr->new_btree, ptr);
}
/* Figure out how much space we need to create the incore btree root block. */
STATIC size_t
xrep_rtrmap_iroot_size(
	struct xfs_btree_cur	*cur,
	unsigned int		level,
	unsigned int		nr_this_level,
	void			*priv)
{
	return xfs_rtrmap_broot_space_calc(cur->bc_mp, level, nr_this_level);
}
/*
 * Use the collected rmap information to stage a new rmap btree. If this is
 * successful we'll return with the new btree root information logged to the
 * repair transaction but not yet committed. This implements the tree
 * rebuilding step described at the top of this file.
 */
STATIC int
xrep_rtrmap_build_new_tree(
	struct xrep_rtrmap	*rr)
{
	struct xfs_scrub	*sc = rr->sc;
	struct xfs_rtgroup	*rtg = sc->sr.rtg;
	struct xfs_btree_cur	*rmap_cur;
	int			error;

	/*
	 * Prepare to construct the new btree by reserving disk space for the
	 * new btree and setting up all the accounting information we'll need
	 * to root the new btree while it's under construction and before we
	 * attach it to the realtime rmapbt inode.
	 */
	error = xrep_newbt_init_metadir_inode(&rr->new_btree, sc);
	if (error)
		return error;

	rr->new_btree.bload.get_records = xrep_rtrmap_get_records;
	rr->new_btree.bload.claim_block = xrep_rtrmap_claim_block;
	rr->new_btree.bload.iroot_size = xrep_rtrmap_iroot_size;

	rmap_cur = xfs_rtrmapbt_init_cursor(NULL, rtg);
	xfs_btree_stage_ifakeroot(rmap_cur, &rr->new_btree.ifake);

	/* Compute how many blocks we'll need for the rmaps collected. */
	error = xfs_btree_bload_compute_geometry(rmap_cur,
			&rr->new_btree.bload, rr->nr_records);
	if (error)
		goto err_cur;

	/* Last chance to abort before we start committing fixes. */
	if (xchk_should_terminate(sc, &error))
		goto err_cur;

	/*
	 * Guess how many blocks we're going to need to rebuild an entire
	 * rtrmapbt from the number of extents we found, and pump up our
	 * transaction to have sufficient block reservation. We're allowed
	 * to exceed quota to repair inconsistent metadata, though this is
	 * unlikely.
	 */
	error = xfs_trans_reserve_more_inode(sc->tp, rtg_rmap(rtg),
			rr->new_btree.bload.nr_blocks, 0, true);
	if (error)
		goto err_cur;

	/* Reserve the space we'll need for the new btree. */
	error = xrep_newbt_alloc_blocks(&rr->new_btree,
			rr->new_btree.bload.nr_blocks);
	if (error)
		goto err_cur;

	/*
	 * Create a cursor to the in-memory btree so that we can bulk load the
	 * new btree.
	 */
	rr->mcur = xfs_rtrmapbt_mem_cursor(sc->sr.rtg, NULL, &rr->rtrmap_btree);
	error = xfs_btree_goto_left_edge(rr->mcur);
	if (error)
		goto err_mcur;

	/* Add all observed rmap records. */
	rr->new_btree.ifake.if_fork->if_format = XFS_DINODE_FMT_META_BTREE;
	error = xfs_btree_bload(rmap_cur, &rr->new_btree.bload, rr);
	if (error)
		goto err_mcur;

	/*
	 * Install the new rtrmap btree in the inode. After this point the old
	 * btree is no longer accessible, the new tree is live, and we can
	 * delete the cursor.
	 */
	xfs_rtrmapbt_commit_staged_btree(rmap_cur, sc->tp);
	xrep_inode_set_nblocks(rr->sc, rr->new_btree.ifake.if_blocks);
	xfs_btree_del_cursor(rmap_cur, 0);
	xfs_btree_del_cursor(rr->mcur, 0);
	rr->mcur = NULL;

	/*
	 * Now that we've written the new btree to disk, we don't need to keep
	 * updating the in-memory btree. Abort the scan to stop live updates.
	 */
	xchk_iscan_abort(&rr->iscan);

	/* Dispose of any unused blocks and the accounting information. */
	error = xrep_newbt_commit(&rr->new_btree);
	if (error)
		return error;

	return xrep_roll_trans(sc);

err_mcur:
	xfs_btree_del_cursor(rr->mcur, error);
err_cur:
	xfs_btree_del_cursor(rmap_cur, error);
	xrep_newbt_cancel(&rr->new_btree);
	return error;
}
/* Reaping the old btree. */
static inline bool
xrep_rtrmapbt_want_live_update(
	struct xchk_iscan	*iscan,
	const struct xfs_owner_info *oi)
{
	if (xchk_iscan_aborted(iscan))
		return false;

	/*
	 * We scanned the CoW staging extents before we started the iscan, so
	 * we need all the updates.
	 */
	if (XFS_RMAP_NON_INODE_OWNER(oi->oi_owner))
		return true;

	/* Ignore updates to files that the scanner hasn't visited yet. */
	return xchk_iscan_want_live_update(iscan, oi->oi_owner);
}
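
/*
 * Note: the notifier below always returns NOTIFY_DONE because there is
 * no way to push an error back to the caller generating the rmap
 * update; failures are recorded by aborting the iscan instead, which
 * makes the repair bail out with -EFSCORRUPTED later on.
 */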
/*
 * Apply a rtrmapbt update from the regular filesystem into our shadow btree.
 * We're running from the thread that owns the rtrmap ILOCK and is generating
 * the update, so we must be careful about which parts of the struct
 * xrep_rtrmap that we change.
 */
static int
xrep_rtrmapbt_live_update(
	struct notifier_block	*nb,
	unsigned long		action,
	void			*data)
{
	struct xfs_rmap_update_params *p = data;
	struct xrep_rtrmap	*rr;
	struct xfs_mount	*mp;
	struct xfs_btree_cur	*mcur;
	struct xfs_trans	*tp;
	int			error;

	rr = container_of(nb, struct xrep_rtrmap, rhook.rmap_hook.nb);
	mp = rr->sc->mp;

	if (!xrep_rtrmapbt_want_live_update(&rr->iscan, &p->oinfo))
		goto out_unlock;

	trace_xrep_rmap_live_update(rtg_group(rr->sc->sr.rtg), action, p);

	tp = xfs_trans_alloc_empty(mp);

	mutex_lock(&rr->lock);
	mcur = xfs_rtrmapbt_mem_cursor(rr->sc->sr.rtg, tp, &rr->rtrmap_btree);
	error = __xfs_rmap_finish_intent(mcur, action, p->startblock,
			p->blockcount, &p->oinfo, p->unwritten);
	xfs_btree_del_cursor(mcur, error);
	if (error)
		goto out_cancel;

	error = xfbtree_trans_commit(&rr->rtrmap_btree, tp);
	if (error)
		goto out_cancel;

	xfs_trans_cancel(tp);
	mutex_unlock(&rr->lock);
	return NOTIFY_DONE;

out_cancel:
	xfbtree_trans_cancel(&rr->rtrmap_btree, tp);
	xfs_trans_cancel(tp);
	xchk_iscan_abort(&rr->iscan);
	mutex_unlock(&rr->lock);
out_unlock:
	return NOTIFY_DONE;
}
/* Set up the filesystem scan components. */
STATIC int
xrep_rtrmap_setup_scan(
	struct xrep_rtrmap	*rr)
{
	struct xfs_scrub	*sc = rr->sc;
	int			error;

	mutex_init(&rr->lock);
	xfsb_bitmap_init(&rr->old_rtrmapbt_blocks);

	/* Set up some storage */
	error = xfs_rtrmapbt_mem_init(sc->mp, &rr->rtrmap_btree, sc->xmbtp,
			rtg_rgno(sc->sr.rtg));
	if (error)
		goto out_bitmap;

	/* Retry iget every tenth of a second for up to 30 seconds. */
	xchk_iscan_start(sc, 30000, 100, &rr->iscan);

	/*
	 * Hook into live rtrmap operations so that we can update our in-memory
	 * btree to reflect live changes on the filesystem. Since we drop the
	 * rtrmap ILOCK to scan all the inodes, we need this piece to avoid
	 * installing a stale btree.
	 */
	ASSERT(sc->flags & XCHK_FSGATES_RMAP);
	xfs_rmap_hook_setup(&rr->rhook, xrep_rtrmapbt_live_update);
	error = xfs_rmap_hook_add(rtg_group(sc->sr.rtg), &rr->rhook);
	if (error)
		goto out_iscan;
	return 0;

out_iscan:
	xchk_iscan_teardown(&rr->iscan);
	xfbtree_destroy(&rr->rtrmap_btree);
out_bitmap:
	xfsb_bitmap_destroy(&rr->old_rtrmapbt_blocks);
	mutex_destroy(&rr->lock);
	return error;
}
/* Tear down scan components. */
STATIC void
xrep_rtrmap_teardown(
	struct xrep_rtrmap	*rr)
{
	struct xfs_scrub	*sc = rr->sc;

	xchk_iscan_abort(&rr->iscan);
	xfs_rmap_hook_del(rtg_group(sc->sr.rtg), &rr->rhook);
	xchk_iscan_teardown(&rr->iscan);
	xfbtree_destroy(&rr->rtrmap_btree);
	xfsb_bitmap_destroy(&rr->old_rtrmapbt_blocks);
	mutex_destroy(&rr->lock);
}
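
/*
 * Overview of the entry point below: fix the metadata inode forks, set
 * up the scan machinery (shadow btree, inode scan, live-update hook),
 * collect rmap records, bulk-load and commit the new btree, and finally
 * reap the old btree's blocks.
 */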
/* Repair the realtime rmap btree. */
int
xrep_rtrmapbt(
	struct xfs_scrub	*sc)
{
	struct xrep_rtrmap	*rr = sc->buf;
	int			error;

	/* Make sure any problems with the fork are fixed. */
	error = xrep_metadata_inode_forks(sc);
	if (error)
		return error;

	error = xrep_rtrmap_setup_scan(rr);
	if (error)
		return error;

	/* Collect rmaps for realtime files. */
	error = xrep_rtrmap_find_rmaps(rr);
	if (error)
		goto out_records;

	xfs_trans_ijoin(sc->tp, sc->ip, 0);

	/* Rebuild the rtrmap information. */
	error = xrep_rtrmap_build_new_tree(rr);
	if (error)
		goto out_records;

	/*
	 * Free all the extents that were allocated to the former rtrmapbt and
	 * aren't cross-linked with something else.
	 */
	error = xrep_reap_metadir_fsblocks(rr->sc, &rr->old_rtrmapbt_blocks);
	if (error)
		goto out_records;

out_records:
	xrep_rtrmap_teardown(rr);
	return error;
}
|