Mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git (synced 2025-08-05 16:54:27 +00:00)

Split busy extent tracking from struct xfs_perag into its own private structure, which can be pointed to by the generic group structure. Note that this structure is now dynamically allocated instead of embedded, as the upcoming zone XFS code doesn't need it and will also have an unusually high number of groups due to hardware constraints. Dynamically allocating the structure is a big memory saver for this case.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
933 lines
25 KiB
C
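As context for the commit message above, here is a minimal sketch of the shape of that refactor: per-group busy extent state hoisted out of struct xfs_perag into a separately allocated structure that the generic group structure merely points to, so group types that never track busy extents pay only for a NULL pointer. Every type, field, and function name in the sketch (xfs_extent_busy_tracker, xfs_group_sketch, xg_busy, xfs_group_init_busy_sketch) is an illustrative assumption, not the actual kernel definition.

#include <linux/errno.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

/*
 * Illustrative sketch only -- every name here is hypothetical, not the
 * real kernel definition.  The point is the pattern from the commit
 * message: busy extent tracking lives in its own dynamically allocated
 * structure, referenced from the generic group structure.
 */
struct xfs_extent_busy_tracker {		/* hypothetical */
	spinlock_t	eb_lock;		/* protects eb_tree */
	struct rb_root	eb_tree;		/* busy extents by start block */
	unsigned int	eb_gen;			/* generation for busy flushes */
};

struct xfs_group_sketch {			/* stand-in for the group struct */
	/* ... other generic per-group fields ... */
	struct xfs_extent_busy_tracker	*xg_busy; /* NULL if this group type never needs it */
};

/* Allocate the tracker only for group types that track busy extents. */
static int
xfs_group_init_busy_sketch(struct xfs_group_sketch *xg)
{
	xg->xg_busy = kzalloc(sizeof(*xg->xg_busy), GFP_KERNEL);
	if (!xg->xg_busy)
		return -ENOMEM;
	spin_lock_init(&xg->xg_busy->eb_lock);
	xg->xg_busy->eb_tree = RB_ROOT;
	return 0;
}

The memory saving the commit message cites comes from group types with many instances (the upcoming zone code) never allocating this structure at all.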
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_alloc.h"
#include "xfs_alloc_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
#include "xfs_inode.h"
#include "xfs_refcount.h"
#include "xfs_extent_busy.h"
#include "xfs_health.h"
#include "xfs_bmap.h"
#include "xfs_ialloc.h"
#include "xfs_ag.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/bitmap.h"
#include "scrub/agb_bitmap.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/newbt.h"
#include "scrub/reap.h"

/*
 * Free Space Btree Repair
 * =======================
 *
 * The reverse mappings are supposed to record all space usage for the entire
 * AG.  Therefore, we can recreate the free extent records in an AG by looking
 * for gaps in the physical extents recorded in the rmapbt.  These records are
 * staged in @free_records.  Identifying the gaps is more difficult on a
 * reflink filesystem because rmap records are allowed to overlap.
 *
 * Because the final step of building a new index is to free the space used by
 * the old index, repair needs to find that space.  Unfortunately, all
 * structures that live in the free space (bnobt, cntbt, rmapbt, agfl) share
 * the same rmapbt owner code (OWN_AG), so this is not straightforward.
 *
 * The scan of the reverse mapping information records the space used by
 * OWN_AG in @old_allocbt_blocks, which (at this stage) is somewhat misnamed.
 * While walking the rmapbt records, we create a second bitmap
 * @not_allocbt_blocks to record all visited rmap btree blocks and all blocks
 * owned by the AGFL.
 *
 * After that is where the definition of old_allocbt_blocks shifts.  This
 * expression identifies possible former bnobt/cntbt blocks:
 *
 *	(OWN_AG blocks) & ~(rmapbt blocks | agfl blocks);
 *
 * Substituting from the above definitions, that becomes:
 *
 *	old_allocbt_blocks & ~not_allocbt_blocks
 *
 * The OWN_AG bitmap itself isn't needed after this point, so what we really
 * do instead is:
 *
 *	old_allocbt_blocks &= ~not_allocbt_blocks;
 *
 * After this point, @old_allocbt_blocks is a bitmap of alleged former
 * bnobt/cntbt blocks.  The xagb_bitmap_disunion operation modifies its first
 * parameter in place to avoid copying records around.
 *
 * Next, some of the space described by @free_records is diverted to the newbt
 * reservation and used to format new btree blocks.  The remaining records are
 * written to the new btree indices.  We reconstruct both bnobt and cntbt at
 * the same time since we've already done all the work.
 *
 * We use the prefix 'xrep_abt' here because we regenerate both free space
 * allocation btrees at the same time.
 */

struct xrep_abt {
	/* Blocks owned by the rmapbt or the agfl. */
	struct xagb_bitmap	not_allocbt_blocks;

	/* All OWN_AG blocks. */
	struct xagb_bitmap	old_allocbt_blocks;

	/*
	 * New bnobt information.  All btree block reservations are added to
	 * the reservation list in new_bnobt.
	 */
	struct xrep_newbt	new_bnobt;

	/* new cntbt information */
	struct xrep_newbt	new_cntbt;

	/* Free space extents. */
	struct xfarray		*free_records;

	struct xfs_scrub	*sc;

	/* Number of non-null records in @free_records. */
	uint64_t		nr_real_records;

	/* get_records()'s position in the free space record array. */
	xfarray_idx_t		array_cur;

	/*
	 * Next block we anticipate seeing in the rmap records.  If the next
	 * rmap record is greater than next_agbno, we have found unused space.
	 */
	xfs_agblock_t		next_agbno;

	/* Number of free blocks in this AG. */
	xfs_agblock_t		nr_blocks;

	/* Longest free extent we found in the AG. */
	xfs_agblock_t		longest;
};

/* Set up to repair AG free space btrees. */
int
xrep_setup_ag_allocbt(
	struct xfs_scrub	*sc)
{
	struct xfs_group	*xg = pag_group(sc->sa.pag);
	unsigned int		busy_gen;

	/*
	 * Make sure the busy extent list is clear because we can't put
	 * extents on there twice.
	 */
	if (xfs_extent_busy_list_empty(xg, &busy_gen))
		return 0;

	return xfs_extent_busy_flush(sc->tp, xg, busy_gen, 0);
}

/* Check for any obvious conflicts in the free extent. */
STATIC int
xrep_abt_check_free_ext(
	struct xfs_scrub	*sc,
	const struct xfs_alloc_rec_incore *rec)
{
	enum xbtree_recpacking	outcome;
	int			error;

	if (xfs_alloc_check_irec(sc->sa.pag, rec) != NULL)
		return -EFSCORRUPTED;

	/* Must not be an inode chunk. */
	error = xfs_ialloc_has_inodes_at_extent(sc->sa.ino_cur,
			rec->ar_startblock, rec->ar_blockcount, &outcome);
	if (error)
		return error;
	if (outcome != XBTREE_RECPACKING_EMPTY)
		return -EFSCORRUPTED;

	/* Must not be shared or CoW staging. */
	if (sc->sa.refc_cur) {
		error = xfs_refcount_has_records(sc->sa.refc_cur,
				XFS_REFC_DOMAIN_SHARED, rec->ar_startblock,
				rec->ar_blockcount, &outcome);
		if (error)
			return error;
		if (outcome != XBTREE_RECPACKING_EMPTY)
			return -EFSCORRUPTED;

		error = xfs_refcount_has_records(sc->sa.refc_cur,
				XFS_REFC_DOMAIN_COW, rec->ar_startblock,
				rec->ar_blockcount, &outcome);
		if (error)
			return error;
		if (outcome != XBTREE_RECPACKING_EMPTY)
			return -EFSCORRUPTED;
	}

	return 0;
}

/*
 * Stash a free space record for all the space since the last bno we found
 * all the way up to @end.
 */
static int
xrep_abt_stash(
	struct xrep_abt		*ra,
	xfs_agblock_t		end)
{
	struct xfs_alloc_rec_incore arec = {
		.ar_startblock	= ra->next_agbno,
		.ar_blockcount	= end - ra->next_agbno,
	};
	struct xfs_scrub	*sc = ra->sc;
	int			error = 0;

	if (xchk_should_terminate(sc, &error))
		return error;

	error = xrep_abt_check_free_ext(ra->sc, &arec);
	if (error)
		return error;

	trace_xrep_abt_found(sc->sa.pag, &arec);

	error = xfarray_append(ra->free_records, &arec);
	if (error)
		return error;

	ra->nr_blocks += arec.ar_blockcount;
	return 0;
}

/* Record extents that aren't in use from gaps in the rmap records. */
STATIC int
xrep_abt_walk_rmap(
	struct xfs_btree_cur	*cur,
	const struct xfs_rmap_irec *rec,
	void			*priv)
{
	struct xrep_abt		*ra = priv;
	int			error;

	/* Record all the OWN_AG blocks... */
	if (rec->rm_owner == XFS_RMAP_OWN_AG) {
		error = xagb_bitmap_set(&ra->old_allocbt_blocks,
				rec->rm_startblock, rec->rm_blockcount);
		if (error)
			return error;
	}

	/* ...and all the rmapbt blocks... */
	error = xagb_bitmap_set_btcur_path(&ra->not_allocbt_blocks, cur);
	if (error)
		return error;

	/* ...and all the free space. */
	if (rec->rm_startblock > ra->next_agbno) {
		error = xrep_abt_stash(ra, rec->rm_startblock);
		if (error)
			return error;
	}

	/*
	 * rmap records can overlap on reflink filesystems, so project
	 * next_agbno as far out into the AG space as we currently know about.
	 */
	ra->next_agbno = max_t(xfs_agblock_t, ra->next_agbno,
			rec->rm_startblock + rec->rm_blockcount);
	return 0;
}

/* Collect an AGFL block for the not-to-release list. */
static int
xrep_abt_walk_agfl(
	struct xfs_mount	*mp,
	xfs_agblock_t		agbno,
	void			*priv)
{
	struct xrep_abt		*ra = priv;

	return xagb_bitmap_set(&ra->not_allocbt_blocks, agbno, 1);
}

/*
 * Compare two free space extents by block number.  We want to sort in order
 * of increasing block number.
 */
static int
xrep_bnobt_extent_cmp(
	const void		*a,
	const void		*b)
{
	const struct xfs_alloc_rec_incore *ap = a;
	const struct xfs_alloc_rec_incore *bp = b;

	if (ap->ar_startblock > bp->ar_startblock)
		return 1;
	else if (ap->ar_startblock < bp->ar_startblock)
		return -1;
	return 0;
}

/*
 * Re-sort the free extents by block number so that we can put the records
 * into the bnobt in the correct order.  Make sure the records do not overlap
 * in physical space.
 */
STATIC int
xrep_bnobt_sort_records(
	struct xrep_abt		*ra)
{
	struct xfs_alloc_rec_incore arec;
	xfarray_idx_t		cur = XFARRAY_CURSOR_INIT;
	xfs_agblock_t		next_agbno = 0;
	int			error;

	error = xfarray_sort(ra->free_records, xrep_bnobt_extent_cmp, 0);
	if (error)
		return error;

	while ((error = xfarray_iter(ra->free_records, &cur, &arec)) == 1) {
		if (arec.ar_startblock < next_agbno)
			return -EFSCORRUPTED;

		next_agbno = arec.ar_startblock + arec.ar_blockcount;
	}

	return error;
}

/*
 * Compare two free space extents by length and then block number.  We want
 * to sort first in order of increasing length and then in order of
 * increasing block number.
 */
static int
xrep_cntbt_extent_cmp(
	const void		*a,
	const void		*b)
{
	const struct xfs_alloc_rec_incore *ap = a;
	const struct xfs_alloc_rec_incore *bp = b;

	if (ap->ar_blockcount > bp->ar_blockcount)
		return 1;
	else if (ap->ar_blockcount < bp->ar_blockcount)
		return -1;
	return xrep_bnobt_extent_cmp(a, b);
}

/*
 * Sort the free extents by length so that we can put the records into the
 * cntbt in the correct order.  Don't let userspace kill us if we're resorting
 * after allocating btree blocks.
 */
STATIC int
xrep_cntbt_sort_records(
	struct xrep_abt		*ra,
	bool			is_resort)
{
	return xfarray_sort(ra->free_records, xrep_cntbt_extent_cmp,
			is_resort ? 0 : XFARRAY_SORT_KILLABLE);
}

/*
 * Iterate all reverse mappings to find (1) the gaps between rmap records (all
 * unowned space), (2) the OWN_AG extents (which encompass the free space
 * btrees, the rmapbt, and the agfl), (3) the rmapbt blocks, and (4) the AGFL
 * blocks.  The free space is (1) + (2) - (3) - (4).
 */
STATIC int
xrep_abt_find_freespace(
	struct xrep_abt		*ra)
{
	struct xfs_scrub	*sc = ra->sc;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_agf		*agf = sc->sa.agf_bp->b_addr;
	struct xfs_buf		*agfl_bp;
	xfs_agblock_t		agend;
	int			error;

	xagb_bitmap_init(&ra->not_allocbt_blocks);

	xrep_ag_btcur_init(sc, &sc->sa);

	/*
	 * Iterate all the reverse mappings to find gaps in the physical
	 * mappings, all the OWN_AG blocks, and all the rmapbt extents.
	 */
	error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_abt_walk_rmap, ra);
	if (error)
		goto err;

	/* Insert a record for space between the last rmap and EOAG. */
	agend = be32_to_cpu(agf->agf_length);
	if (ra->next_agbno < agend) {
		error = xrep_abt_stash(ra, agend);
		if (error)
			goto err;
	}

	/* Collect all the AGFL blocks. */
	error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &agfl_bp);
	if (error)
		goto err;

	error = xfs_agfl_walk(mp, agf, agfl_bp, xrep_abt_walk_agfl, ra);
	if (error)
		goto err_agfl;

	/* Compute the old bnobt/cntbt blocks. */
	error = xagb_bitmap_disunion(&ra->old_allocbt_blocks,
			&ra->not_allocbt_blocks);
	if (error)
		goto err_agfl;

	ra->nr_real_records = xfarray_length(ra->free_records);
err_agfl:
	xfs_trans_brelse(sc->tp, agfl_bp);
err:
	xchk_ag_btcur_free(&sc->sa);
	xagb_bitmap_destroy(&ra->not_allocbt_blocks);
	return error;
}

/*
 * We're going to use the observed free space records to reserve blocks for
 * the new free space btrees, so we play an iterative game where we try to
 * converge on the number of blocks we need:
 *
 * 1. Estimate how many blocks we'll need to store the records.
 * 2. If the first free record has more blocks than we need, we're done.
 *    We will have to re-sort the records prior to building the cntbt.
 * 3. If that record has exactly the number of blocks we need, null out the
 *    record.  We're done.
 * 4. Otherwise, we still need more blocks.  Null out the record, subtract its
 *    length from the number of blocks we need, and go back to step 1.
 *
 * Fortunately, we don't have to do any transaction work to play this game,
 * so we don't have to tear down the staging cursors.
 */
STATIC int
xrep_abt_reserve_space(
	struct xrep_abt		*ra,
	struct xfs_btree_cur	*bno_cur,
	struct xfs_btree_cur	*cnt_cur,
	bool			*needs_resort)
{
	struct xfs_scrub	*sc = ra->sc;
	xfarray_idx_t		record_nr;
	unsigned int		allocated = 0;
	int			error = 0;

	record_nr = xfarray_length(ra->free_records) - 1;
	do {
		struct xfs_alloc_rec_incore arec;
		uint64_t	required;
		unsigned int	desired;
		unsigned int	len;

		/* Compute how many blocks we'll need. */
		error = xfs_btree_bload_compute_geometry(cnt_cur,
				&ra->new_cntbt.bload, ra->nr_real_records);
		if (error)
			break;

		error = xfs_btree_bload_compute_geometry(bno_cur,
				&ra->new_bnobt.bload, ra->nr_real_records);
		if (error)
			break;

		/* How many btree blocks do we need to store all records? */
		required = ra->new_bnobt.bload.nr_blocks +
			   ra->new_cntbt.bload.nr_blocks;
		ASSERT(required < INT_MAX);

		/* If we've reserved enough blocks, we're done. */
		if (allocated >= required)
			break;

		desired = required - allocated;

		/* We need space but there's none left; bye! */
		if (ra->nr_real_records == 0) {
			error = -ENOSPC;
			break;
		}

		/* Grab the first record from the list. */
		error = xfarray_load(ra->free_records, record_nr, &arec);
		if (error)
			break;

		ASSERT(arec.ar_blockcount <= UINT_MAX);
		len = min_t(unsigned int, arec.ar_blockcount, desired);

		trace_xrep_newbt_alloc_ag_blocks(sc->sa.pag,
				arec.ar_startblock, len, XFS_RMAP_OWN_AG);

		error = xrep_newbt_add_extent(&ra->new_bnobt, sc->sa.pag,
				arec.ar_startblock, len);
		if (error)
			break;
		allocated += len;
		ra->nr_blocks -= len;

		if (arec.ar_blockcount > desired) {
			/*
			 * Record has more space than we need.  The number of
			 * free records doesn't change, so shrink the free
			 * record, inform the caller that the records are no
			 * longer sorted by length, and exit.
			 */
			arec.ar_startblock += desired;
			arec.ar_blockcount -= desired;
			error = xfarray_store(ra->free_records, record_nr,
					&arec);
			if (error)
				break;

			*needs_resort = true;
			return 0;
		}

		/*
		 * We're going to use up the entire record, so unset it and
		 * move on to the next one.  This changes the number of free
		 * records (but doesn't break the sorting order), so we must
		 * go around the loop once more to re-run _bload_init.
		 */
		error = xfarray_unset(ra->free_records, record_nr);
		if (error)
			break;
		ra->nr_real_records--;
		record_nr--;
	} while (1);

	return error;
}

STATIC int
xrep_abt_dispose_one(
	struct xrep_abt		*ra,
	struct xrep_newbt_resv	*resv)
{
	struct xfs_scrub	*sc = ra->sc;
	struct xfs_perag	*pag = sc->sa.pag;
	xfs_agblock_t		free_agbno = resv->agbno + resv->used;
	xfs_extlen_t		free_aglen = resv->len - resv->used;
	int			error;

	ASSERT(pag == resv->pag);

	/* Add a deferred rmap for each extent we used. */
	if (resv->used > 0)
		xfs_rmap_alloc_extent(sc->tp, pag_agno(pag), resv->agbno,
				resv->used, XFS_RMAP_OWN_AG);

	/*
	 * For each reserved btree block we didn't use, add it to the free
	 * space btree.  We didn't touch fdblocks when we reserved them, so
	 * we don't touch it now.
	 */
	if (free_aglen == 0)
		return 0;

	trace_xrep_newbt_free_blocks(resv->pag, free_agbno, free_aglen,
			ra->new_bnobt.oinfo.oi_owner);

	error = __xfs_free_extent(sc->tp, resv->pag, free_agbno, free_aglen,
			&ra->new_bnobt.oinfo, XFS_AG_RESV_IGNORE, true);
	if (error)
		return error;

	return xrep_defer_finish(sc);
}

/*
 * Deal with all the space we reserved.  Blocks that were allocated for the
 * free space btrees need to have a (deferred) rmap added for the OWN_AG
 * allocation, and blocks that didn't get used can be freed via the usual
 * (deferred) means.
 */
STATIC void
xrep_abt_dispose_reservations(
	struct xrep_abt		*ra,
	int			error)
{
	struct xrep_newbt_resv	*resv, *n;

	if (error)
		goto junkit;

	list_for_each_entry_safe(resv, n, &ra->new_bnobt.resv_list, list) {
		error = xrep_abt_dispose_one(ra, resv);
		if (error)
			goto junkit;
	}

junkit:
	list_for_each_entry_safe(resv, n, &ra->new_bnobt.resv_list, list) {
		xfs_perag_put(resv->pag);
		list_del(&resv->list);
		kfree(resv);
	}

	xrep_newbt_cancel(&ra->new_bnobt);
	xrep_newbt_cancel(&ra->new_cntbt);
}

/* Retrieve free space data for bulk load. */
STATIC int
xrep_abt_get_records(
	struct xfs_btree_cur	*cur,
	unsigned int		idx,
	struct xfs_btree_block	*block,
	unsigned int		nr_wanted,
	void			*priv)
{
	struct xfs_alloc_rec_incore *arec = &cur->bc_rec.a;
	struct xrep_abt		*ra = priv;
	union xfs_btree_rec	*block_rec;
	unsigned int		loaded;
	int			error;

	for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
		error = xfarray_load_next(ra->free_records, &ra->array_cur,
				arec);
		if (error)
			return error;

		ra->longest = max(ra->longest, arec->ar_blockcount);

		block_rec = xfs_btree_rec_addr(cur, idx, block);
		cur->bc_ops->init_rec_from_cur(cur, block_rec);
	}

	return loaded;
}

/* Feed one of the new btree blocks to the bulk loader. */
STATIC int
xrep_abt_claim_block(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	void			*priv)
{
	struct xrep_abt		*ra = priv;

	return xrep_newbt_claim_block(cur, &ra->new_bnobt, ptr);
}

/*
 * Reset the AGF counters to reflect the free space btrees that we just
 * rebuilt, then reinitialize the per-AG data.
 */
STATIC int
xrep_abt_reset_counters(
	struct xrep_abt		*ra)
{
	struct xfs_scrub	*sc = ra->sc;
	struct xfs_perag	*pag = sc->sa.pag;
	struct xfs_agf		*agf = sc->sa.agf_bp->b_addr;
	unsigned int		freesp_btreeblks = 0;

	/*
	 * Compute the contribution to agf_btreeblks for the new free space
	 * btrees.  This is the computed btree size minus anything we didn't
	 * use.
	 */
	freesp_btreeblks += ra->new_bnobt.bload.nr_blocks - 1;
	freesp_btreeblks += ra->new_cntbt.bload.nr_blocks - 1;

	freesp_btreeblks -= xrep_newbt_unused_blocks(&ra->new_bnobt);
	freesp_btreeblks -= xrep_newbt_unused_blocks(&ra->new_cntbt);

	/*
	 * The AGF header contains extra information related to the free
	 * space btrees, so we must update those fields here.
	 */
	agf->agf_btreeblks = cpu_to_be32(freesp_btreeblks +
				(be32_to_cpu(agf->agf_rmap_blocks) - 1));
	agf->agf_freeblks = cpu_to_be32(ra->nr_blocks);
	agf->agf_longest = cpu_to_be32(ra->longest);
	xfs_alloc_log_agf(sc->tp, sc->sa.agf_bp, XFS_AGF_BTREEBLKS |
						 XFS_AGF_LONGEST |
						 XFS_AGF_FREEBLKS);

	/*
	 * After we commit the new btree to disk, it is possible that the
	 * process to reap the old btree blocks will race with the AIL trying
	 * to checkpoint the old btree blocks into the filesystem.  If the
	 * new tree is shorter than the old one, the allocbt write verifier
	 * will fail and the AIL will shut down the filesystem.
	 *
	 * To avoid this, save the old incore btree height values as the alt
	 * height values before re-initializing the perag info from the
	 * updated AGF to capture all the new values.
	 */
	pag->pagf_repair_bno_level = pag->pagf_bno_level;
	pag->pagf_repair_cnt_level = pag->pagf_cnt_level;

	/* Reinitialize with the values we just logged. */
	return xrep_reinit_pagf(sc);
}

/*
 * Use the collected free space information to stage new free space btrees.
 * If this is successful we'll return with the new btree root information
 * logged to the repair transaction but not yet committed.
 */
STATIC int
xrep_abt_build_new_trees(
	struct xrep_abt		*ra)
{
	struct xfs_scrub	*sc = ra->sc;
	struct xfs_btree_cur	*bno_cur;
	struct xfs_btree_cur	*cnt_cur;
	struct xfs_perag	*pag = sc->sa.pag;
	bool			needs_resort = false;
	int			error;

	/*
	 * Sort the free extents by length so that we can set up the free
	 * space btrees in as few extents as possible.  This reduces the
	 * amount of deferred rmap / free work we have to do at the end.
	 */
	error = xrep_cntbt_sort_records(ra, false);
	if (error)
		return error;

	/*
	 * Prepare to construct the new btree by reserving disk space for the
	 * new btree and setting up all the accounting information we'll need
	 * to root the new btree while it's under construction and before we
	 * attach it to the AG header.
	 */
	xrep_newbt_init_bare(&ra->new_bnobt, sc);
	xrep_newbt_init_bare(&ra->new_cntbt, sc);

	ra->new_bnobt.bload.get_records = xrep_abt_get_records;
	ra->new_cntbt.bload.get_records = xrep_abt_get_records;

	ra->new_bnobt.bload.claim_block = xrep_abt_claim_block;
	ra->new_cntbt.bload.claim_block = xrep_abt_claim_block;

	/* Allocate cursors for the staged btrees. */
	bno_cur = xfs_bnobt_init_cursor(sc->mp, NULL, NULL, pag);
	xfs_btree_stage_afakeroot(bno_cur, &ra->new_bnobt.afake);

	cnt_cur = xfs_cntbt_init_cursor(sc->mp, NULL, NULL, pag);
	xfs_btree_stage_afakeroot(cnt_cur, &ra->new_cntbt.afake);

	/* Last chance to abort before we start committing fixes. */
	if (xchk_should_terminate(sc, &error))
		goto err_cur;

	/* Reserve the space we'll need for the new btrees. */
	error = xrep_abt_reserve_space(ra, bno_cur, cnt_cur, &needs_resort);
	if (error)
		goto err_cur;

	/*
	 * If we need to re-sort the free extents by length, do so so that we
	 * can put the records into the cntbt in the correct order.
	 */
	if (needs_resort) {
		error = xrep_cntbt_sort_records(ra, needs_resort);
		if (error)
			goto err_cur;
	}

	/*
	 * Due to btree slack factors, it's possible for a new btree to be
	 * one level taller than the old btree.  Update the alternate incore
	 * btree height so that we don't trip the verifiers when writing the
	 * new btree blocks to disk.
	 */
	pag->pagf_repair_bno_level = ra->new_bnobt.bload.btree_height;
	pag->pagf_repair_cnt_level = ra->new_cntbt.bload.btree_height;

	/* Load the free space by length tree. */
	ra->array_cur = XFARRAY_CURSOR_INIT;
	ra->longest = 0;
	error = xfs_btree_bload(cnt_cur, &ra->new_cntbt.bload, ra);
	if (error)
		goto err_levels;

	error = xrep_bnobt_sort_records(ra);
	if (error)
		goto err_levels;

	/* Load the free space by block number tree. */
	ra->array_cur = XFARRAY_CURSOR_INIT;
	error = xfs_btree_bload(bno_cur, &ra->new_bnobt.bload, ra);
	if (error)
		goto err_levels;

	/*
	 * Install the new btrees in the AG header.  After this point the old
	 * btrees are no longer accessible and the new trees are live.
	 */
	xfs_allocbt_commit_staged_btree(bno_cur, sc->tp, sc->sa.agf_bp);
	xfs_btree_del_cursor(bno_cur, 0);
	xfs_allocbt_commit_staged_btree(cnt_cur, sc->tp, sc->sa.agf_bp);
	xfs_btree_del_cursor(cnt_cur, 0);

	/* Reset the AGF counters now that we've changed the btree shape. */
	error = xrep_abt_reset_counters(ra);
	if (error)
		goto err_newbt;

	/* Dispose of any unused blocks and the accounting information. */
	xrep_abt_dispose_reservations(ra, error);

	return xrep_roll_ag_trans(sc);

err_levels:
	pag->pagf_repair_bno_level = 0;
	pag->pagf_repair_cnt_level = 0;
err_cur:
	xfs_btree_del_cursor(cnt_cur, error);
	xfs_btree_del_cursor(bno_cur, error);
err_newbt:
	xrep_abt_dispose_reservations(ra, error);
	return error;
}

/*
 * Now that we've logged the roots of the new btrees, invalidate all of the
 * old blocks and free them.
 */
STATIC int
xrep_abt_remove_old_trees(
	struct xrep_abt		*ra)
{
	struct xfs_perag	*pag = ra->sc->sa.pag;
	int			error;

	/* Free the old btree blocks if they're not in use. */
	error = xrep_reap_agblocks(ra->sc, &ra->old_allocbt_blocks,
			&XFS_RMAP_OINFO_AG, XFS_AG_RESV_IGNORE);
	if (error)
		return error;

	/*
	 * Now that we've zapped all the old allocbt blocks we can turn off
	 * the alternate height mechanism.
	 */
	pag->pagf_repair_bno_level = 0;
	pag->pagf_repair_cnt_level = 0;
	return 0;
}

/* Repair the freespace btrees for some AG. */
int
xrep_allocbt(
	struct xfs_scrub	*sc)
{
	struct xrep_abt		*ra;
	struct xfs_mount	*mp = sc->mp;
	unsigned int		busy_gen;
	char			*descr;
	int			error;

	/* We require the rmapbt to rebuild anything. */
	if (!xfs_has_rmapbt(mp))
		return -EOPNOTSUPP;

	ra = kzalloc(sizeof(struct xrep_abt), XCHK_GFP_FLAGS);
	if (!ra)
		return -ENOMEM;
	ra->sc = sc;

	/* We rebuild both data structures. */
	sc->sick_mask = XFS_SICK_AG_BNOBT | XFS_SICK_AG_CNTBT;

	/*
	 * Make sure the busy extent list is clear because we can't put
	 * extents on there twice.  In theory we cleared this before we
	 * started, but let's not risk the filesystem.
	 */
	if (!xfs_extent_busy_list_empty(pag_group(sc->sa.pag), &busy_gen)) {
		error = -EDEADLOCK;
		goto out_ra;
	}

	/* Set up enough storage to handle maximally fragmented free space. */
	descr = xchk_xfile_ag_descr(sc, "free space records");
	error = xfarray_create(descr, mp->m_sb.sb_agblocks / 2,
			sizeof(struct xfs_alloc_rec_incore),
			&ra->free_records);
	kfree(descr);
	if (error)
		goto out_ra;

	/* Collect the free space data and find the old btree blocks. */
	xagb_bitmap_init(&ra->old_allocbt_blocks);
	error = xrep_abt_find_freespace(ra);
	if (error)
		goto out_bitmap;

	/* Rebuild the free space information. */
	error = xrep_abt_build_new_trees(ra);
	if (error)
		goto out_bitmap;

	/* Kill the old trees. */
	error = xrep_abt_remove_old_trees(ra);
	if (error)
		goto out_bitmap;

out_bitmap:
	xagb_bitmap_destroy(&ra->old_allocbt_blocks);
	xfarray_destroy(ra->free_records);
out_ra:
	kfree(ra);
	return error;
}

/* Make sure both btrees are ok after we've rebuilt them. */
int
xrep_revalidate_allocbt(
	struct xfs_scrub	*sc)
{
	__u32			old_type = sc->sm->sm_type;
	int			error;

	/*
	 * We must update sm_type temporarily so that the tree-to-tree cross
	 * reference checks will work in the correct direction, and also so
	 * that tracing will report correctly if there are more errors.
	 */
	sc->sm->sm_type = XFS_SCRUB_TYPE_BNOBT;
	error = xchk_allocbt(sc);
	if (error)
		goto out;

	sc->sm->sm_type = XFS_SCRUB_TYPE_CNTBT;
	error = xchk_allocbt(sc);
out:
	sc->sm->sm_type = old_type;
	return error;
}