// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2023-2025 Christoph Hellwig.
 * Copyright (c) 2024-2025, Western Digital Corporation or its affiliates.
 */
#include "xfs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_rtbitmap.h"
#include "xfs_zone_alloc.h"
#include "xfs_zone_priv.h"
#include "xfs_zones.h"

/*
 * Note: the zoned allocator does not support a rtextsize > 1, so this code
 * and the allocator itself use file system blocks interchangeably with
 * realtime extents without doing the otherwise required conversions.
 */

/*
 * Per-task space reservation.
 *
 * Tasks that need to wait for GC to free up space allocate one of these
 * on-stack and add it to the per-mount zi_reclaim_reservations list.
 * The GC thread will then wake the tasks in order when space becomes
 * available.
 */
struct xfs_zone_reservation {
	struct list_head	entry;
	struct task_struct	*task;
	xfs_filblks_t		count_fsb;
};

/*
 * Calculate the number of reserved blocks.
 *
 * XC_FREE_RTEXTENTS counts the user-available capacity, i.e. how full the
 * file system can become, while XC_FREE_RTAVAILABLE counts the blocks
 * instantly available for writes without waiting for GC.
 *
 * For XC_FREE_RTAVAILABLE only the smaller reservation required for GC and
 * block zeroing is excluded from the user capacity, while XC_FREE_RTEXTENTS
 * is further restricted by at least one zone as well as the optional
 * persistently reserved blocks. This allows the allocator to run more
 * smoothly by not always triggering GC.
 */
uint64_t
xfs_zoned_default_resblks(
	struct xfs_mount	*mp,
	enum xfs_free_counter	ctr)
{
	switch (ctr) {
	case XC_FREE_RTEXTENTS:
		return (uint64_t)XFS_RESERVED_ZONES *
			mp->m_groups[XG_TYPE_RTG].blocks +
			mp->m_sb.sb_rtreserved;
	case XC_FREE_RTAVAILABLE:
		return (uint64_t)XFS_GC_ZONES *
			mp->m_groups[XG_TYPE_RTG].blocks;
	default:
		ASSERT(0);
		return 0;
	}
}
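
/*
 * Worked example with purely illustrative numbers (the real constants live
 * in xfs_zones.h): assume 65536-block zones, XFS_GC_ZONES == 2,
 * XFS_RESERVED_ZONES == 3, and sb_rtreserved == 0. Then XC_FREE_RTAVAILABLE
 * holds back 2 * 65536 = 131072 blocks for GC and block zeroing, while
 * XC_FREE_RTEXTENTS additionally holds back a full zone, i.e.
 * 3 * 65536 = 196608 blocks, from the user-visible capacity.
 */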
void
xfs_zoned_resv_wake_all(
	struct xfs_mount	*mp)
{
	struct xfs_zone_info	*zi = mp->m_zone_info;
	struct xfs_zone_reservation *reservation;

	spin_lock(&zi->zi_reservation_lock);
	list_for_each_entry(reservation, &zi->zi_reclaim_reservations, entry)
		wake_up_process(reservation->task);
	spin_unlock(&zi->zi_reservation_lock);
}

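/*
 * Return freed blocks to the available pool and, while enough space remains,
 * wake reservation waiters in list order.
 */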
void
xfs_zoned_add_available(
	struct xfs_mount	*mp,
	xfs_filblks_t		count_fsb)
{
	struct xfs_zone_info	*zi = mp->m_zone_info;
	struct xfs_zone_reservation *reservation;

	if (list_empty_careful(&zi->zi_reclaim_reservations)) {
		xfs_add_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb);
		return;
	}

	spin_lock(&zi->zi_reservation_lock);
	xfs_add_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb);
	count_fsb = xfs_sum_freecounter(mp, XC_FREE_RTAVAILABLE);
	list_for_each_entry(reservation, &zi->zi_reclaim_reservations, entry) {
		if (reservation->count_fsb > count_fsb)
			break;
		wake_up_process(reservation->task);
		count_fsb -= reservation->count_fsb;
	}
	spin_unlock(&zi->zi_reservation_lock);
}

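/*
 * Check for conditions that should abort waiting for space: a file system
 * shutdown or a fatal signal delivered to the waiting task.
 */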
static int
xfs_zoned_space_wait_error(
	struct xfs_mount	*mp)
{
	if (xfs_is_shutdown(mp))
		return -EIO;
	if (fatal_signal_pending(current))
		return -EINTR;
	return 0;
}

static int
xfs_zoned_reserve_available(
	struct xfs_mount	*mp,
	xfs_filblks_t		count_fsb,
	unsigned int		flags)
{
	struct xfs_zone_info	*zi = mp->m_zone_info;
	struct xfs_zone_reservation reservation = {
		.task		= current,
		.count_fsb	= count_fsb,
	};
	int			error;

	/*
	 * If there are no waiters, try to directly grab the available blocks
	 * from the percpu counter.
	 *
	 * If the caller wants to dip into the reserved pool also bypass the
	 * wait list. This relies on the fact that we have a very generously
	 * sized reserved pool that always has enough space. If the reserved
	 * allocations fail we're in trouble.
	 */
	if (likely(list_empty_careful(&zi->zi_reclaim_reservations) ||
	    (flags & XFS_ZR_RESERVED))) {
		error = xfs_dec_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb,
				flags & XFS_ZR_RESERVED);
		if (error != -ENOSPC)
			return error;
	}

	if (flags & XFS_ZR_NOWAIT)
		return -EAGAIN;
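
	/*
	 * Slow path: queue on the reservation list and sleep until GC frees
	 * enough space. set_current_state(TASK_KILLABLE) is called before
	 * re-checking the counter, so a wakeup racing with the check is not
	 * lost: wake_up_process() puts the task back to TASK_RUNNING, and
	 * schedule() then returns without sleeping.
	 */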
	spin_lock(&zi->zi_reservation_lock);
	list_add_tail(&reservation.entry, &zi->zi_reclaim_reservations);
	while ((error = xfs_zoned_space_wait_error(mp)) == 0) {
		set_current_state(TASK_KILLABLE);

		error = xfs_dec_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb,
				flags & XFS_ZR_RESERVED);
		if (error != -ENOSPC)
			break;

		/*
		 * Make sure to start GC if it is not running already. As we
		 * check the rtavailable count when filling up zones, GC is
		 * normally already running at this point, but in some setups
		 * with very few zones we may completely run out of non-
		 * reserved blocks in between filling zones.
		 */
		if (!xfs_is_zonegc_running(mp))
			wake_up_process(zi->zi_gc_thread);

		/*
		 * If there is no reclaimable group left and we aren't still
		 * processing a pending GC request, give up as we're fully out
		 * of space.
		 */
		if (!xfs_group_marked(mp, XG_TYPE_RTG, XFS_RTG_RECLAIMABLE) &&
		    !xfs_is_zonegc_running(mp))
			break;

		spin_unlock(&zi->zi_reservation_lock);
		schedule();
		spin_lock(&zi->zi_reservation_lock);
	}
	list_del(&reservation.entry);
	spin_unlock(&zi->zi_reservation_lock);

	__set_current_state(TASK_RUNNING);
	return error;
}

/*
 * Implement greedy space allocation for short writes by trying to grab all
 * that is left after locking out other threads from trying to do the same.
 *
 * This isn't exactly optimal and can hopefully be replaced by a proper
 * percpu_counter primitive one day.
 */
static int
xfs_zoned_reserve_extents_greedy(
	struct xfs_mount	*mp,
	xfs_filblks_t		*count_fsb,
	unsigned int		flags)
{
	struct xfs_zone_info	*zi = mp->m_zone_info;
	s64			len = *count_fsb;
	int			error = -ENOSPC;

	spin_lock(&zi->zi_reservation_lock);
	len = min(len, xfs_sum_freecounter(mp, XC_FREE_RTEXTENTS));
	if (len > 0) {
		*count_fsb = len;
		error = xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, *count_fsb,
				flags & XFS_ZR_RESERVED);
	}
	spin_unlock(&zi->zi_reservation_lock);
	return error;
}

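/*
 * Reserve both user-visible capacity (XC_FREE_RTEXTENTS) and instantly
 * writable blocks (XC_FREE_RTAVAILABLE) for a zoned write. On success the
 * reservation is recorded in @ac and must be released through
 * xfs_zoned_space_unreserve() once the caller is done with it.
 */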
int
xfs_zoned_space_reserve(
	struct xfs_mount	*mp,
	xfs_filblks_t		count_fsb,
	unsigned int		flags,
	struct xfs_zone_alloc_ctx *ac)
{
	int			error;

	ASSERT(ac->reserved_blocks == 0);
	ASSERT(ac->open_zone == NULL);

	error = xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb,
			flags & XFS_ZR_RESERVED);
	if (error == -ENOSPC && (flags & XFS_ZR_GREEDY) && count_fsb > 1)
		error = xfs_zoned_reserve_extents_greedy(mp, &count_fsb, flags);
	if (error)
		return error;

	error = xfs_zoned_reserve_available(mp, count_fsb, flags);
	if (error) {
		xfs_add_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb);
		return error;
	}
	ac->reserved_blocks = count_fsb;
	return 0;
}

void
xfs_zoned_space_unreserve(
	struct xfs_mount	*mp,
	struct xfs_zone_alloc_ctx *ac)
{
	if (ac->reserved_blocks > 0) {
		xfs_zoned_add_available(mp, ac->reserved_blocks);
		xfs_add_freecounter(mp, XC_FREE_RTEXTENTS, ac->reserved_blocks);
	}
	if (ac->open_zone)
		xfs_open_zone_put(ac->open_zone);
}
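
/*
 * Typical calling pattern (an illustrative sketch only, not lifted from the
 * actual zoned write path):
 *
 *	struct xfs_zone_alloc_ctx	ac = { };
 *	int				error;
 *
 *	error = xfs_zoned_space_reserve(mp, count_fsb, XFS_ZR_GREEDY, &ac);
 *	if (error)
 *		return error;
 *	... allocate and write up to ac.reserved_blocks blocks ...
 *	xfs_zoned_space_unreserve(mp, &ac);
 */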