linux/fs/xfs/xfs_dquot.h

262 lines
6.5 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2000-2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
#ifndef __XFS_DQUOT_H__
#define __XFS_DQUOT_H__
/*
* Dquots are structures that hold quota information about a user or a group,
* much like inodes are for files. In fact, dquots share many characteristics
* with inodes. However, dquots can also be a centralized resource, relative
* to a collection of inodes. In this respect, dquots share some characteristics
* of the superblock.
* XFS dquots exploit both sets of characteristics in their algorithms. They
* make every attempt not to be a bottleneck when quotas are on, and to have
* minimal impact, if any, when quotas are off.
*/
/* Forward declarations: this header only refers to these types by pointer. */
struct xfs_mount;
struct xfs_trans;
/*
 * Indices into xfs_dquot_pre.q_low_space[]; XFS_QLOWSP_MAX is the number of
 * slots in that array.
 * NOTE(review): the names suggest 1%, 3% and 5% low-space thresholds --
 * confirm against the code that fills the array.
 */
enum {
	XFS_QLOWSP_1_PCNT	= 0,
	XFS_QLOWSP_3_PCNT	= 1,
	XFS_QLOWSP_5_PCNT	= 2,
	XFS_QLOWSP_MAX		= 3
};
/*
 * Usage counters, limits and timer for one resource tracked by a dquot.
 * struct xfs_dquot embeds one of these each for regular blocks, inodes and
 * realtime blocks.
 */
struct xfs_dquot_res {
	xfs_qcnt_t		reserved;	/* total allocated and reserved */
	xfs_qcnt_t		count;		/* total allocated */

	/* Absolute (hard) and preferred (soft) limits. */
	xfs_qcnt_t		hardlimit;
	xfs_qcnt_t		softlimit;

	/*
	 * Root dquots: the default grace period, in seconds.
	 * All other dquots: the time at which the quota grace period expires,
	 * in seconds since the Unix epoch.
	 */
	time64_t		timer;
};
/*
 * Report whether a resource's reservation exceeds one of its limits.
 *
 * A limit of zero is skipped. Returns true if @qres->reserved is strictly
 * greater than a nonzero soft limit or a nonzero hard limit, false otherwise.
 */
static inline bool
xfs_dquot_res_over_limits(
	const struct xfs_dquot_res	*qres)
{
	bool	over_soft = qres->softlimit && qres->reserved > qres->softlimit;
	bool	over_hard = qres->hardlimit && qres->reserved > qres->hardlimit;

	return over_soft || over_hard;
}
/*
 * Preallocation watermarks and low-space thresholds for one resource.
 * q_low_space[] is indexed by the XFS_QLOWSP_* constants.
 */
struct xfs_dquot_pre {
	xfs_qcnt_t		q_prealloc_lo_wmark;
	xfs_qcnt_t		q_prealloc_hi_wmark;
	int64_t			q_low_space[XFS_QLOWSP_MAX];
};
/*
 * In-memory (incore) representation of a dquot.
 */
struct xfs_dquot {
	struct list_head	q_lru;
	struct xfs_mount	*q_mount;	/* read by xfs_dquot_is_enforced() */
	xfs_dqtype_t		q_type;		/* type bits under XFS_DQTYPE_REC_MASK */
	uint16_t		q_flags;	/* flag bits, e.g. XFS_DQFLAG_DIRTY */
	xfs_dqid_t		q_id;
	uint			q_nrefs;	/* bumped by xfs_qm_dqhold() under q_qlock */
	int			q_bufoffset;
	xfs_daddr_t		q_blkno;
	xfs_fileoff_t		q_fileoffset;

	struct xfs_dquot_res	q_blk;		/* regular blocks */
	struct xfs_dquot_res	q_ino;		/* inodes */
	struct xfs_dquot_res	q_rtb;		/* realtime blocks */

	struct xfs_dq_logitem	q_logitem;

	struct xfs_dquot_pre	q_blk_prealloc;
	struct xfs_dquot_pre	q_rtb_prealloc;

	struct mutex		q_qlock;	/* see xfs_dqlock()/xfs_dqunlock() */
	struct completion	q_flush;	/* see xfs_dqflock()/xfs_dqfunlock() */
	atomic_t		q_pincount;
	struct wait_queue_head	q_pinwait;
};
/*
 * Lock classes for q_qlock:
 *   XFS_QLOCK_NORMAL - the implicit default.
 *   XFS_QLOCK_NESTED - used for the dquot with the higher id in xfs_dqlock2.
 */
enum {
	XFS_QLOCK_NORMAL	= 0,
	XFS_QLOCK_NESTED	= 1,
};
/*
 * The q_flush completion embedded in the dquot serialises processes that are
 * trying to flush the in-core dquot back to disk.
 *
 * xfs_dqflock() waits on that completion, blocking until it is available.
 */
static inline void
xfs_dqflock(
	struct xfs_dquot	*dqp)
{
	struct completion	*flush = &dqp->q_flush;

	wait_for_completion(flush);
}
/*
 * Non-blocking counterpart of xfs_dqflock(): returns the result of
 * try_wait_for_completion() on the dquot's q_flush completion.
 */
static inline bool
xfs_dqflock_nowait(
	struct xfs_dquot	*dqp)
{
	struct completion	*flush = &dqp->q_flush;

	return try_wait_for_completion(flush);
}
/* Signal the dquot's q_flush completion, undoing xfs_dqflock(). */
static inline void
xfs_dqfunlock(
	struct xfs_dquot	*dqp)
{
	struct completion	*flush = &dqp->q_flush;

	complete(flush);
}
/*
 * Try to take the dquot's q_qlock without sleeping; returns whatever
 * mutex_trylock() returns.
 */
static inline int
xfs_dqlock_nowait(
	struct xfs_dquot	*dqp)
{
	struct mutex		*qlock = &dqp->q_qlock;

	return mutex_trylock(qlock);
}
/* Take the dquot's q_qlock mutex. */
static inline void
xfs_dqlock(
	struct xfs_dquot	*dqp)
{
	struct mutex		*qlock = &dqp->q_qlock;

	mutex_lock(qlock);
}
/* Release the dquot's q_qlock mutex. */
static inline void
xfs_dqunlock(
	struct xfs_dquot	*dqp)
{
	struct mutex		*qlock = &dqp->q_qlock;

	mutex_unlock(qlock);
}
static inline int
xfs_dquot_type(const struct xfs_dquot *dqp)
{
return dqp->q_type & XFS_DQTYPE_REC_MASK;
}
/*
 * Evaluate the XFS_IS_{U,G,P}QUOTA_ON() check on @mp that matches @type.
 * Returns 0 for any type other than user, group or project.
 */
static inline int
xfs_this_quota_on(
	struct xfs_mount	*mp,
	xfs_dqtype_t		type)
{
	if (type == XFS_DQTYPE_USER)
		return XFS_IS_UQUOTA_ON(mp);
	if (type == XFS_DQTYPE_GROUP)
		return XFS_IS_GQUOTA_ON(mp);
	if (type == XFS_DQTYPE_PROJ)
		return XFS_IS_PQUOTA_ON(mp);
	return 0;
}
/*
 * Return the dquot pointer of the requested type stored in the inode.
 *
 * Returns NULL when xfs_is_metadir_inode() is true for @ip, and for any
 * @type other than user, group or project.
 */
static inline struct xfs_dquot *
xfs_inode_dquot(
	struct xfs_inode	*ip,
	xfs_dqtype_t		type)
{
	struct xfs_dquot	*dqp = NULL;

	if (xfs_is_metadir_inode(ip))
		return NULL;

	switch (type) {
	case XFS_DQTYPE_USER:
		dqp = ip->i_udquot;
		break;
	case XFS_DQTYPE_GROUP:
		dqp = ip->i_gdquot;
		break;
	case XFS_DQTYPE_PROJ:
		dqp = ip->i_pdquot;
		break;
	default:
		break;
	}
	return dqp;
}
/*
 * Decide if the dquot's limits are actually being enforced.
 *
 * Evaluates the XFS_IS_{U,G,P}QUOTA_ENFORCED() check on the dquot's mount
 * that matches the dquot's type. Any other type trips ASSERT(0) and, if
 * execution continues, yields false.
 */
static inline bool
xfs_dquot_is_enforced(
	const struct xfs_dquot	*dqp)
{
	const int		type = xfs_dquot_type(dqp);

	if (type == XFS_DQTYPE_USER)
		return XFS_IS_UQUOTA_ENFORCED(dqp->q_mount);
	if (type == XFS_DQTYPE_GROUP)
		return XFS_IS_GQUOTA_ENFORCED(dqp->q_mount);
	if (type == XFS_DQTYPE_PROJ)
		return XFS_IS_PQUOTA_ENFORCED(dqp->q_mount);

	/* Not a user, group or project dquot. */
	ASSERT(0);
	return false;
}
xfs: run an eofblocks scan on ENOSPC/EDQUOT From: Brian Foster <bfoster@redhat.com> Speculative preallocation and the associated throttling metrics assume we're working with large files on large filesystems. Users have reported inefficiencies in these mechanisms when we happen to be dealing with large files on smaller filesystems. This can occur because while prealloc throttling is aggressive under low free space conditions, it is not active until we reach 5% free space or less. For example, a 40GB filesystem has enough space for several files large enough to have multi-GB preallocations at any given time. If those files are slow growing, they might reserve preallocation for long periods of time as well as avoid the background scanner due to frequent modification. If a new file is written under these conditions, said file has no access to this already reserved space and premature ENOSPC is imminent. To handle this scenario, modify the buffered write ENOSPC handling and retry sequence to invoke an eofblocks scan. In the smaller filesystem scenario, the eofblocks scan resets the usage of preallocation such that when the 5% free space threshold is met, throttling effectively takes over to provide fair and efficient preallocation until legitimate ENOSPC. The eofblocks scan is selective based on the nature of the failure. For example, an EDQUOT failure in a particular quota will use a filtered scan for that quota. Because we don't know which quota might have caused an allocation failure at any given time, we include each applicable quota determined to be under low free space conditions in the scan. Signed-off-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2014-07-24 19:49:28 +10:00
/*
 * Check whether a dquot is under low free space conditions. We assume the quota
 * is enabled and enforced.
 *
 * Free space is taken as the hard limit minus the current reservation, first
 * for regular blocks and then for realtime blocks. Returns true as soon as
 * either value is below that resource's XFS_QLOWSP_1_PCNT low-space threshold,
 * false otherwise.
 */
static inline bool xfs_dquot_lowsp(struct xfs_dquot *dqp)
{
	int64_t freesp;

	freesp = dqp->q_blk.hardlimit - dqp->q_blk.reserved;
	if (freesp < dqp->q_blk_prealloc.q_low_space[XFS_QLOWSP_1_PCNT])
		return true;

	freesp = dqp->q_rtb.hardlimit - dqp->q_rtb.reserved;
	if (freesp < dqp->q_rtb_prealloc.q_low_space[XFS_QLOWSP_1_PCNT])
		return true;

	return false;
}
/*
 * Prototypes for dquot routines defined outside this header, plus two
 * state-test macros. Only the signatures are visible here; see the
 * definitions for the exact contracts.
 */
void xfs_dquot_to_disk(struct xfs_disk_dquot *ddqp, struct xfs_dquot *dqp);

/* Test whether the dquot's q_qlock mutex is currently locked. */
#define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock)))
/* Nonzero when XFS_DQFLAG_DIRTY is set in the dquot's q_flags. */
#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->q_flags & XFS_DQFLAG_DIRTY)

void xfs_qm_dqdestroy(struct xfs_dquot *dqp);
int xfs_qm_dqflush(struct xfs_dquot *dqp, struct xfs_buf *bp);
void xfs_qm_dqunpin_wait(struct xfs_dquot *dqp);
void xfs_qm_adjust_dqtimers(struct xfs_dquot *d);
void xfs_qm_adjust_dqlimits(struct xfs_dquot *d);
xfs_dqid_t xfs_qm_id_for_quotatype(struct xfs_inode *ip,
		xfs_dqtype_t type);

/* Dquot lookup; each variant hands the dquot back through @dqpp. */
int xfs_qm_dqget(struct xfs_mount *mp, xfs_dqid_t id,
		xfs_dqtype_t type, bool can_alloc,
		struct xfs_dquot **dqpp);
int xfs_qm_dqget_inode(struct xfs_inode *ip, xfs_dqtype_t type,
		bool can_alloc, struct xfs_dquot **dqpp);
int xfs_qm_dqget_next(struct xfs_mount *mp, xfs_dqid_t id,
		xfs_dqtype_t type, struct xfs_dquot **dqpp);
int xfs_qm_dqget_uncached(struct xfs_mount *mp,
		xfs_dqid_t id, xfs_dqtype_t type,
		struct xfs_dquot **dqpp);
void xfs_qm_dqput(struct xfs_dquot *dqp);

/*
 * Parameter names are spelled out so the prototypes document themselves;
 * this does not change the declared types.
 */
void xfs_dqlock2(struct xfs_dquot *d1, struct xfs_dquot *d2);
void xfs_dqlockn(struct xfs_dqtrx *q);
void xfs_dquot_set_prealloc_limits(struct xfs_dquot *dqp);
xfs: attach dquot buffer to dquot log item buffer Ever since 6.12-rc1, I've observed a pile of warnings from the kernel when running fstests with quotas enabled: WARNING: CPU: 1 PID: 458580 at mm/page_alloc.c:4221 __alloc_pages_noprof+0xc9c/0xf18 CPU: 1 UID: 0 PID: 458580 Comm: xfsaild/sda3 Tainted: G W 6.12.0-rc6-djwa #rc6 6ee3e0e531f6457e2d26aa008a3b65ff184b377c <snip> Call trace: __alloc_pages_noprof+0xc9c/0xf18 alloc_pages_mpol_noprof+0x94/0x240 alloc_pages_noprof+0x68/0xf8 new_slab+0x3e0/0x568 ___slab_alloc+0x5a0/0xb88 __slab_alloc.constprop.0+0x7c/0xf8 __kmalloc_noprof+0x404/0x4d0 xfs_buf_get_map+0x594/0xde0 [xfs 384cb02810558b4c490343c164e9407332118f88] xfs_buf_read_map+0x64/0x2e0 [xfs 384cb02810558b4c490343c164e9407332118f88] xfs_trans_read_buf_map+0x1dc/0x518 [xfs 384cb02810558b4c490343c164e9407332118f88] xfs_qm_dqflush+0xac/0x468 [xfs 384cb02810558b4c490343c164e9407332118f88] xfs_qm_dquot_logitem_push+0xe4/0x148 [xfs 384cb02810558b4c490343c164e9407332118f88] xfsaild+0x3f4/0xde8 [xfs 384cb02810558b4c490343c164e9407332118f88] kthread+0x110/0x128 ret_from_fork+0x10/0x20 ---[ end trace 0000000000000000 ]--- This corresponds to the line: WARN_ON_ONCE(current->flags & PF_MEMALLOC); within the NOFAIL checks. What's happening here is that the XFS AIL is trying to write a disk quota update back into the filesystem, but for that it needs to read the ondisk buffer for the dquot. The buffer is not in memory anymore, probably because it was evicted. Regardless, the buffer cache tries to allocate a new buffer, but those allocations are NOFAIL. The AIL thread has marked itself PF_MEMALLOC (aka noreclaim) since commit 43ff2122e6492b ("xfs: on-stack delayed write buffer lists") presumably because reclaim can push on XFS to push on the AIL. 
An easy way to fix this probably would have been to drop the NOFAIL flag from the xfs_buf allocation and open code a retry loop, but then there's still the problem that for bs>ps filesystems, the buffer itself could require up to 64k worth of pages. Inode items had similar behavior (multi-page cluster buffers that we don't want to allocate in the AIL) which we solved by making transaction precommit attach the inode cluster buffers to the dirty log item. Let's solve the dquot problem in the same way. So: Make a real precommit handler to read the dquot buffer and attach it to the log item; pass it to dqflush in the push method; and have the iodone function detach the buffer once we've flushed everything. Add a state flag to the log item to track when a thread has entered the precommit -> push mechanism to skip the detaching if it turns out that the dquot is very busy, as we don't hold the dquot lock between log item commit and AIL push. Reading and attaching the dquot buffer in the precommit hook is inspired by the work done for inode cluster buffers some time ago. Cc: <stable@vger.kernel.org> # v6.12 Fixes: 903edea6c53f09 ("mm: warn about illegal __GFP_NOFAIL usage in a more appropriate location and manner") Signed-off-by: "Darrick J. Wong" <djwong@kernel.org> Reviewed-by: Christoph Hellwig <hch@lst.de>
2024-12-02 10:57:38 -08:00
/*
 * Dquot buffer attach/use/detach helpers. Only the prototypes are visible in
 * this header.
 * NOTE(review): the names suggest they attach the dquot's buffer within a
 * transaction, hand the attached buffer back through @bpp, and detach it
 * again -- confirm against the definitions.
 */
int xfs_dquot_attach_buf(struct xfs_trans *tp, struct xfs_dquot *dqp);
int xfs_dquot_use_attached_buf(struct xfs_dquot *dqp, struct xfs_buf **bpp);
void xfs_dquot_detach_buf(struct xfs_dquot *dqp);
/*
 * Take an additional reference on a dquot.
 *
 * q_nrefs is incremented while holding the dquot lock, which is taken and
 * released here. Returns @dqp.
 */
static inline struct xfs_dquot *
xfs_qm_dqhold(
	struct xfs_dquot	*dqp)
{
	xfs_dqlock(dqp);
	dqp->q_nrefs += 1;
	xfs_dqunlock(dqp);

	return dqp;
}
/* Timer helpers; both take and return time64_t values. */
time64_t xfs_dquot_set_timeout(struct xfs_mount *mp, time64_t timeout);
time64_t xfs_dquot_set_grace_period(time64_t grace);

void xfs_qm_init_dquot_blk(struct xfs_trans *tp, xfs_dqid_t id,
		xfs_dqtype_t type, struct xfs_buf *bp);
#endif /* __XFS_DQUOT_H__ */