mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-08-05 16:54:27 +00:00

Implement buffered writes including page faults and block zeroing for zoned RT devices. Buffered writes to zoned RT devices are split into three phases: 1) a reservation for the worst case data block usage is taken before acquiring the iolock. When there are enough free blocks but not enough available ones, garbage collection is kicked off to free the space before continuing with the write. If there isn't enough freeable space, the block reservation is reduced and a short write will happen as expected by normal Linux write semantics. 2) with the iolock held, the generic iomap buffered write code is called, which through the iomap_begin operation usually just inserts delalloc extents for the range in a single iteration. Only for overwrites of existing data that are not block aligned, or for zeroing operations, the existing extent mapping is read to fill out the srcmap and to figure out if zeroing is required. 3) the ->map_blocks callback to the generic iomap writeback code calls into the zoned space allocator to actually allocate on-disk space for the range before kicking off the writeback. Note that because all writes are out of place, truncates or hole punches that are not aligned to block size boundaries need to allocate space. For block zeroing from truncate, ->setattr is called with the iolock (aka i_rwsem) already held, so a hacky deviation from the above scheme is needed. In this case the space reservation is taken with the iolock held, but is required not to block and can dip into the reserved block pool. This can lead to -ENOSPC when truncating a file, which is unfortunate. But fixing the calling conventions in the VFS is probably much easier with code requiring it already in mainline. Similarly, because all writes are out of place, the zoned allocator can't support unwritten extents and thus the FALLOC_FL_ALLOCATE_RANGE mode of fallocate. 
Other fallocate modes that would reserve space, but don't need to in order to provide proper semantics, do work but do not reserve space. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
84 lines
2.7 KiB
C
84 lines
2.7 KiB
C
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
|
|
#ifndef __XFS_BMAP_UTIL_H__
|
|
#define __XFS_BMAP_UTIL_H__
|
|
|
|
/* Kernel only BMAP related definitions and functions */
|
|
|
|
/*
 * Opaque forward declarations.  This header only ever passes these types
 * around by pointer, so the tags are declared here instead of relying on
 * every includer having pulled in the full definitions first.
 */
struct xfs_bmbt_irec;
struct xfs_extent_free_item;
struct xfs_ifork;
struct xfs_inode;
struct xfs_mount;
struct xfs_trans;
struct xfs_bmalloca;
struct xfs_zone_alloc_ctx;
struct getbmapx;	/* used by xfs_getbmap() */
struct xfs_swapext;	/* used by xfs_swap_extents() */
|
|
|
|
#ifdef CONFIG_XFS_RT
|
|
int xfs_bmap_rtalloc(struct xfs_bmalloca *ap);
|
|
#else /* !CONFIG_XFS_RT */
|
|
/*
|
|
* Attempts to allocate RT extents when RT is disable indicates corruption and
|
|
* should trigger a shutdown.
|
|
*/
|
|
static inline int
|
|
xfs_bmap_rtalloc(struct xfs_bmalloca *ap)
|
|
{
|
|
return -EFSCORRUPTED;
|
|
}
|
|
#endif /* CONFIG_XFS_RT */
|
|
|
|
/*
 * Operates on the byte range given by @start_byte and @end_byte in fork
 * @whichfork of @ip; @ac is a zoned allocator context.
 *
 * NOTE(review): judging by the name this removes delayed-allocation extents
 * from the range; whether @end_byte is inclusive and whether @ac may be NULL
 * cannot be told from this declaration - confirm against the definition.
 */
void xfs_bmap_punch_delalloc_range(struct xfs_inode *ip, int whichfork,
		xfs_off_t start_byte, xfs_off_t end_byte,
		struct xfs_zone_alloc_ctx *ac);
|
|
|
|
struct kgetbmap {
|
|
__s64 bmv_offset; /* file offset of segment in blocks */
|
|
__s64 bmv_block; /* starting block (64-bit daddr_t) */
|
|
__s64 bmv_length; /* length of segment, blocks */
|
|
__s32 bmv_oflags; /* output flags */
|
|
};
|
|
/*
 * NOTE(review): presumably reports the block mapping of @ip as requested in
 * @bmv, handing results back through @out - confirm against the definition.
 */
int xfs_getbmap(struct xfs_inode *ip, struct getbmapx *bmv,
		struct kgetbmap *out);
|
|
|
|
/* functions in xfs_bmap.c that are only needed by xfs_bmap_util.c */
|
|
int xfs_bmap_extsize_align(struct xfs_mount *mp, struct xfs_bmbt_irec *gotp,
|
|
struct xfs_bmbt_irec *prevp, xfs_extlen_t extsz,
|
|
int rt, int eof, int delay, int convert,
|
|
xfs_fileoff_t *offp, xfs_extlen_t *lenp);
|
|
bool xfs_bmap_adjacent(struct xfs_bmalloca *ap);
|
|
int xfs_bmap_last_extent(struct xfs_trans *tp, struct xfs_inode *ip,
|
|
int whichfork, struct xfs_bmbt_irec *rec,
|
|
int *is_empty);
|
|
|
|
/* preallocation and hole punch interface */
|
|
int xfs_alloc_file_space(struct xfs_inode *ip, xfs_off_t offset,
|
|
xfs_off_t len);
|
|
int xfs_free_file_space(struct xfs_inode *ip, xfs_off_t offset,
|
|
xfs_off_t len, struct xfs_zone_alloc_ctx *ac);
|
|
int xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset,
|
|
xfs_off_t len, struct xfs_zone_alloc_ctx *ac);
|
|
int xfs_insert_file_space(struct xfs_inode *, xfs_off_t offset,
|
|
xfs_off_t len);
|
|
|
|
/*
 * EOF block manipulation functions.
 *
 * NOTE(review): xfs_can_free_eofblocks() presumably is the predicate callers
 * check before calling xfs_free_eofblocks() - confirm against the definitions.
 */
bool xfs_can_free_eofblocks(struct xfs_inode *ip);
int xfs_free_eofblocks(struct xfs_inode *ip);
|
|
|
|
/*
 * NOTE(review): presumably exchanges the extent maps of @ip and @tip as
 * described by the request in @sx - confirm against the definition.
 */
int xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip,
		struct xfs_swapext *sx);
|
|
|
|
/*
 * Takes a filesystem block number @fsb in the context of inode @ip and
 * returns an xfs_daddr_t.
 * NOTE(review): presumably the disk address that @fsb maps to - confirm.
 */
xfs_daddr_t xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb);
|
|
|
|
/*
 * Extent and block counting helpers.
 *
 * NOTE(review): xfs_bmap_count_leaves() presumably returns the number of
 * extents in @ifp and stores the block total in @count, while
 * xfs_bmap_count_blocks() reports both through @nextents and @count and
 * returns an error code - confirm against the definitions.
 */
xfs_extnum_t xfs_bmap_count_leaves(struct xfs_ifork *ifp, xfs_filblks_t *count);
int xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip,
		int whichfork, xfs_extnum_t *nextents,
		xfs_filblks_t *count);
|
|
|
|
/*
 * NOTE(review): presumably writes back and then unmaps the cached pages of
 * @ip covering @len bytes from @offset - confirm against the definition.
 */
int xfs_flush_unmap_range(struct xfs_inode *ip, xfs_off_t offset,
		xfs_off_t len);
|
|
|
|
#endif /* __XFS_BMAP_UTIL_H__ */
|