fs/dax: create a common implementation to break DAX layouts

Prior to freeing a block, file systems supporting FS DAX must check that
the associated pages are both unmapped from user-space and not undergoing
DMA or other access from e.g. get_user_pages().  This is achieved by
unmapping the file range and scanning the FS DAX page-cache to see if any
pages within the mapping have an elevated refcount.

This is done using two functions: dax_layout_busy_page_range(), which
returns a busy page in the range, and dax_wait_page_idle(), which waits
for that page's refcount to become idle.  Rather than have each
filesystem open-code this loop, introduce a common implementation that
both unmaps and waits for the page to become idle.
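
As context for the API this introduces (an illustrative sketch, not part
of the patch; the myfs_* names are hypothetical): the callback passed to
dax_break_layout() runs each time a busy page is found and is expected to
drop the mapping's invalidate_lock so DMA users can finish, then retake
it, mirroring the existing ext4_wait_dax_page(), fuse_wait_dax_page() and
xfs_wait_dax_page() helpers.  A caller holding the invalidate_lock
exclusively would look roughly like:

#include <linux/dax.h>
#include <linux/pagemap.h>
#include <linux/sched.h>

/*
 * Hypothetical callback, modelled on fuse_wait_dax_page().  It is
 * invoked while a page in the range still has an elevated refcount:
 * drop the invalidate_lock, give the DMA user a chance to finish,
 * then retake the lock before the busy-page scan is retried.
 */
static void myfs_wait_dax_page(struct inode *inode)
{
	filemap_invalidate_unlock(inode->i_mapping);
	schedule();
	filemap_invalidate_lock(inode->i_mapping);
}

/* Must be called with inode->i_mapping->invalidate_lock held exclusively. */
static int myfs_break_layouts(struct inode *inode, loff_t start, loff_t end)
{
	return dax_break_layout(inode, start, end, myfs_wait_dax_page);
}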

Link: https://lkml.kernel.org/r/c4d381e41fc618296cee2820403c166d80599d5c.1740713401.git-series.apopple@nvidia.com
Signed-off-by: Alistair Popple <apopple@nvidia.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Tested-by: Alison Schofield <alison.schofield@intel.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Asahi Lina <lina@asahilina.net>
Cc: Balbir Singh <balbirs@nvidia.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Chunyan Zhang <zhang.lyra@gmail.com>
Cc: "Darrick J. Wong" <djwong@kernel.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: linmiaohe <linmiaohe@huawei.com>
Cc: Logan Gunthorpe <logang@deltatee.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michael "Camp Drill Sergeant" Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Ted Ts'o <tytso@mit.edu>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
commit d5b3afea22 (parent e6fa3963a3)
Alistair Popple, 2025-02-28 14:31:00 +11:00; committed by Andrew Morton
6 changed files, 63 insertions(+), 61 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
@@ -846,6 +846,39 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
 	return ret;
 }
 
+static int wait_page_idle(struct page *page,
+				void (cb)(struct inode *),
+				struct inode *inode)
+{
+	return ___wait_var_event(page, dax_page_is_idle(page),
+				TASK_INTERRUPTIBLE, 0, 0, cb(inode));
+}
+
+/*
+ * Unmaps the inode and waits for any DMA to complete prior to deleting the
+ * DAX mapping entries for the range.
+ */
+int dax_break_layout(struct inode *inode, loff_t start, loff_t end,
+		void (cb)(struct inode *))
+{
+	struct page *page;
+	int error = 0;
+
+	if (!dax_mapping(inode->i_mapping))
+		return 0;
+
+	do {
+		page = dax_layout_busy_page_range(inode->i_mapping, start, end);
+		if (!page)
+			break;
+
+		error = wait_page_idle(page, cb, inode);
+	} while (error == 0);
+
+	return error;
+}
+EXPORT_SYMBOL_GPL(dax_break_layout);
+
 /*
  * Invalidate DAX entry if it is clean.
  */

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
@@ -3911,21 +3911,10 @@ static void ext4_wait_dax_page(struct inode *inode)
 
 int ext4_break_layouts(struct inode *inode)
 {
-	struct page *page;
-	int error;
-
 	if (WARN_ON_ONCE(!rwsem_is_locked(&inode->i_mapping->invalidate_lock)))
 		return -EINVAL;
 
-	do {
-		page = dax_layout_busy_page(inode->i_mapping);
-		if (!page)
-			return 0;
-
-		error = dax_wait_page_idle(page, ext4_wait_dax_page, inode);
-	} while (error == 0);
-
-	return error;
+	return dax_break_layout_inode(inode, ext4_wait_dax_page);
 }
 
 /*

diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c
@@ -666,33 +666,12 @@ static void fuse_wait_dax_page(struct inode *inode)
 	filemap_invalidate_lock(inode->i_mapping);
 }
 
-/* Should be called with mapping->invalidate_lock held exclusively */
-static int __fuse_dax_break_layouts(struct inode *inode, bool *retry,
-				    loff_t start, loff_t end)
-{
-	struct page *page;
-
-	page = dax_layout_busy_page_range(inode->i_mapping, start, end);
-	if (!page)
-		return 0;
-
-	*retry = true;
-	return dax_wait_page_idle(page, fuse_wait_dax_page, inode);
-}
-
 /* Should be called with mapping->invalidate_lock held exclusively. */
 int fuse_dax_break_layouts(struct inode *inode, u64 dmap_start,
 			   u64 dmap_end)
 {
-	bool retry;
-	int ret;
-
-	do {
-		retry = false;
-		ret = __fuse_dax_break_layouts(inode, &retry, dmap_start,
-					       dmap_end);
-	} while (ret == 0 && retry);
-
-	return ret;
+	return dax_break_layout(inode, dmap_start, dmap_end,
+				fuse_wait_dax_page);
 }
 
 ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
@@ -2735,21 +2735,17 @@ xfs_mmaplock_two_inodes_and_break_dax_layout(
 	struct xfs_inode	*ip2)
 {
 	int			error;
-	bool			retry;
 	struct page		*page;
 
 	if (ip1->i_ino > ip2->i_ino)
 		swap(ip1, ip2);
 
 again:
-	retry = false;
 	/* Lock the first inode */
 	xfs_ilock(ip1, XFS_MMAPLOCK_EXCL);
-	error = xfs_break_dax_layouts(VFS_I(ip1), &retry);
-	if (error || retry) {
+	error = xfs_break_dax_layouts(VFS_I(ip1));
+	if (error) {
 		xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL);
-		if (error == 0 && retry)
-			goto again;
 		return error;
 	}
 
@@ -2764,7 +2760,7 @@ again:
 	 * for this nested lock case.
 	 */
 	page = dax_layout_busy_page(VFS_I(ip2)->i_mapping);
-	if (page && page_ref_count(page) != 1) {
+	if (!dax_page_is_idle(page)) {
 		xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL);
 		xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL);
 		goto again;
@@ -3008,19 +3004,11 @@ xfs_wait_dax_page(
 
 int
 xfs_break_dax_layouts(
-	struct inode		*inode,
-	bool			*retry)
+	struct inode		*inode)
 {
-	struct page		*page;
-
 	xfs_assert_ilocked(XFS_I(inode), XFS_MMAPLOCK_EXCL);
 
-	page = dax_layout_busy_page(inode->i_mapping);
-	if (!page)
-		return 0;
-
-	*retry = true;
-	return dax_wait_page_idle(page, xfs_wait_dax_page, inode);
+	return dax_break_layout_inode(inode, xfs_wait_dax_page);
 }
 
 int
@@ -3038,8 +3026,8 @@ xfs_break_layouts(
 		retry = false;
 		switch (reason) {
 		case BREAK_UNMAP:
-			error = xfs_break_dax_layouts(inode, &retry);
-			if (error || retry)
+			error = xfs_break_dax_layouts(inode);
+			if (error)
 				break;
 			fallthrough;
 		case BREAK_WRITE:

diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
@@ -603,7 +603,7 @@ xfs_itruncate_extents(
 	return xfs_itruncate_extents_flags(tpp, ip, whichfork, new_size, 0);
 }
 
-int	xfs_break_dax_layouts(struct inode *inode, bool *retry);
+int	xfs_break_dax_layouts(struct inode *inode);
 int	xfs_break_layouts(struct inode *inode, uint *iolock,
 		enum layout_break_reason reason);

diff --git a/include/linux/dax.h b/include/linux/dax.h
@@ -207,12 +207,9 @@ int dax_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
 int dax_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
 		const struct iomap_ops *ops);
 
-static inline int dax_wait_page_idle(struct page *page,
-				void (cb)(struct inode *),
-				struct inode *inode)
+static inline bool dax_page_is_idle(struct page *page)
 {
-	return ___wait_var_event(page, page_ref_count(page) == 1,
-				TASK_INTERRUPTIBLE, 0, 0, cb(inode));
+	return page && page_ref_count(page) == 1;
 }
 
 #if IS_ENABLED(CONFIG_DAX)
@@ -228,6 +225,15 @@ static inline void dax_read_unlock(int id)
 {
 }
 #endif /* CONFIG_DAX */
+
+#if !IS_ENABLED(CONFIG_FS_DAX)
+static inline int __must_check dax_break_layout(struct inode *inode,
+		loff_t start, loff_t end, void (cb)(struct inode *))
+{
+	return 0;
+}
+#endif
+
 bool dax_alive(struct dax_device *dax_dev);
 void *dax_get_private(struct dax_device *dax_dev);
 long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
@@ -251,6 +257,13 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
 int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
 				      pgoff_t index);
+int __must_check dax_break_layout(struct inode *inode, loff_t start,
+				loff_t end, void (cb)(struct inode *));
+static inline int __must_check dax_break_layout_inode(struct inode *inode,
+		void (cb)(struct inode *))
+{
+	return dax_break_layout(inode, 0, LLONG_MAX, cb);
+}
 int dax_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 				  struct inode *dest, loff_t destoff,
 				  loff_t len, bool *is_same,
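
As a usage note (a hedged sketch, not part of the diff; the myfs_* names
are hypothetical and reuse the myfs_wait_dax_page() callback sketched
earlier): the whole-inode helper simply expands to
dax_break_layout(inode, 0, LLONG_MAX, cb), which is the pattern the ext4
and XFS conversions above rely on.

/*
 * Sketch: break DAX layouts across an entire file, as the ext4 and
 * XFS callers above now do.  The result is __must_check because the
 * wait is TASK_INTERRUPTIBLE and can fail with -ERESTARTSYS if a
 * signal arrives while waiting for the page to become idle.
 */
static int myfs_break_layouts_whole_file(struct inode *inode)
{
	return dax_break_layout_inode(inode, myfs_wait_dax_page);
}

Because the wait is interruptible, callers propagate the error rather
than looping on a retry flag, which is what allowed the bool *retry
plumbing in fuse and XFS to be removed.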