xfs: add inode to zone caching for data placement

Placing data from the same file in the same zone is a great heuristic
for reducing write amplification and we do this already - but only
for sequential writes.

To support placing data in the same way for random writes, reuse the
xfs mru cache to map inodes to open zones on first write. If a mapping
is present, use the open zone for data placement for this file until
the zone is full.

Signed-off-by: Hans Holmberg <hans.holmberg@wdc.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
This commit is contained in:
Hans Holmberg 2025-05-14 10:50:37 +00:00 committed by Carlos Maiolino
parent 70b95cb865
commit f3e2e53823
2 changed files with 110 additions and 0 deletions

View file

@ -236,6 +236,7 @@ typedef struct xfs_mount {
bool m_update_sb; /* sb needs update in mount */ bool m_update_sb; /* sb needs update in mount */
unsigned int m_max_open_zones; unsigned int m_max_open_zones;
unsigned int m_zonegc_low_space; unsigned int m_zonegc_low_space;
struct xfs_mru_cache *m_zone_cache; /* Inode to open zone cache */
/* max_atomic_write mount option value */ /* max_atomic_write mount option value */
unsigned long long m_awu_max_bytes; unsigned long long m_awu_max_bytes;

View file

@ -24,6 +24,7 @@
#include "xfs_zone_priv.h" #include "xfs_zone_priv.h"
#include "xfs_zones.h" #include "xfs_zones.h"
#include "xfs_trace.h" #include "xfs_trace.h"
#include "xfs_mru_cache.h"
void void
xfs_open_zone_put( xfs_open_zone_put(
@ -796,6 +797,100 @@ xfs_submit_zoned_bio(
submit_bio(&ioend->io_bio); submit_bio(&ioend->io_bio);
} }
/*
* Cache the last zone written to for an inode so that it is considered first
* for subsequent writes.
*/
struct xfs_zone_cache_item {
struct xfs_mru_cache_elem mru;
struct xfs_open_zone *oz;
};
static inline struct xfs_zone_cache_item *
xfs_zone_cache_item(struct xfs_mru_cache_elem *mru)
{
return container_of(mru, struct xfs_zone_cache_item, mru);
}
static void
xfs_zone_cache_free_func(
void *data,
struct xfs_mru_cache_elem *mru)
{
struct xfs_zone_cache_item *item = xfs_zone_cache_item(mru);
xfs_open_zone_put(item->oz);
kfree(item);
}
/*
* Check if we have a cached last open zone available for the inode and
* if yes return a reference to it.
*/
static struct xfs_open_zone *
xfs_cached_zone(
struct xfs_mount *mp,
struct xfs_inode *ip)
{
struct xfs_mru_cache_elem *mru;
struct xfs_open_zone *oz;
mru = xfs_mru_cache_lookup(mp->m_zone_cache, ip->i_ino);
if (!mru)
return NULL;
oz = xfs_zone_cache_item(mru)->oz;
if (oz) {
/*
* GC only steals open zones at mount time, so no GC zones
* should end up in the cache.
*/
ASSERT(!oz->oz_is_gc);
ASSERT(atomic_read(&oz->oz_ref) > 0);
atomic_inc(&oz->oz_ref);
}
xfs_mru_cache_done(mp->m_zone_cache);
return oz;
}
/*
* Update the last used zone cache for a given inode.
*
* The caller must have a reference on the open zone.
*/
static void
xfs_zone_cache_create_association(
struct xfs_inode *ip,
struct xfs_open_zone *oz)
{
struct xfs_mount *mp = ip->i_mount;
struct xfs_zone_cache_item *item = NULL;
struct xfs_mru_cache_elem *mru;
ASSERT(atomic_read(&oz->oz_ref) > 0);
atomic_inc(&oz->oz_ref);
mru = xfs_mru_cache_lookup(mp->m_zone_cache, ip->i_ino);
if (mru) {
/*
* If we have an association already, update it to point to the
* new zone.
*/
item = xfs_zone_cache_item(mru);
xfs_open_zone_put(item->oz);
item->oz = oz;
xfs_mru_cache_done(mp->m_zone_cache);
return;
}
item = kmalloc(sizeof(*item), GFP_KERNEL);
if (!item) {
xfs_open_zone_put(oz);
return;
}
item->oz = oz;
xfs_mru_cache_insert(mp->m_zone_cache, ip->i_ino, &item->mru);
}
void void
xfs_zone_alloc_and_submit( xfs_zone_alloc_and_submit(
struct iomap_ioend *ioend, struct iomap_ioend *ioend,
@ -819,11 +914,16 @@ xfs_zone_alloc_and_submit(
*/ */
if (!*oz && ioend->io_offset) if (!*oz && ioend->io_offset)
*oz = xfs_last_used_zone(ioend); *oz = xfs_last_used_zone(ioend);
if (!*oz)
*oz = xfs_cached_zone(mp, ip);
if (!*oz) { if (!*oz) {
select_zone: select_zone:
*oz = xfs_select_zone(mp, write_hint, pack_tight); *oz = xfs_select_zone(mp, write_hint, pack_tight);
if (!*oz) if (!*oz)
goto out_error; goto out_error;
xfs_zone_cache_create_association(ip, *oz);
} }
alloc_len = xfs_zone_alloc_blocks(*oz, XFS_B_TO_FSB(mp, ioend->io_size), alloc_len = xfs_zone_alloc_blocks(*oz, XFS_B_TO_FSB(mp, ioend->io_size),
@ -1211,6 +1311,14 @@ xfs_mount_zones(
error = xfs_zone_gc_mount(mp); error = xfs_zone_gc_mount(mp);
if (error) if (error)
goto out_free_zone_info; goto out_free_zone_info;
/*
* Set up a mru cache to track inode to open zone for data placement
* purposes. The magic values for group count and life time is the
* same as the defaults for file streams, which seems sane enough.
*/
xfs_mru_cache_create(&mp->m_zone_cache, mp,
5000, 10, xfs_zone_cache_free_func);
return 0; return 0;
out_free_zone_info: out_free_zone_info:
@ -1224,4 +1332,5 @@ xfs_unmount_zones(
{ {
xfs_zone_gc_unmount(mp); xfs_zone_gc_unmount(mp);
xfs_free_zone_info(mp->m_zone_info); xfs_free_zone_info(mp->m_zone_info);
xfs_mru_cache_destroy(mp->m_zone_cache);
} }