mm, swap: simplify folio swap allocation
With slot cache gone, clean up the allocation helpers even more.
folio_alloc_swap will be the only entry for allocation and adding the
folio to the swap cache (except suspend), making it the opposite of
folio_free_swap.

Link: https://lkml.kernel.org/r/20250313165935.63303-8-ryncsn@gmail.com
Signed-off-by: Kairui Song <kasong@tencent.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Barry Song <v-songbaohua@oppo.com>
Cc: Chris Li <chrisl@kernel.org>
Cc: "Huang, Ying" <ying.huang@linux.alibaba.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kalesh Singh <kaleshsingh@google.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Yosry Ahmed <yosryahmed@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
commit b487a2da35
parent 0ff67f990b
6 changed files with 96 additions and 125 deletions
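Before the diff: the core API change is that folio_alloc_swap() stops returning a bare swp_entry_t and becomes the single entry point that both allocates a swap entry and inserts the folio into the swap cache, returning 0 on success or a negative errno. A minimal caller sketch under that convention is shown below; try_swap_out() is a hypothetical helper invented for illustration (not part of the patch), and the gfp mask mirrors the one mm/vmscan.c passes after this change.

#include <linux/mm.h>
#include <linux/gfp.h>
#include <linux/swap.h>

/*
 * Hypothetical reclaim-side helper illustrating the new convention:
 * on success the folio already sits in the swap cache and folio->swap
 * is valid; on failure no swap entry is held by the folio.
 */
static bool try_swap_out(struct folio *folio)
{
	/* Caller must hold the folio lock, per the kerneldoc added below. */
	if (folio_alloc_swap(folio, __GFP_HIGH | __GFP_NOWARN))
		return false;	/* -EINVAL (bad huge order) or -ENOMEM */

	/* Mark dirty so reclaim actually writes it out (see the vmscan.c hunk). */
	folio_mark_dirty(folio);
	return true;
}

The full diff follows.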
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -478,7 +478,7 @@ static inline long get_nr_swap_pages(void)
 }
 
 extern void si_swapinfo(struct sysinfo *);
-swp_entry_t folio_alloc_swap(struct folio *folio);
+int folio_alloc_swap(struct folio *folio, gfp_t gfp_mask);
 bool folio_free_swap(struct folio *folio);
 void put_swap_folio(struct folio *folio, swp_entry_t entry);
 extern swp_entry_t get_swap_page_of_type(int);
@@ -586,11 +586,9 @@ static inline int swp_swapcount(swp_entry_t entry)
 	return 0;
 }
 
-static inline swp_entry_t folio_alloc_swap(struct folio *folio)
+static inline int folio_alloc_swap(struct folio *folio, gfp_t gfp_mask)
 {
-	swp_entry_t entry;
-	entry.val = 0;
-	return entry;
+	return -EINVAL;
 }
 
 static inline bool folio_free_swap(struct folio *folio)
mm/shmem.c | 21
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1533,7 +1533,6 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 	struct inode *inode = mapping->host;
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
-	swp_entry_t swap;
 	pgoff_t index;
 	int nr_pages;
 	bool split = false;
@@ -1615,14 +1614,6 @@ try_split:
 		folio_mark_uptodate(folio);
 	}
 
-	swap = folio_alloc_swap(folio);
-	if (!swap.val) {
-		if (nr_pages > 1)
-			goto try_split;
-
-		goto redirty;
-	}
-
 	/*
 	 * Add inode to shmem_unuse()'s list of swapped-out inodes,
 	 * if it's not already there. Do it now before the folio is
@@ -1635,20 +1626,20 @@ try_split:
 	if (list_empty(&info->swaplist))
 		list_add(&info->swaplist, &shmem_swaplist);
 
-	if (add_to_swap_cache(folio, swap,
-			__GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN,
-			NULL) == 0) {
+	if (!folio_alloc_swap(folio, __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN)) {
 		shmem_recalc_inode(inode, 0, nr_pages);
-		swap_shmem_alloc(swap, nr_pages);
-		shmem_delete_from_page_cache(folio, swp_to_radix_entry(swap));
+		swap_shmem_alloc(folio->swap, nr_pages);
+		shmem_delete_from_page_cache(folio, swp_to_radix_entry(folio->swap));
 
 		mutex_unlock(&shmem_swaplist_mutex);
 		BUG_ON(folio_mapped(folio));
 		return swap_writepage(&folio->page, wbc);
 	}
 
+	list_del_init(&info->swaplist);
 	mutex_unlock(&shmem_swaplist_mutex);
-	put_swap_folio(folio, swap);
+	if (nr_pages > 1)
+		goto try_split;
 redirty:
 	folio_mark_dirty(folio);
 	if (wbc->for_reclaim)
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -50,7 +50,6 @@ static inline pgoff_t swap_cache_index(swp_entry_t entry)
 }
 
 void show_swap_cache_info(void);
-bool add_to_swap(struct folio *folio);
 void *get_shadow_from_swap_cache(swp_entry_t entry);
 int add_to_swap_cache(struct folio *folio, swp_entry_t entry,
 		      gfp_t gfp, void **shadowp);
@@ -163,11 +162,6 @@ struct folio *filemap_get_incore_folio(struct address_space *mapping,
 	return filemap_get_folio(mapping, index);
 }
 
-static inline bool add_to_swap(struct folio *folio)
-{
-	return false;
-}
-
 static inline void *get_shadow_from_swap_cache(swp_entry_t entry)
 {
 	return NULL;
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -166,63 +166,6 @@ void __delete_from_swap_cache(struct folio *folio,
 	__lruvec_stat_mod_folio(folio, NR_SWAPCACHE, -nr);
 }
 
-/**
- * add_to_swap - allocate swap space for a folio
- * @folio: folio we want to move to swap
- *
- * Allocate swap space for the folio and add the folio to the
- * swap cache.
- *
- * Context: Caller needs to hold the folio lock.
- * Return: Whether the folio was added to the swap cache.
- */
-bool add_to_swap(struct folio *folio)
-{
-	swp_entry_t entry;
-	int err;
-
-	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
-	VM_BUG_ON_FOLIO(!folio_test_uptodate(folio), folio);
-
-	entry = folio_alloc_swap(folio);
-	if (!entry.val)
-		return false;
-
-	/*
-	 * XArray node allocations from PF_MEMALLOC contexts could
-	 * completely exhaust the page allocator. __GFP_NOMEMALLOC
-	 * stops emergency reserves from being allocated.
-	 *
-	 * TODO: this could cause a theoretical memory reclaim
-	 * deadlock in the swap out path.
-	 */
-	/*
-	 * Add it to the swap cache.
-	 */
-	err = add_to_swap_cache(folio, entry,
-			__GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN, NULL);
-	if (err)
-		goto fail;
-	/*
-	 * Normally the folio will be dirtied in unmap because its
-	 * pte should be dirty. A special case is MADV_FREE page. The
-	 * page's pte could have dirty bit cleared but the folio's
-	 * SwapBacked flag is still set because clearing the dirty bit
-	 * and SwapBacked flag has no lock protected. For such folio,
-	 * unmap will not set dirty bit for it, so folio reclaim will
-	 * not write the folio out. This can cause data corruption when
-	 * the folio is swapped in later. Always setting the dirty flag
-	 * for the folio solves the problem.
-	 */
-	folio_mark_dirty(folio);
-
-	return true;
-
-fail:
-	put_swap_folio(folio, entry);
-	return false;
-}
-
 /*
  * This must be called only on folios that have
  * been verified to be in the swap cache and locked.
mm/swapfile.c | 113
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1176,9 +1176,8 @@ static bool get_swap_device_info(struct swap_info_struct *si)
  * Fast path try to get swap entries with specified order from current
  * CPU's swap entry pool (a cluster).
  */
-static int swap_alloc_fast(swp_entry_t *entry,
-			   unsigned char usage,
-			   int order)
+static bool swap_alloc_fast(swp_entry_t *entry,
+			    int order)
 {
 	struct swap_cluster_info *ci;
 	struct swap_info_struct *si;
@@ -1197,7 +1196,7 @@ static int swap_alloc_fast(swp_entry_t *entry,
 	if (cluster_is_usable(ci, order)) {
 		if (cluster_is_empty(ci))
 			offset = cluster_offset(si, ci);
-		found = alloc_swap_scan_cluster(si, ci, offset, order, usage);
+		found = alloc_swap_scan_cluster(si, ci, offset, order, SWAP_HAS_CACHE);
 		if (found)
 			*entry = swp_entry(si->type, found);
 	} else {
@@ -1208,47 +1207,30 @@ static int swap_alloc_fast(swp_entry_t *entry,
 	return !!found;
 }
 
-swp_entry_t folio_alloc_swap(struct folio *folio)
+/* Rotate the device and switch to a new cluster */
+static bool swap_alloc_slow(swp_entry_t *entry,
+			    int order)
 {
-	unsigned int order = folio_order(folio);
-	unsigned int size = 1 << order;
-	struct swap_info_struct *si, *next;
-	swp_entry_t entry = {};
-	unsigned long offset;
 	int node;
+	unsigned long offset;
+	struct swap_info_struct *si, *next;
 
-	if (order) {
-		/*
-		 * Should not even be attempting large allocations when huge
-		 * page swap is disabled. Warn and fail the allocation.
-		 */
-		if (!IS_ENABLED(CONFIG_THP_SWAP) || size > SWAPFILE_CLUSTER) {
-			VM_WARN_ON_ONCE(1);
-			return entry;
-		}
-	}
-
-	/* Fast path using percpu cluster */
-	local_lock(&percpu_swap_cluster.lock);
-	if (swap_alloc_fast(&entry, SWAP_HAS_CACHE, order))
-		goto out;
-
-	/* Rotate the device and switch to a new cluster */
+	node = numa_node_id();
 	spin_lock(&swap_avail_lock);
 start_over:
-	node = numa_node_id();
 	plist_for_each_entry_safe(si, next, &swap_avail_heads[node], avail_lists[node]) {
+		/* Rotate the device and switch to a new cluster */
 		plist_requeue(&si->avail_lists[node], &swap_avail_heads[node]);
 		spin_unlock(&swap_avail_lock);
 		if (get_swap_device_info(si)) {
 			offset = cluster_alloc_swap_entry(si, order, SWAP_HAS_CACHE);
 			put_swap_device(si);
 			if (offset) {
-				entry = swp_entry(si->type, offset);
-				goto out;
+				*entry = swp_entry(si->type, offset);
+				return true;
 			}
 			if (order)
-				goto out;
+				return false;
 		}
 
 		spin_lock(&swap_avail_lock);
@@ -1267,16 +1249,67 @@ start_over:
 		goto start_over;
 	}
 	spin_unlock(&swap_avail_lock);
-out:
-	local_unlock(&percpu_swap_cluster.lock);
-	/* Need to call this even if allocation failed, for MEMCG_SWAP_FAIL. */
-	if (mem_cgroup_try_charge_swap(folio, entry)) {
-		put_swap_folio(folio, entry);
-		entry.val = 0;
-	}
-	if (entry.val)
-		atomic_long_sub(size, &nr_swap_pages);
-	return entry;
+	return false;
+}
+
+/**
+ * folio_alloc_swap - allocate swap space for a folio
+ * @folio: folio we want to move to swap
+ * @gfp: gfp mask for shadow nodes
+ *
+ * Allocate swap space for the folio and add the folio to the
+ * swap cache.
+ *
+ * Context: Caller needs to hold the folio lock.
+ * Return: Whether the folio was added to the swap cache.
+ */
+int folio_alloc_swap(struct folio *folio, gfp_t gfp)
+{
+	unsigned int order = folio_order(folio);
+	unsigned int size = 1 << order;
+	swp_entry_t entry = {};
+
+	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
+	VM_BUG_ON_FOLIO(!folio_test_uptodate(folio), folio);
+
+	/*
+	 * Should not even be attempting large allocations when huge
+	 * page swap is disabled. Warn and fail the allocation.
+	 */
+	if (order && (!IS_ENABLED(CONFIG_THP_SWAP) || size > SWAPFILE_CLUSTER)) {
+		VM_WARN_ON_ONCE(1);
+		return -EINVAL;
+	}
+
+	local_lock(&percpu_swap_cluster.lock);
+	if (!swap_alloc_fast(&entry, order))
+		swap_alloc_slow(&entry, order);
+	local_unlock(&percpu_swap_cluster.lock);
+
+	/* Need to call this even if allocation failed, for MEMCG_SWAP_FAIL. */
+	if (mem_cgroup_try_charge_swap(folio, entry))
+		goto out_free;
+
+	if (!entry.val)
+		return -ENOMEM;
+
+	/*
+	 * XArray node allocations from PF_MEMALLOC contexts could
+	 * completely exhaust the page allocator. __GFP_NOMEMALLOC
+	 * stops emergency reserves from being allocated.
+	 *
+	 * TODO: this could cause a theoretical memory reclaim
+	 * deadlock in the swap out path.
+	 */
+	if (add_to_swap_cache(folio, entry, gfp | __GFP_NOMEMALLOC, NULL))
+		goto out_free;
+
+	atomic_long_sub(size, &nr_swap_pages);
+	return 0;
+
+out_free:
+	put_swap_folio(folio, entry);
+	return -ENOMEM;
 }
 
 static struct swap_info_struct *_swap_info_get(swp_entry_t entry)
mm/vmscan.c | 16
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1289,7 +1289,7 @@ retry:
 					    split_folio_to_list(folio, folio_list))
 						goto activate_locked;
 				}
-				if (!add_to_swap(folio)) {
+				if (folio_alloc_swap(folio, __GFP_HIGH | __GFP_NOWARN)) {
 					int __maybe_unused order = folio_order(folio);
 
 					if (!folio_test_large(folio))
@@ -1305,9 +1305,21 @@ retry:
 					}
 #endif
 					count_mthp_stat(order, MTHP_STAT_SWPOUT_FALLBACK);
-					if (!add_to_swap(folio))
+					if (folio_alloc_swap(folio, __GFP_HIGH | __GFP_NOWARN))
 						goto activate_locked_split;
 				}
+				/*
+				 * Normally the folio will be dirtied in unmap because its
+				 * pte should be dirty. A special case is MADV_FREE page. The
+				 * page's pte could have dirty bit cleared but the folio's
+				 * SwapBacked flag is still set because clearing the dirty bit
+				 * and SwapBacked flag has no lock protected. For such folio,
+				 * unmap will not set dirty bit for it, so folio reclaim will
+				 * not write the folio out. This can cause data corruption when
+				 * the folio is swapped in later. Always setting the dirty flag
+				 * for the folio solves the problem.
+				 */
+				folio_mark_dirty(folio);
 			}
 		}
 