mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-08-05 16:54:27 +00:00

damon_migrate_pages() tries migration even if the target node is invalid.
If users mistakenly make such invalid requests via
DAMOS_MIGRATE_{HOT,COLD} action, the below kernel BUG can happen.
[ 7831.883495] BUG: unable to handle page fault for address: 0000000000001f48
[ 7831.884160] #PF: supervisor read access in kernel mode
[ 7831.884681] #PF: error_code(0x0000) - not-present page
[ 7831.885203] PGD 0 P4D 0
[ 7831.885468] Oops: Oops: 0000 [#1] SMP PTI
[ 7831.885852] CPU: 31 UID: 0 PID: 94202 Comm: kdamond.0 Not tainted 6.16.0-rc5-mm-new-damon+ #93 PREEMPT(voluntary)
[ 7831.886913] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-4.el9 04/01/2014
[ 7831.887777] RIP: 0010:__alloc_frozen_pages_noprof (include/linux/mmzone.h:1724 include/linux/mmzone.h:1750 mm/page_alloc.c:4936 mm/page_alloc.c:5137)
[...]
[ 7831.895953] Call Trace:
[ 7831.896195] <TASK>
[ 7831.896397] __folio_alloc_noprof (mm/page_alloc.c:5183 mm/page_alloc.c:5192)
[ 7831.896787] migrate_pages_batch (mm/migrate.c:1189 mm/migrate.c:1851)
[ 7831.897228] ? __pfx_alloc_migration_target (mm/migrate.c:2137)
[ 7831.897735] migrate_pages (mm/migrate.c:2078)
[ 7831.898141] ? __pfx_alloc_migration_target (mm/migrate.c:2137)
[ 7831.898664] damon_migrate_folio_list (mm/damon/ops-common.c:321 mm/damon/ops-common.c:354)
[ 7831.899140] damon_migrate_pages (mm/damon/ops-common.c:405)
[...]
Add a target node validity check in damon_migrate_pages(). The validity
check is stolen from that of do_pages_move(), which is being used for the
move_pages() system call.
Link: https://lkml.kernel.org/r/20250720185822.1451-1-sj@kernel.org
Fixes: b51820ebea ("mm/damon/paddr: introduce DAMOS_MIGRATE_COLD action for demotion") [6.11.x]
Signed-off-by: SeongJae Park <sj@kernel.org>
Reviewed-by: Joshua Hahn <joshua.hahnjy@gmail.com>
Cc: Honggyu Kim <honggyu.kim@sk.com>
Cc: Hyeongtak Ji <hyeongtak.ji@sk.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
414 lines
9.7 KiB
C
414 lines
9.7 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Common Code for Data Access Monitoring
|
|
*
|
|
* Author: SeongJae Park <sj@kernel.org>
|
|
*/
|
|
|
|
#include <linux/migrate.h>
|
|
#include <linux/mmu_notifier.h>
|
|
#include <linux/page_idle.h>
|
|
#include <linux/pagemap.h>
|
|
#include <linux/rmap.h>
|
|
#include <linux/swap.h>
|
|
#include <linux/swapops.h>
|
|
|
|
#include "../internal.h"
|
|
#include "ops-common.h"
|
|
|
|
/*
|
|
* Get an online page for a pfn if it's in the LRU list. Otherwise, returns
|
|
* NULL.
|
|
*
|
|
* The body of this function is stolen from the 'page_idle_get_folio()'. We
|
|
* steal rather than reuse it because the code is quite simple.
|
|
*/
|
|
struct folio *damon_get_folio(unsigned long pfn)
|
|
{
|
|
struct page *page = pfn_to_online_page(pfn);
|
|
struct folio *folio;
|
|
|
|
if (!page)
|
|
return NULL;
|
|
|
|
folio = page_folio(page);
|
|
if (!folio_test_lru(folio) || !folio_try_get(folio))
|
|
return NULL;
|
|
if (unlikely(page_folio(page) != folio || !folio_test_lru(folio))) {
|
|
folio_put(folio);
|
|
folio = NULL;
|
|
}
|
|
return folio;
|
|
}
|
|
|
|
/*
 * Clear the accessed state of the mapping at @pte / @addr in @vma and mark
 * the backing folio idle.
 *
 * If the mapping was found young (by the PTE young bit or by the MMU
 * notifier), that is recorded on the folio with folio_set_young() before the
 * folio is marked idle.  Nothing is done when no LRU folio can be obtained
 * for the PTE's pfn.
 */
void damon_ptep_mkold(pte_t *pte, struct vm_area_struct *vma, unsigned long addr)
{
	pte_t pteval = ptep_get(pte);
	bool present = pte_present(pteval);
	struct folio *folio;
	bool young = false;

	/* Non-present entries carry their pfn in the swap entry offset. */
	folio = damon_get_folio(present ? pte_pfn(pteval) :
			swp_offset_pfn(pte_to_swp_entry(pteval)));
	if (!folio)
		return;

	/*
	 * PFN swap PTEs, such as device-exclusive ones, that actually map pages
	 * are "old" from a CPU perspective.  The MMU notifier takes care of any
	 * device aspects.
	 */
	if (present && ptep_test_and_clear_young(vma, addr, pte))
		young = true;
	if (mmu_notifier_clear_young(vma->vm_mm, addr, addr + PAGE_SIZE))
		young = true;

	if (young)
		folio_set_young(folio);
	folio_set_idle(folio);

	folio_put(folio);
}
|
|
|
|
/*
 * PMD-level counterpart of damon_ptep_mkold(): clear the young state of the
 * huge mapping at @pmd / @addr in @vma and mark the backing folio idle.
 *
 * Compiles to an empty function without CONFIG_TRANSPARENT_HUGEPAGE.
 */
void damon_pmdp_mkold(pmd_t *pmd, struct vm_area_struct *vma, unsigned long addr)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	struct folio *folio;

	folio = damon_get_folio(pmd_pfn(pmdp_get(pmd)));
	if (folio) {
		/* Remember that the mapping was young before marking it idle. */
		if (pmdp_clear_young_notify(vma, addr, pmd))
			folio_set_young(folio);

		folio_set_idle(folio);
		folio_put(folio);
	}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
}
|
|
|
|
#define DAMON_MAX_SUBSCORE (100)
|
|
#define DAMON_MAX_AGE_IN_LOG (32)
|
|
|
|
int damon_hot_score(struct damon_ctx *c, struct damon_region *r,
|
|
struct damos *s)
|
|
{
|
|
int freq_subscore;
|
|
unsigned int age_in_sec;
|
|
int age_in_log, age_subscore;
|
|
unsigned int freq_weight = s->quota.weight_nr_accesses;
|
|
unsigned int age_weight = s->quota.weight_age;
|
|
int hotness;
|
|
|
|
freq_subscore = r->nr_accesses * DAMON_MAX_SUBSCORE /
|
|
damon_max_nr_accesses(&c->attrs);
|
|
|
|
age_in_sec = (unsigned long)r->age * c->attrs.aggr_interval / 1000000;
|
|
for (age_in_log = 0; age_in_log < DAMON_MAX_AGE_IN_LOG && age_in_sec;
|
|
age_in_log++, age_in_sec >>= 1)
|
|
;
|
|
|
|
/* If frequency is 0, higher age means it's colder */
|
|
if (freq_subscore == 0)
|
|
age_in_log *= -1;
|
|
|
|
/*
|
|
* Now age_in_log is in [-DAMON_MAX_AGE_IN_LOG, DAMON_MAX_AGE_IN_LOG].
|
|
* Scale it to be in [0, 100] and set it as age subscore.
|
|
*/
|
|
age_in_log += DAMON_MAX_AGE_IN_LOG;
|
|
age_subscore = age_in_log * DAMON_MAX_SUBSCORE /
|
|
DAMON_MAX_AGE_IN_LOG / 2;
|
|
|
|
hotness = (freq_weight * freq_subscore + age_weight * age_subscore);
|
|
if (freq_weight + age_weight)
|
|
hotness /= freq_weight + age_weight;
|
|
/*
|
|
* Transform it to fit in [0, DAMOS_MAX_SCORE]
|
|
*/
|
|
hotness = hotness * DAMOS_MAX_SCORE / DAMON_MAX_SUBSCORE;
|
|
|
|
return hotness;
|
|
}
|
|
|
|
int damon_cold_score(struct damon_ctx *c, struct damon_region *r,
|
|
struct damos *s)
|
|
{
|
|
int hotness = damon_hot_score(c, r, s);
|
|
|
|
/* Return coldness of the region */
|
|
return DAMOS_MAX_SCORE - hotness;
|
|
}
|
|
|
|
static bool damon_folio_mkold_one(struct folio *folio,
|
|
struct vm_area_struct *vma, unsigned long addr, void *arg)
|
|
{
|
|
DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0);
|
|
|
|
while (page_vma_mapped_walk(&pvmw)) {
|
|
addr = pvmw.address;
|
|
if (pvmw.pte)
|
|
damon_ptep_mkold(pvmw.pte, vma, addr);
|
|
else
|
|
damon_pmdp_mkold(pvmw.pmd, vma, addr);
|
|
}
|
|
return true;
|
|
}
|
|
|
|
void damon_folio_mkold(struct folio *folio)
|
|
{
|
|
struct rmap_walk_control rwc = {
|
|
.rmap_one = damon_folio_mkold_one,
|
|
.anon_lock = folio_lock_anon_vma_read,
|
|
};
|
|
bool need_lock;
|
|
|
|
if (!folio_mapped(folio) || !folio_raw_mapping(folio)) {
|
|
folio_set_idle(folio);
|
|
return;
|
|
}
|
|
|
|
need_lock = !folio_test_anon(folio) || folio_test_ksm(folio);
|
|
if (need_lock && !folio_trylock(folio))
|
|
return;
|
|
|
|
rmap_walk(folio, &rwc);
|
|
|
|
if (need_lock)
|
|
folio_unlock(folio);
|
|
|
|
}
|
|
|
|
static bool damon_folio_young_one(struct folio *folio,
|
|
struct vm_area_struct *vma, unsigned long addr, void *arg)
|
|
{
|
|
bool *accessed = arg;
|
|
DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0);
|
|
pte_t pte;
|
|
|
|
*accessed = false;
|
|
while (page_vma_mapped_walk(&pvmw)) {
|
|
addr = pvmw.address;
|
|
if (pvmw.pte) {
|
|
pte = ptep_get(pvmw.pte);
|
|
|
|
/*
|
|
* PFN swap PTEs, such as device-exclusive ones, that
|
|
* actually map pages are "old" from a CPU perspective.
|
|
* The MMU notifier takes care of any device aspects.
|
|
*/
|
|
*accessed = (pte_present(pte) && pte_young(pte)) ||
|
|
!folio_test_idle(folio) ||
|
|
mmu_notifier_test_young(vma->vm_mm, addr);
|
|
} else {
|
|
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
|
*accessed = pmd_young(pmdp_get(pvmw.pmd)) ||
|
|
!folio_test_idle(folio) ||
|
|
mmu_notifier_test_young(vma->vm_mm, addr);
|
|
#else
|
|
WARN_ON_ONCE(1);
|
|
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
|
|
}
|
|
if (*accessed) {
|
|
page_vma_mapped_walk_done(&pvmw);
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* If accessed, stop walking */
|
|
return *accessed == false;
|
|
}
|
|
|
|
bool damon_folio_young(struct folio *folio)
|
|
{
|
|
bool accessed = false;
|
|
struct rmap_walk_control rwc = {
|
|
.arg = &accessed,
|
|
.rmap_one = damon_folio_young_one,
|
|
.anon_lock = folio_lock_anon_vma_read,
|
|
};
|
|
bool need_lock;
|
|
|
|
if (!folio_mapped(folio) || !folio_raw_mapping(folio)) {
|
|
if (folio_test_idle(folio))
|
|
return false;
|
|
else
|
|
return true;
|
|
}
|
|
|
|
need_lock = !folio_test_anon(folio) || folio_test_ksm(folio);
|
|
if (need_lock && !folio_trylock(folio))
|
|
return false;
|
|
|
|
rmap_walk(folio, &rwc);
|
|
|
|
if (need_lock)
|
|
folio_unlock(folio);
|
|
|
|
return accessed;
|
|
}
|
|
|
|
bool damos_folio_filter_match(struct damos_filter *filter, struct folio *folio)
|
|
{
|
|
bool matched = false;
|
|
struct mem_cgroup *memcg;
|
|
size_t folio_sz;
|
|
|
|
switch (filter->type) {
|
|
case DAMOS_FILTER_TYPE_ANON:
|
|
matched = folio_test_anon(folio);
|
|
break;
|
|
case DAMOS_FILTER_TYPE_ACTIVE:
|
|
matched = folio_test_active(folio);
|
|
break;
|
|
case DAMOS_FILTER_TYPE_MEMCG:
|
|
rcu_read_lock();
|
|
memcg = folio_memcg_check(folio);
|
|
if (!memcg)
|
|
matched = false;
|
|
else
|
|
matched = filter->memcg_id == mem_cgroup_id(memcg);
|
|
rcu_read_unlock();
|
|
break;
|
|
case DAMOS_FILTER_TYPE_YOUNG:
|
|
matched = damon_folio_young(folio);
|
|
if (matched)
|
|
damon_folio_mkold(folio);
|
|
break;
|
|
case DAMOS_FILTER_TYPE_HUGEPAGE_SIZE:
|
|
folio_sz = folio_size(folio);
|
|
matched = filter->sz_range.min <= folio_sz &&
|
|
folio_sz <= filter->sz_range.max;
|
|
break;
|
|
case DAMOS_FILTER_TYPE_UNMAPPED:
|
|
matched = !folio_mapped(folio) || !folio_raw_mapping(folio);
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return matched == filter->matching;
|
|
}
|
|
|
|
static unsigned int __damon_migrate_folio_list(
|
|
struct list_head *migrate_folios, struct pglist_data *pgdat,
|
|
int target_nid)
|
|
{
|
|
unsigned int nr_succeeded = 0;
|
|
struct migration_target_control mtc = {
|
|
/*
|
|
* Allocate from 'node', or fail quickly and quietly.
|
|
* When this happens, 'page' will likely just be discarded
|
|
* instead of migrated.
|
|
*/
|
|
.gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) |
|
|
__GFP_NOWARN | __GFP_NOMEMALLOC | GFP_NOWAIT,
|
|
.nid = target_nid,
|
|
};
|
|
|
|
if (pgdat->node_id == target_nid || target_nid == NUMA_NO_NODE)
|
|
return 0;
|
|
|
|
if (list_empty(migrate_folios))
|
|
return 0;
|
|
|
|
/* Migration ignores all cpuset and mempolicy settings */
|
|
migrate_pages(migrate_folios, alloc_migration_target, NULL,
|
|
(unsigned long)&mtc, MIGRATE_ASYNC, MR_DAMON,
|
|
&nr_succeeded);
|
|
|
|
return nr_succeeded;
|
|
}
|
|
|
|
static unsigned int damon_migrate_folio_list(struct list_head *folio_list,
|
|
struct pglist_data *pgdat,
|
|
int target_nid)
|
|
{
|
|
unsigned int nr_migrated = 0;
|
|
struct folio *folio;
|
|
LIST_HEAD(ret_folios);
|
|
LIST_HEAD(migrate_folios);
|
|
|
|
while (!list_empty(folio_list)) {
|
|
struct folio *folio;
|
|
|
|
cond_resched();
|
|
|
|
folio = lru_to_folio(folio_list);
|
|
list_del(&folio->lru);
|
|
|
|
if (!folio_trylock(folio))
|
|
goto keep;
|
|
|
|
/* Relocate its contents to another node. */
|
|
list_add(&folio->lru, &migrate_folios);
|
|
folio_unlock(folio);
|
|
continue;
|
|
keep:
|
|
list_add(&folio->lru, &ret_folios);
|
|
}
|
|
/* 'folio_list' is always empty here */
|
|
|
|
/* Migrate folios selected for migration */
|
|
nr_migrated += __damon_migrate_folio_list(
|
|
&migrate_folios, pgdat, target_nid);
|
|
/*
|
|
* Folios that could not be migrated are still in @migrate_folios. Add
|
|
* those back on @folio_list
|
|
*/
|
|
if (!list_empty(&migrate_folios))
|
|
list_splice_init(&migrate_folios, folio_list);
|
|
|
|
try_to_unmap_flush();
|
|
|
|
list_splice(&ret_folios, folio_list);
|
|
|
|
while (!list_empty(folio_list)) {
|
|
folio = lru_to_folio(folio_list);
|
|
list_del(&folio->lru);
|
|
folio_putback_lru(folio);
|
|
}
|
|
|
|
return nr_migrated;
|
|
}
|
|
|
|
unsigned long damon_migrate_pages(struct list_head *folio_list, int target_nid)
|
|
{
|
|
int nid;
|
|
unsigned long nr_migrated = 0;
|
|
LIST_HEAD(node_folio_list);
|
|
unsigned int noreclaim_flag;
|
|
|
|
if (list_empty(folio_list))
|
|
return nr_migrated;
|
|
|
|
if (target_nid < 0 || target_nid >= MAX_NUMNODES ||
|
|
!node_state(target_nid, N_MEMORY))
|
|
return nr_migrated;
|
|
|
|
noreclaim_flag = memalloc_noreclaim_save();
|
|
|
|
nid = folio_nid(lru_to_folio(folio_list));
|
|
do {
|
|
struct folio *folio = lru_to_folio(folio_list);
|
|
|
|
if (nid == folio_nid(folio)) {
|
|
list_move(&folio->lru, &node_folio_list);
|
|
continue;
|
|
}
|
|
|
|
nr_migrated += damon_migrate_folio_list(&node_folio_list,
|
|
NODE_DATA(nid),
|
|
target_nid);
|
|
nid = folio_nid(lru_to_folio(folio_list));
|
|
} while (!list_empty(folio_list));
|
|
|
|
nr_migrated += damon_migrate_folio_list(&node_folio_list,
|
|
NODE_DATA(nid),
|
|
target_nid);
|
|
|
|
memalloc_noreclaim_restore(noreclaim_flag);
|
|
|
|
return nr_migrated;
|
|
}
|