// SPDX-License-Identifier: GPL-2.0
/*
 * Common Code for Data Access Monitoring
 *
 * Author: SeongJae Park
 */

#include <linux/migrate.h>
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/swapops.h>

#include "../internal.h"
#include "ops-common.h"

/*
 * Get an online page for a pfn if it's in the LRU list. Otherwise, returns
 * NULL.
 *
 * The body of this function is stolen from the 'page_idle_get_folio()'. We
 * steal rather than reuse it because the code is quite simple.
 */
struct folio *damon_get_folio(unsigned long pfn)
{
	struct page *page = pfn_to_online_page(pfn);
	struct folio *folio;

	if (!page)
		return NULL;

	folio = page_folio(page);
	if (!folio_test_lru(folio) || !folio_try_get(folio))
		return NULL;
	if (unlikely(page_folio(page) != folio || !folio_test_lru(folio))) {
		folio_put(folio);
		folio = NULL;
	}
	return folio;
}

/*
 * Clear the accessed bit of the page table entry for @addr in @vma, remember
 * the result in the mapped folio's page_idle flags, and mark the folio idle.
 */
void damon_ptep_mkold(pte_t *pte, struct vm_area_struct *vma, unsigned long addr)
{
	pte_t pteval = ptep_get(pte);
	struct folio *folio;
	bool young = false;
	unsigned long pfn;

	if (likely(pte_present(pteval)))
		pfn = pte_pfn(pteval);
	else
		pfn = swp_offset_pfn(pte_to_swp_entry(pteval));

	folio = damon_get_folio(pfn);
	if (!folio)
		return;

	/*
	 * PFN swap PTEs, such as device-exclusive ones, that actually map pages
	 * are "old" from a CPU perspective. The MMU notifier takes care of any
	 * device aspects.
	 */
	if (likely(pte_present(pteval)))
		young |= ptep_test_and_clear_young(vma, addr, pte);
	young |= mmu_notifier_clear_young(vma->vm_mm, addr, addr + PAGE_SIZE);
	if (young)
		folio_set_young(folio);

	folio_set_idle(folio);
	folio_put(folio);
}

/* The PMD (transparent huge page) counterpart of damon_ptep_mkold(). */
void damon_pmdp_mkold(pmd_t *pmd, struct vm_area_struct *vma, unsigned long addr)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	struct folio *folio = damon_get_folio(pmd_pfn(pmdp_get(pmd)));

	if (!folio)
		return;

	if (pmdp_clear_young_notify(vma, addr, pmd))
		folio_set_young(folio);

	folio_set_idle(folio);
	folio_put(folio);
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
}

#define DAMON_MAX_SUBSCORE	(100)
#define DAMON_MAX_AGE_IN_LOG	(32)

/*
 * Return the hotness of @r as a score in [0, DAMOS_MAX_SCORE], weighting the
 * access frequency and the age of the region by the quota weights of @s.
 */
int damon_hot_score(struct damon_ctx *c, struct damon_region *r,
			struct damos *s)
{
	int freq_subscore;
	unsigned int age_in_sec;
	int age_in_log, age_subscore;
	unsigned int freq_weight = s->quota.weight_nr_accesses;
	unsigned int age_weight = s->quota.weight_age;
	int hotness;

	freq_subscore = r->nr_accesses * DAMON_MAX_SUBSCORE /
		damon_max_nr_accesses(&c->attrs);

	age_in_sec = (unsigned long)r->age * c->attrs.aggr_interval / 1000000;
	for (age_in_log = 0; age_in_log < DAMON_MAX_AGE_IN_LOG && age_in_sec;
			age_in_log++, age_in_sec >>= 1)
		;

	/* If frequency is 0, higher age means it's colder */
	if (freq_subscore == 0)
		age_in_log *= -1;

	/*
	 * Now age_in_log is in [-DAMON_MAX_AGE_IN_LOG, DAMON_MAX_AGE_IN_LOG].
	 * Scale it to be in [0, 100] and set it as age subscore.
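	 *
	 * For example (illustrative numbers): a region aged about 8 seconds
	 * gives age_in_log = 4, and with a nonzero freq_subscore the age
	 * subscore computed below is (4 + 32) * 100 / 32 / 2 = 56.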
	 */
	age_in_log += DAMON_MAX_AGE_IN_LOG;
	age_subscore = age_in_log * DAMON_MAX_SUBSCORE /
		DAMON_MAX_AGE_IN_LOG / 2;

	hotness = (freq_weight * freq_subscore + age_weight * age_subscore);
	if (freq_weight + age_weight)
		hotness /= freq_weight + age_weight;
	/*
	 * Transform it to fit in [0, DAMOS_MAX_SCORE]
	 */
	hotness = hotness * DAMOS_MAX_SCORE / DAMON_MAX_SUBSCORE;

	return hotness;
}

int damon_cold_score(struct damon_ctx *c, struct damon_region *r,
			struct damos *s)
{
	int hotness = damon_hot_score(c, r, s);

	/* Return coldness of the region */
	return DAMOS_MAX_SCORE - hotness;
}

static bool damon_folio_mkold_one(struct folio *folio,
		struct vm_area_struct *vma, unsigned long addr, void *arg)
{
	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0);

	while (page_vma_mapped_walk(&pvmw)) {
		addr = pvmw.address;
		if (pvmw.pte)
			damon_ptep_mkold(pvmw.pte, vma, addr);
		else
			damon_pmdp_mkold(pvmw.pmd, vma, addr);
	}
	return true;
}

/* Clear the accessed bits of every page table entry that maps @folio. */
void damon_folio_mkold(struct folio *folio)
{
	struct rmap_walk_control rwc = {
		.rmap_one = damon_folio_mkold_one,
		.anon_lock = folio_lock_anon_vma_read,
	};
	bool need_lock;

	if (!folio_mapped(folio) || !folio_raw_mapping(folio)) {
		folio_set_idle(folio);
		return;
	}

	need_lock = !folio_test_anon(folio) || folio_test_ksm(folio);
	if (need_lock && !folio_trylock(folio))
		return;

	rmap_walk(folio, &rwc);

	if (need_lock)
		folio_unlock(folio);
}

static bool damon_folio_young_one(struct folio *folio,
		struct vm_area_struct *vma, unsigned long addr, void *arg)
{
	bool *accessed = arg;
	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0);
	pte_t pte;

	*accessed = false;
	while (page_vma_mapped_walk(&pvmw)) {
		addr = pvmw.address;
		if (pvmw.pte) {
			pte = ptep_get(pvmw.pte);

			/*
			 * PFN swap PTEs, such as device-exclusive ones, that
			 * actually map pages are "old" from a CPU perspective.
			 * The MMU notifier takes care of any device aspects.
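			 * Hence, only present PTEs are checked with
			 * pte_young() below; non-present entries are judged
			 * by the folio idle flag and
			 * mmu_notifier_test_young() alone.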
			 */
			*accessed = (pte_present(pte) && pte_young(pte)) ||
				!folio_test_idle(folio) ||
				mmu_notifier_test_young(vma->vm_mm, addr);
		} else {
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
			*accessed = pmd_young(pmdp_get(pvmw.pmd)) ||
				!folio_test_idle(folio) ||
				mmu_notifier_test_young(vma->vm_mm, addr);
#else
			WARN_ON_ONCE(1);
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
		}
		if (*accessed) {
			page_vma_mapped_walk_done(&pvmw);
			break;
		}
	}

	/* If accessed, stop walking */
	return *accessed == false;
}

/*
 * Return whether @folio has been accessed since it was last marked old, based
 * on the accessed bits of its mappings, the page idle flag, and MMU
 * notifiers.
 */
bool damon_folio_young(struct folio *folio)
{
	bool accessed = false;
	struct rmap_walk_control rwc = {
		.arg = &accessed,
		.rmap_one = damon_folio_young_one,
		.anon_lock = folio_lock_anon_vma_read,
	};
	bool need_lock;

	if (!folio_mapped(folio) || !folio_raw_mapping(folio)) {
		if (folio_test_idle(folio))
			return false;
		else
			return true;
	}

	need_lock = !folio_test_anon(folio) || folio_test_ksm(folio);
	if (need_lock && !folio_trylock(folio))
		return false;

	rmap_walk(folio, &rwc);

	if (need_lock)
		folio_unlock(folio);

	return accessed;
}

/*
 * Return whether @folio matches @filter. The per-type result is compared
 * against filter->matching, so a filter with 'matching == false' inverts it.
 */
bool damos_folio_filter_match(struct damos_filter *filter, struct folio *folio)
{
	bool matched = false;
	struct mem_cgroup *memcg;
	size_t folio_sz;

	switch (filter->type) {
	case DAMOS_FILTER_TYPE_ANON:
		matched = folio_test_anon(folio);
		break;
	case DAMOS_FILTER_TYPE_ACTIVE:
		matched = folio_test_active(folio);
		break;
	case DAMOS_FILTER_TYPE_MEMCG:
		rcu_read_lock();
		memcg = folio_memcg_check(folio);
		if (!memcg)
			matched = false;
		else
			matched = filter->memcg_id == mem_cgroup_id(memcg);
		rcu_read_unlock();
		break;
	case DAMOS_FILTER_TYPE_YOUNG:
		matched = damon_folio_young(folio);
		if (matched)
			damon_folio_mkold(folio);
		break;
	case DAMOS_FILTER_TYPE_HUGEPAGE_SIZE:
		folio_sz = folio_size(folio);
		matched = filter->sz_range.min <= folio_sz &&
			folio_sz <= filter->sz_range.max;
		break;
	case DAMOS_FILTER_TYPE_UNMAPPED:
		matched = !folio_mapped(folio) || !folio_raw_mapping(folio);
		break;
	default:
		break;
	}

	return matched == filter->matching;
}

static unsigned int __damon_migrate_folio_list(
		struct list_head *migrate_folios, struct pglist_data *pgdat,
		int target_nid)
{
	unsigned int nr_succeeded = 0;
	struct migration_target_control mtc = {
		/*
		 * Allocate from 'node', or fail quickly and quietly.
		 * When this happens, 'page' will likely just be discarded
		 * instead of migrated.
		 */
		.gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) |
			__GFP_NOWARN | __GFP_NOMEMALLOC | GFP_NOWAIT,
		.nid = target_nid,
	};

	if (pgdat->node_id == target_nid || target_nid == NUMA_NO_NODE)
		return 0;

	if (list_empty(migrate_folios))
		return 0;

	/* Migration ignores all cpuset and mempolicy settings */
	migrate_pages(migrate_folios, alloc_migration_target, NULL,
		      (unsigned long)&mtc, MIGRATE_ASYNC, MR_DAMON,
		      &nr_succeeded);

	return nr_succeeded;
}

static unsigned int damon_migrate_folio_list(struct list_head *folio_list,
					     struct pglist_data *pgdat,
					     int target_nid)
{
	unsigned int nr_migrated = 0;
	struct folio *folio;
	LIST_HEAD(ret_folios);
	LIST_HEAD(migrate_folios);

	while (!list_empty(folio_list)) {
		struct folio *folio;

		cond_resched();

		folio = lru_to_folio(folio_list);
		list_del(&folio->lru);

		if (!folio_trylock(folio))
			goto keep;

		/* Relocate its contents to another node. */
		list_add(&folio->lru, &migrate_folios);
		folio_unlock(folio);
		continue;
keep:
		list_add(&folio->lru, &ret_folios);
	}
	/* 'folio_list' is always empty here */

	/* Migrate folios selected for migration */
	nr_migrated += __damon_migrate_folio_list(
			&migrate_folios, pgdat, target_nid);
	/*
	 * Folios that could not be migrated are still in @migrate_folios. Add
	 * those back on @folio_list
	 */
	if (!list_empty(&migrate_folios))
		list_splice_init(&migrate_folios, folio_list);

	try_to_unmap_flush();

	list_splice(&ret_folios, folio_list);

	while (!list_empty(folio_list)) {
		folio = lru_to_folio(folio_list);
		list_del(&folio->lru);
		folio_putback_lru(folio);
	}

	return nr_migrated;
}

/*
 * Migrate the folios on @folio_list to @target_nid. Folios that could not be
 * migrated are put back on their LRU lists. Returns the number of
 * successfully migrated folios.
 */
unsigned long damon_migrate_pages(struct list_head *folio_list, int target_nid)
{
	int nid;
	unsigned long nr_migrated = 0;
	LIST_HEAD(node_folio_list);
	unsigned int noreclaim_flag;

	if (list_empty(folio_list))
		return nr_migrated;

	if (target_nid < 0 || target_nid >= MAX_NUMNODES ||
			!node_state(target_nid, N_MEMORY))
		return nr_migrated;

	noreclaim_flag = memalloc_noreclaim_save();

	nid = folio_nid(lru_to_folio(folio_list));
	do {
		struct folio *folio = lru_to_folio(folio_list);

		if (nid == folio_nid(folio)) {
			list_move(&folio->lru, &node_folio_list);
			continue;
		}

		nr_migrated += damon_migrate_folio_list(&node_folio_list,
							NODE_DATA(nid),
							target_nid);
		nid = folio_nid(lru_to_folio(folio_list));
	} while (!list_empty(folio_list));

	nr_migrated += damon_migrate_folio_list(&node_folio_list,
						NODE_DATA(nid), target_nid);

	memalloc_noreclaim_restore(noreclaim_flag);

	return nr_migrated;
}