Merge tag 'mm-stable-2025-06-01-14-06' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull more MM updates from Andrew Morton:

- "zram: support algorithm-specific parameters" from Sergey Senozhatsky adds infrastructure for passing algorithm-specific parameters into zram. A single parameter `winbits' is implemented at this time.

- "memcg: nmi-safe kmem charging" from Shakeel Butt makes memcg charging nmi-safe, which is required by BPF, which can operate in NMI context.

- "Some random fixes and cleanup to shmem" from Kemeng Shi implements small fixes and cleanups in the shmem code.

- "Skip mm selftests instead when kernel features are not present" from Zi Yan fixes some issues in the MM selftest code.

- "mm/damon: build-enable essential DAMON components by default" from SeongJae Park reworks the DAMON Kconfig to make it easier to enable CONFIG_DAMON.

- "sched/numa: add statistics of numa balance task migration" from Libo Chen adds more info to sysfs and procfs files to improve visibility into the NUMA balancer's task migration activity.

- "selftests/mm: cow and gup_longterm cleanups" from Mark Brown provides various updates to some of the MM selftests to make them play better with the overall containing framework.
* tag 'mm-stable-2025-06-01-14-06' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (43 commits)
  mm/khugepaged: clean up refcount check using folio_expected_ref_count()
  selftests/mm: fix test result reporting in gup_longterm
  selftests/mm: report unique test names for each cow test
  selftests/mm: add helper for logging test start and results
  selftests/mm: use standard ksft_finished() in cow and gup_longterm
  selftests/damon/_damon_sysfs: skip testcases if CONFIG_DAMON_SYSFS is disabled
  sched/numa: add statistics of numa balance task
  sched/numa: fix task swap by skipping kernel threads
  tools/testing: check correct variable in open_procmap()
  tools/testing/vma: add missing function stub
  mm/gup: update comment explaining why gup_fast() disables IRQs
  selftests/mm: two fixes for the pfnmap test
  mm/khugepaged: fix race with folio split/free using temporary reference
  mm: add CONFIG_PAGE_BLOCK_ORDER to select page block order
  mmu_notifiers: remove leftover stub macros
  selftests/mm: deduplicate test names in madv_populate
  kcov: rust: add flags for KCOV with Rust
  mm: rust: make CONFIG_MMU ifdefs more narrow
  mmu_gather: move tlb flush for VM_PFNMAP/VM_MIXEDMAP vmas into free_pgtables()
  mm/damon/Kconfig: enable CONFIG_DAMON by default
  ...
commit fd1f847350
59 changed files with 909 additions and 408 deletions
@@ -1732,6 +1732,12 @@ The following nested keys are defined.
 	  numa_hint_faults (npn)
 		Number of NUMA hinting faults.
 
+	  numa_task_migrated (npn)
+		Number of task migration by NUMA balancing.
+
+	  numa_task_swapped (npn)
+		Number of task swap by NUMA balancing.
+
 	  pgdemote_kswapd
 		Number of pages demoted by kswapd.
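The two new memory.stat keys above are plain "name value" pairs, like the matching counters this series also adds to /proc/vmstat. A minimal read-side sketch in C follows; the cgroup v2 mount point and the cgroup name ("test") are illustrative assumptions, not part of the patch:

```c
#include <stdio.h>
#include <string.h>

/*
 * Print the NUMA-balancing task migration counters from a memory.stat-style
 * file. The same parsing works for /proc/vmstat, since both files use
 * "name value" lines.
 */
static void print_numa_task_stats(const char *path)
{
	char line[256];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return;
	}
	while (fgets(line, sizeof(line), f)) {
		if (!strncmp(line, "numa_task_migrated", 18) ||
		    !strncmp(line, "numa_task_swapped", 17))
			printf("%s: %s", path, line);
	}
	fclose(f);
}

int main(void)
{
	print_numa_task_stats("/sys/fs/cgroup/test/memory.stat"); /* assumed cgroup path */
	print_numa_task_stats("/proc/vmstat");
	return 0;
}
```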
@@ -227,9 +227,9 @@ void __flush_dcache_folio(struct address_space *mapping, struct folio *folio)
 	}
 
 	/*
-	 * If this is a page cache page, and we have an aliasing VIPT cache,
+	 * If this is a page cache folio, and we have an aliasing VIPT cache,
 	 * we only need to do one flush - which would be at the relevant
-	 * userspace colour, which is congruent with page->index.
+	 * userspace colour, which is congruent with folio->index.
 	 */
 	if (mapping && cache_is_vipt_aliasing())
 		flush_pfn_alias(folio_pfn(folio), folio_pos(folio));
@@ -105,7 +105,8 @@ static struct list_head ptable_list[3] = {
 
 #define PD_PTABLE(page) ((ptable_desc *)&(virt_to_page((void *)(page))->lru))
-#define PD_PAGE(ptable) (list_entry(ptable, struct page, lru))
-#define PD_MARKBITS(dp) (*(unsigned int *)&PD_PAGE(dp)->index)
+#define PD_PTDESC(ptable) (list_entry(ptable, struct ptdesc, pt_list))
+#define PD_MARKBITS(dp) (*(unsigned int *)&PD_PTDESC(dp)->pt_index)
 
 static const int ptable_shift[3] = {
 	7+2, /* PGD */
@@ -8,7 +8,7 @@
 #include "backend_deflate.h"
 
 /* Use the same value as crypto API */
-#define DEFLATE_DEF_WINBITS		11
+#define DEFLATE_DEF_WINBITS		(-11)
 #define DEFLATE_DEF_MEMLEVEL		MAX_MEM_LEVEL
 
 struct deflate_ctx {
@@ -22,8 +22,10 @@ static void deflate_release_params(struct zcomp_params *params)
 
 static int deflate_setup_params(struct zcomp_params *params)
 {
-	if (params->level == ZCOMP_PARAM_NO_LEVEL)
+	if (params->level == ZCOMP_PARAM_NOT_SET)
 		params->level = Z_DEFAULT_COMPRESSION;
+	if (params->deflate.winbits == ZCOMP_PARAM_NOT_SET)
+		params->deflate.winbits = DEFLATE_DEF_WINBITS;
 
 	return 0;
 }
@@ -57,13 +59,13 @@ static int deflate_create(struct zcomp_params *params, struct zcomp_ctx *ctx)
 		return -ENOMEM;
 
 	ctx->context = zctx;
-	sz = zlib_deflate_workspacesize(-DEFLATE_DEF_WINBITS, MAX_MEM_LEVEL);
+	sz = zlib_deflate_workspacesize(params->deflate.winbits, MAX_MEM_LEVEL);
 	zctx->cctx.workspace = vzalloc(sz);
 	if (!zctx->cctx.workspace)
 		goto error;
 
 	ret = zlib_deflateInit2(&zctx->cctx, params->level, Z_DEFLATED,
-				-DEFLATE_DEF_WINBITS, DEFLATE_DEF_MEMLEVEL,
+				params->deflate.winbits, DEFLATE_DEF_MEMLEVEL,
 				Z_DEFAULT_STRATEGY);
 	if (ret != Z_OK)
 		goto error;
@@ -73,7 +75,7 @@ static int deflate_create(struct zcomp_params *params, struct zcomp_ctx *ctx)
 	if (!zctx->dctx.workspace)
 		goto error;
 
-	ret = zlib_inflateInit2(&zctx->dctx, -DEFLATE_DEF_WINBITS);
+	ret = zlib_inflateInit2(&zctx->dctx, params->deflate.winbits);
 	if (ret != Z_OK)
 		goto error;
 
@@ -18,7 +18,7 @@ static void lz4_release_params(struct zcomp_params *params)
 
 static int lz4_setup_params(struct zcomp_params *params)
 {
-	if (params->level == ZCOMP_PARAM_NO_LEVEL)
+	if (params->level == ZCOMP_PARAM_NOT_SET)
 		params->level = LZ4_ACCELERATION_DEFAULT;
 
 	return 0;
@@ -18,7 +18,7 @@ static void lz4hc_release_params(struct zcomp_params *params)
 
 static int lz4hc_setup_params(struct zcomp_params *params)
 {
-	if (params->level == ZCOMP_PARAM_NO_LEVEL)
+	if (params->level == ZCOMP_PARAM_NOT_SET)
 		params->level = LZ4HC_DEFAULT_CLEVEL;
 
 	return 0;
@@ -58,7 +58,7 @@ static int zstd_setup_params(struct zcomp_params *params)
 		return -ENOMEM;
 
 	params->drv_data = zp;
-	if (params->level == ZCOMP_PARAM_NO_LEVEL)
+	if (params->level == ZCOMP_PARAM_NOT_SET)
 		params->level = zstd_default_clevel();
 
 	zp->cprm = zstd_get_params(params->level, PAGE_SIZE);
@@ -5,7 +5,11 @@
 
 #include <linux/mutex.h>
 
-#define ZCOMP_PARAM_NO_LEVEL	INT_MIN
+#define ZCOMP_PARAM_NOT_SET	INT_MIN
+
+struct deflate_params {
+	s32 winbits;
+};
 
 /*
  * Immutable driver (backend) parameters. The driver may attach private
@@ -17,6 +21,9 @@ struct zcomp_params {
 	void *dict;
 	size_t dict_sz;
 	s32 level;
+	union {
+		struct deflate_params deflate;
+	};
 
 	void *drv_data;
 };
@@ -1276,13 +1276,15 @@ static void comp_params_reset(struct zram *zram, u32 prio)
 	struct zcomp_params *params = &zram->params[prio];
 
 	vfree(params->dict);
-	params->level = ZCOMP_PARAM_NO_LEVEL;
+	params->level = ZCOMP_PARAM_NOT_SET;
+	params->deflate.winbits = ZCOMP_PARAM_NOT_SET;
 	params->dict_sz = 0;
 	params->dict = NULL;
 }
 
 static int comp_params_store(struct zram *zram, u32 prio, s32 level,
-			     const char *dict_path)
+			     const char *dict_path,
+			     struct deflate_params *deflate_params)
 {
 	ssize_t sz = 0;
 
@@ -1300,6 +1302,7 @@ static int comp_params_store(struct zram *zram, u32 prio, s32 level,
 
 	zram->params[prio].dict_sz = sz;
 	zram->params[prio].level = level;
+	zram->params[prio].deflate.winbits = deflate_params->winbits;
 	return 0;
 }
 
@@ -1308,11 +1311,14 @@ static ssize_t algorithm_params_store(struct device *dev,
 				      const char *buf,
 				      size_t len)
 {
-	s32 prio = ZRAM_PRIMARY_COMP, level = ZCOMP_PARAM_NO_LEVEL;
+	s32 prio = ZRAM_PRIMARY_COMP, level = ZCOMP_PARAM_NOT_SET;
 	char *args, *param, *val, *algo = NULL, *dict_path = NULL;
+	struct deflate_params deflate_params;
 	struct zram *zram = dev_to_zram(dev);
 	int ret;
 
+	deflate_params.winbits = ZCOMP_PARAM_NOT_SET;
+
 	args = skip_spaces(buf);
 	while (*args) {
 		args = next_arg(args, &param, &val);
@@ -1343,6 +1349,13 @@ static ssize_t algorithm_params_store(struct device *dev,
 			dict_path = val;
 			continue;
 		}
+
+		if (!strcmp(param, "deflate.winbits")) {
+			ret = kstrtoint(val, 10, &deflate_params.winbits);
+			if (ret)
+				return ret;
+			continue;
+		}
 	}
 
 	/* Lookup priority by algorithm name */
@@ -1364,7 +1377,7 @@ static ssize_t algorithm_params_store(struct device *dev,
 	if (prio < ZRAM_PRIMARY_COMP || prio >= ZRAM_MAX_COMPS)
 		return -EINVAL;
 
-	ret = comp_params_store(zram, prio, level, dict_path);
+	ret = comp_params_store(zram, prio, level, dict_path, &deflate_params);
 	return ret ? ret : len;
 }
 
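The zram hunks above route a new "deflate.winbits" key through algorithm_params_store() into the deflate backend. A hedged userspace sketch of setting it follows; the device name (zram0) and the winbits value (9) are illustrative assumptions, and the attribute takes space-separated key=value pairs as parsed by next_arg() above:

```c
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

/*
 * Illustrative only: configure zram's deflate backend with a custom
 * window-bits value via the algorithm_params attribute shown above.
 * The device (zram0) and the chosen winbits value (9) are assumptions.
 */
int main(void)
{
	const char *attr = "/sys/block/zram0/algorithm_params";
	const char *params = "algo=deflate deflate.winbits=9";
	int fd = open(attr, O_WRONLY);

	if (fd < 0) {
		perror(attr);
		return 1;
	}
	if (write(fd, params, strlen(params)) < 0)
		perror("write");
	close(fd);
	return 0;
}
```

Negative window-bits values select a raw deflate stream in zlib, which is why the backend's default above becomes (-11).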
|
@ -913,7 +913,8 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
|
|||
struct ntfs_inode *ni = ntfs_i(inode);
|
||||
u64 valid = ni->i_valid;
|
||||
struct ntfs_sb_info *sbi = ni->mi.sbi;
|
||||
struct page *page, **pages = NULL;
|
||||
struct page **pages = NULL;
|
||||
struct folio *folio;
|
||||
size_t written = 0;
|
||||
u8 frame_bits = NTFS_LZNT_CUNIT + sbi->cluster_bits;
|
||||
u32 frame_size = 1u << frame_bits;
|
||||
|
@ -923,7 +924,6 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
|
|||
u64 frame_vbo;
|
||||
pgoff_t index;
|
||||
bool frame_uptodate;
|
||||
struct folio *folio;
|
||||
|
||||
if (frame_size < PAGE_SIZE) {
|
||||
/*
|
||||
|
@ -977,8 +977,7 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
|
|||
pages_per_frame);
|
||||
if (err) {
|
||||
for (ip = 0; ip < pages_per_frame; ip++) {
|
||||
page = pages[ip];
|
||||
folio = page_folio(page);
|
||||
folio = page_folio(pages[ip]);
|
||||
folio_unlock(folio);
|
||||
folio_put(folio);
|
||||
}
|
||||
|
@ -989,10 +988,9 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
|
|||
ip = off >> PAGE_SHIFT;
|
||||
off = offset_in_page(valid);
|
||||
for (; ip < pages_per_frame; ip++, off = 0) {
|
||||
page = pages[ip];
|
||||
folio = page_folio(page);
|
||||
zero_user_segment(page, off, PAGE_SIZE);
|
||||
flush_dcache_page(page);
|
||||
folio = page_folio(pages[ip]);
|
||||
folio_zero_segment(folio, off, PAGE_SIZE);
|
||||
flush_dcache_folio(folio);
|
||||
folio_mark_uptodate(folio);
|
||||
}
|
||||
|
||||
|
@ -1001,8 +999,7 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
|
|||
ni_unlock(ni);
|
||||
|
||||
for (ip = 0; ip < pages_per_frame; ip++) {
|
||||
page = pages[ip];
|
||||
folio = page_folio(page);
|
||||
folio = page_folio(pages[ip]);
|
||||
folio_mark_uptodate(folio);
|
||||
folio_unlock(folio);
|
||||
folio_put(folio);
|
||||
|
@ -1046,8 +1043,7 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
|
|||
if (err) {
|
||||
for (ip = 0; ip < pages_per_frame;
|
||||
ip++) {
|
||||
page = pages[ip];
|
||||
folio = page_folio(page);
|
||||
folio = page_folio(pages[ip]);
|
||||
folio_unlock(folio);
|
||||
folio_put(folio);
|
||||
}
|
||||
|
@ -1065,10 +1061,10 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
|
|||
for (;;) {
|
||||
size_t cp, tail = PAGE_SIZE - off;
|
||||
|
||||
page = pages[ip];
|
||||
cp = copy_page_from_iter_atomic(page, off,
|
||||
folio = page_folio(pages[ip]);
|
||||
cp = copy_folio_from_iter_atomic(folio, off,
|
||||
min(tail, bytes), from);
|
||||
flush_dcache_page(page);
|
||||
flush_dcache_folio(folio);
|
||||
|
||||
copied += cp;
|
||||
bytes -= cp;
|
||||
|
@ -1088,9 +1084,8 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
|
|||
ni_unlock(ni);
|
||||
|
||||
for (ip = 0; ip < pages_per_frame; ip++) {
|
||||
page = pages[ip];
|
||||
ClearPageDirty(page);
|
||||
folio = page_folio(page);
|
||||
folio = page_folio(pages[ip]);
|
||||
folio_clear_dirty(folio);
|
||||
folio_mark_uptodate(folio);
|
||||
folio_unlock(folio);
|
||||
folio_put(folio);
|
||||
|
|
|
@ -58,6 +58,11 @@
|
|||
* Defaults to flushing at tlb_end_vma() to reset the range; helps when
|
||||
* there's large holes between the VMAs.
|
||||
*
|
||||
* - tlb_free_vmas()
|
||||
*
|
||||
* tlb_free_vmas() marks the start of unlinking of one or more vmas
|
||||
* and freeing page-tables.
|
||||
*
|
||||
* - tlb_remove_table()
|
||||
*
|
||||
* tlb_remove_table() is the basic primitive to free page-table directories
|
||||
|
@ -464,7 +469,12 @@ tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma)
|
|||
*/
|
||||
tlb->vma_huge = is_vm_hugetlb_page(vma);
|
||||
tlb->vma_exec = !!(vma->vm_flags & VM_EXEC);
|
||||
tlb->vma_pfn = !!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP));
|
||||
|
||||
/*
|
||||
* Track if there's at least one VM_PFNMAP/VM_MIXEDMAP vma
|
||||
* in the tracked range, see tlb_free_vmas().
|
||||
*/
|
||||
tlb->vma_pfn |= !!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP));
|
||||
}
|
||||
|
||||
static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
|
||||
|
@ -547,23 +557,39 @@ static inline void tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *
|
|||
}
|
||||
|
||||
static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
|
||||
{
|
||||
if (tlb->fullmm || IS_ENABLED(CONFIG_MMU_GATHER_MERGE_VMAS))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Do a TLB flush and reset the range at VMA boundaries; this avoids
|
||||
* the ranges growing with the unused space between consecutive VMAs,
|
||||
* but also the mmu_gather::vma_* flags from tlb_start_vma() rely on
|
||||
* this.
|
||||
*/
|
||||
tlb_flush_mmu_tlbonly(tlb);
|
||||
}
|
||||
|
||||
static inline void tlb_free_vmas(struct mmu_gather *tlb)
|
||||
{
|
||||
if (tlb->fullmm)
|
||||
return;
|
||||
|
||||
/*
|
||||
* VM_PFNMAP is more fragile because the core mm will not track the
|
||||
* page mapcount -- there might not be page-frames for these PFNs after
|
||||
* all. Force flush TLBs for such ranges to avoid munmap() vs
|
||||
* unmap_mapping_range() races.
|
||||
* page mapcount -- there might not be page-frames for these PFNs
|
||||
* after all.
|
||||
*
|
||||
* Specifically() there is a race between munmap() and
|
||||
* unmap_mapping_range(), where munmap() will unlink the VMA, such
|
||||
* that unmap_mapping_range() will no longer observe the VMA and
|
||||
* no-op, without observing the TLBI, returning prematurely.
|
||||
*
|
||||
* So if we're about to unlink such a VMA, and we have pending
|
||||
* TLBI for such a vma, flush things now.
|
||||
*/
|
||||
if (tlb->vma_pfn || !IS_ENABLED(CONFIG_MMU_GATHER_MERGE_VMAS)) {
|
||||
/*
|
||||
* Do a TLB flush and reset the range at VMA boundaries; this avoids
|
||||
* the ranges growing with the unused space between consecutive VMAs.
|
||||
*/
|
||||
if (tlb->vma_pfn)
|
||||
tlb_flush_mmu_tlbonly(tlb);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -113,6 +113,12 @@ struct mem_cgroup_per_node {
|
|||
CACHELINE_PADDING(_pad2_);
|
||||
unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
|
||||
struct mem_cgroup_reclaim_iter iter;
|
||||
|
||||
#ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC
|
||||
/* slab stats for nmi context */
|
||||
atomic_t slab_reclaimable;
|
||||
atomic_t slab_unreclaimable;
|
||||
#endif
|
||||
};
|
||||
|
||||
struct mem_cgroup_threshold {
|
||||
|
@ -236,6 +242,10 @@ struct mem_cgroup {
|
|||
atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS];
|
||||
atomic_long_t memory_events_local[MEMCG_NR_MEMORY_EVENTS];
|
||||
|
||||
#ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC
|
||||
/* MEMCG_KMEM for nmi context */
|
||||
atomic_t kmem_stat;
|
||||
#endif
|
||||
/*
|
||||
* Hint of reclaim pressure for socket memroy management. Note
|
||||
* that this indicator should NOT be used in legacy cgroup mode
|
||||
|
|
|
@ -1276,9 +1276,9 @@ vm_fault_t finish_fault(struct vm_fault *vmf);
|
|||
* the page's disk buffers. PG_private must be set to tell the VM to call
|
||||
* into the filesystem to release these pages.
|
||||
*
|
||||
* A page may belong to an inode's memory mapping. In this case, page->mapping
|
||||
* is the pointer to the inode, and page->index is the file offset of the page,
|
||||
* in units of PAGE_SIZE.
|
||||
* A folio may belong to an inode's memory mapping. In this case,
|
||||
* folio->mapping points to the inode, and folio->index is the file
|
||||
* offset of the folio, in units of PAGE_SIZE.
|
||||
*
|
||||
* If pagecache pages are not associated with an inode, they are said to be
|
||||
* anonymous pages. These may become associated with the swapcache, and in that
|
||||
|
|
|
@ -108,7 +108,7 @@ struct page {
|
|||
/* See page-flags.h for PAGE_MAPPING_FLAGS */
|
||||
struct address_space *mapping;
|
||||
union {
|
||||
pgoff_t index; /* Our offset within mapping. */
|
||||
pgoff_t __folio_index; /* Our offset within mapping. */
|
||||
unsigned long share; /* share count for fsdax */
|
||||
};
|
||||
/**
|
||||
|
@ -489,7 +489,7 @@ FOLIO_MATCH(flags, flags);
|
|||
FOLIO_MATCH(lru, lru);
|
||||
FOLIO_MATCH(mapping, mapping);
|
||||
FOLIO_MATCH(compound_head, lru);
|
||||
FOLIO_MATCH(index, index);
|
||||
FOLIO_MATCH(__folio_index, index);
|
||||
FOLIO_MATCH(private, private);
|
||||
FOLIO_MATCH(_mapcount, _mapcount);
|
||||
FOLIO_MATCH(_refcount, _refcount);
|
||||
|
@ -590,7 +590,7 @@ TABLE_MATCH(flags, __page_flags);
|
|||
TABLE_MATCH(compound_head, pt_list);
|
||||
TABLE_MATCH(compound_head, _pt_pad_1);
|
||||
TABLE_MATCH(mapping, __page_mapping);
|
||||
TABLE_MATCH(index, pt_index);
|
||||
TABLE_MATCH(__folio_index, pt_index);
|
||||
TABLE_MATCH(rcu_head, pt_rcu_head);
|
||||
TABLE_MATCH(page_type, __page_type);
|
||||
TABLE_MATCH(_refcount, __page_refcount);
|
||||
|
|
|
@ -654,9 +654,6 @@ static inline void mmu_notifier_subscriptions_destroy(struct mm_struct *mm)
|
|||
#define pmdp_clear_flush_young_notify pmdp_clear_flush_young
|
||||
#define ptep_clear_young_notify ptep_test_and_clear_young
|
||||
#define pmdp_clear_young_notify pmdp_test_and_clear_young
|
||||
#define ptep_clear_flush_notify ptep_clear_flush
|
||||
#define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush
|
||||
#define pudp_huge_clear_flush_notify pudp_huge_clear_flush
|
||||
|
||||
static inline void mmu_notifier_synchronize(void)
|
||||
{
|
||||
|
|
|
@ -37,6 +37,22 @@
|
|||
|
||||
#define NR_PAGE_ORDERS (MAX_PAGE_ORDER + 1)
|
||||
|
||||
/* Defines the order for the number of pages that have a migrate type. */
|
||||
#ifndef CONFIG_PAGE_BLOCK_ORDER
|
||||
#define PAGE_BLOCK_ORDER MAX_PAGE_ORDER
|
||||
#else
|
||||
#define PAGE_BLOCK_ORDER CONFIG_PAGE_BLOCK_ORDER
|
||||
#endif /* CONFIG_PAGE_BLOCK_ORDER */
|
||||
|
||||
/*
|
||||
* The MAX_PAGE_ORDER, which defines the max order of pages to be allocated
|
||||
* by the buddy allocator, has to be larger or equal to the PAGE_BLOCK_ORDER,
|
||||
* which defines the order for the number of pages that can have a migrate type
|
||||
*/
|
||||
#if (PAGE_BLOCK_ORDER > MAX_PAGE_ORDER)
|
||||
#error MAX_PAGE_ORDER must be >= PAGE_BLOCK_ORDER
|
||||
#endif
|
||||
|
||||
/*
|
||||
* PAGE_ALLOC_COSTLY_ORDER is the order at which allocations are deemed
|
||||
* costly to service. That is between allocation orders which should
|
||||
|
|
|
@ -41,18 +41,18 @@ extern unsigned int pageblock_order;
|
|||
* Huge pages are a constant size, but don't exceed the maximum allocation
|
||||
* granularity.
|
||||
*/
|
||||
#define pageblock_order MIN_T(unsigned int, HUGETLB_PAGE_ORDER, MAX_PAGE_ORDER)
|
||||
#define pageblock_order MIN_T(unsigned int, HUGETLB_PAGE_ORDER, PAGE_BLOCK_ORDER)
|
||||
|
||||
#endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
|
||||
|
||||
#elif defined(CONFIG_TRANSPARENT_HUGEPAGE)
|
||||
|
||||
#define pageblock_order MIN_T(unsigned int, HPAGE_PMD_ORDER, MAX_PAGE_ORDER)
|
||||
#define pageblock_order MIN_T(unsigned int, HPAGE_PMD_ORDER, PAGE_BLOCK_ORDER)
|
||||
|
||||
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
|
||||
|
||||
/* If huge pages are not used, group by MAX_ORDER_NR_PAGES */
|
||||
#define pageblock_order MAX_PAGE_ORDER
|
||||
/* If huge pages are not used, group by PAGE_BLOCK_ORDER */
|
||||
#define pageblock_order PAGE_BLOCK_ORDER
|
||||
|
||||
#endif /* CONFIG_HUGETLB_PAGE */
|
||||
|
||||
|
|
|
@ -548,6 +548,10 @@ struct sched_statistics {
|
|||
u64 nr_failed_migrations_running;
|
||||
u64 nr_failed_migrations_hot;
|
||||
u64 nr_forced_migrations;
|
||||
#ifdef CONFIG_NUMA_BALANCING
|
||||
u64 numa_task_migrated;
|
||||
u64 numa_task_swapped;
|
||||
#endif
|
||||
|
||||
u64 nr_wakeups;
|
||||
u64 nr_wakeups_sync;
|
||||
|
|
|
@ -182,8 +182,6 @@ static inline size_t iov_length(const struct iovec *iov, unsigned long nr_segs)
|
|||
return ret;
|
||||
}
|
||||
|
||||
size_t copy_page_from_iter_atomic(struct page *page, size_t offset,
|
||||
size_t bytes, struct iov_iter *i);
|
||||
void iov_iter_advance(struct iov_iter *i, size_t bytes);
|
||||
void iov_iter_revert(struct iov_iter *i, size_t bytes);
|
||||
size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t bytes);
|
||||
|
@ -193,6 +191,8 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
|
|||
struct iov_iter *i);
|
||||
size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
|
||||
struct iov_iter *i);
|
||||
size_t copy_folio_from_iter_atomic(struct folio *folio, size_t offset,
|
||||
size_t bytes, struct iov_iter *i);
|
||||
|
||||
size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
|
||||
size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i);
|
||||
|
@ -210,12 +210,6 @@ static inline size_t copy_folio_from_iter(struct folio *folio, size_t offset,
|
|||
return copy_page_from_iter(&folio->page, offset, bytes, i);
|
||||
}
|
||||
|
||||
static inline size_t copy_folio_from_iter_atomic(struct folio *folio,
|
||||
size_t offset, size_t bytes, struct iov_iter *i)
|
||||
{
|
||||
return copy_page_from_iter_atomic(&folio->page, offset, bytes, i);
|
||||
}
|
||||
|
||||
size_t copy_page_to_iter_nofault(struct page *page, unsigned offset,
|
||||
size_t bytes, struct iov_iter *i);
|
||||
|
||||
|
|
|
@ -66,6 +66,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
|
|||
NUMA_HINT_FAULTS,
|
||||
NUMA_HINT_FAULTS_LOCAL,
|
||||
NUMA_PAGE_MIGRATE,
|
||||
NUMA_TASK_MIGRATE,
|
||||
NUMA_TASK_SWAP,
|
||||
#endif
|
||||
#ifdef CONFIG_MIGRATION
|
||||
PGMIGRATE_SUCCESS, PGMIGRATE_FAIL,
|
||||
|
|
init/Kconfig (14 lines changed)
|
@ -992,6 +992,20 @@ config MEMCG
|
|||
help
|
||||
Provides control over the memory footprint of tasks in a cgroup.
|
||||
|
||||
config MEMCG_NMI_UNSAFE
|
||||
bool
|
||||
depends on MEMCG
|
||||
depends on HAVE_NMI
|
||||
depends on !ARCH_HAS_NMI_SAFE_THIS_CPU_OPS && !ARCH_HAVE_NMI_SAFE_CMPXCHG
|
||||
default y
|
||||
|
||||
config MEMCG_NMI_SAFETY_REQUIRES_ATOMIC
|
||||
bool
|
||||
depends on MEMCG
|
||||
depends on HAVE_NMI
|
||||
depends on !ARCH_HAS_NMI_SAFE_THIS_CPU_OPS && ARCH_HAVE_NMI_SAFE_CMPXCHG
|
||||
default y
|
||||
|
||||
config MEMCG_V1
|
||||
bool "Legacy cgroup v1 memory controller"
|
||||
depends on MEMCG
|
||||
|
|
|
@ -531,7 +531,7 @@ static u64 get_inode_sequence_number(struct inode *inode)
|
|||
*
|
||||
* For shared mappings (when @fshared), the key is:
|
||||
*
|
||||
* ( inode->i_sequence, page->index, offset_within_page )
|
||||
* ( inode->i_sequence, page offset within mapping, offset_within_page )
|
||||
*
|
||||
* [ also see get_inode_sequence_number() ]
|
||||
*
|
||||
|
|
|
@ -3362,6 +3362,10 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
|
|||
#ifdef CONFIG_NUMA_BALANCING
|
||||
static void __migrate_swap_task(struct task_struct *p, int cpu)
|
||||
{
|
||||
__schedstat_inc(p->stats.numa_task_swapped);
|
||||
count_vm_numa_event(NUMA_TASK_SWAP);
|
||||
count_memcg_event_mm(p->mm, NUMA_TASK_SWAP);
|
||||
|
||||
if (task_on_rq_queued(p)) {
|
||||
struct rq *src_rq, *dst_rq;
|
||||
struct rq_flags srf, drf;
|
||||
|
@ -7930,8 +7934,9 @@ int migrate_task_to(struct task_struct *p, int target_cpu)
|
|||
if (!cpumask_test_cpu(target_cpu, p->cpus_ptr))
|
||||
return -EINVAL;
|
||||
|
||||
/* TODO: This is not properly updating schedstats */
|
||||
|
||||
__schedstat_inc(p->stats.numa_task_migrated);
|
||||
count_vm_numa_event(NUMA_TASK_MIGRATE);
|
||||
count_memcg_event_mm(p->mm, NUMA_TASK_MIGRATE);
|
||||
trace_sched_move_numa(p, curr_cpu, target_cpu);
|
||||
return stop_one_cpu(curr_cpu, migration_cpu_stop, &arg);
|
||||
}
|
||||
|
|
|
@ -1210,6 +1210,10 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
|
|||
P_SCHEDSTAT(nr_failed_migrations_running);
|
||||
P_SCHEDSTAT(nr_failed_migrations_hot);
|
||||
P_SCHEDSTAT(nr_forced_migrations);
|
||||
#ifdef CONFIG_NUMA_BALANCING
|
||||
P_SCHEDSTAT(numa_task_migrated);
|
||||
P_SCHEDSTAT(numa_task_swapped);
|
||||
#endif
|
||||
P_SCHEDSTAT(nr_wakeups);
|
||||
P_SCHEDSTAT(nr_wakeups_sync);
|
||||
P_SCHEDSTAT(nr_wakeups_migrate);
|
||||
|
|
|
@ -2273,7 +2273,8 @@ static bool task_numa_compare(struct task_numa_env *env,
|
|||
|
||||
rcu_read_lock();
|
||||
cur = rcu_dereference(dst_rq->curr);
|
||||
if (cur && ((cur->flags & PF_EXITING) || is_idle_task(cur)))
|
||||
if (cur && ((cur->flags & (PF_EXITING | PF_KTHREAD)) ||
|
||||
!cur->mm))
|
||||
cur = NULL;
|
||||
|
||||
/*
|
||||
|
|
|
@ -457,38 +457,35 @@ size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
|
|||
}
|
||||
EXPORT_SYMBOL(iov_iter_zero);
|
||||
|
||||
size_t copy_page_from_iter_atomic(struct page *page, size_t offset,
|
||||
size_t copy_folio_from_iter_atomic(struct folio *folio, size_t offset,
|
||||
size_t bytes, struct iov_iter *i)
|
||||
{
|
||||
size_t n, copied = 0;
|
||||
bool uses_kmap = IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP) ||
|
||||
PageHighMem(page);
|
||||
|
||||
if (!page_copy_sane(page, offset, bytes))
|
||||
if (!page_copy_sane(&folio->page, offset, bytes))
|
||||
return 0;
|
||||
if (WARN_ON_ONCE(!i->data_source))
|
||||
return 0;
|
||||
|
||||
do {
|
||||
char *p;
|
||||
char *to = kmap_local_folio(folio, offset);
|
||||
|
||||
n = bytes - copied;
|
||||
if (uses_kmap) {
|
||||
page += offset / PAGE_SIZE;
|
||||
offset %= PAGE_SIZE;
|
||||
n = min_t(size_t, n, PAGE_SIZE - offset);
|
||||
}
|
||||
if (folio_test_partial_kmap(folio) &&
|
||||
n > PAGE_SIZE - offset_in_page(offset))
|
||||
n = PAGE_SIZE - offset_in_page(offset);
|
||||
|
||||
p = kmap_atomic(page) + offset;
|
||||
n = __copy_from_iter(p, n, i);
|
||||
kunmap_atomic(p);
|
||||
pagefault_disable();
|
||||
n = __copy_from_iter(to, n, i);
|
||||
pagefault_enable();
|
||||
kunmap_local(to);
|
||||
copied += n;
|
||||
offset += n;
|
||||
} while (uses_kmap && copied != bytes && n > 0);
|
||||
} while (copied != bytes && n > 0);
|
||||
|
||||
return copied;
|
||||
}
|
||||
EXPORT_SYMBOL(copy_page_from_iter_atomic);
|
||||
EXPORT_SYMBOL(copy_folio_from_iter_atomic);
|
||||
|
||||
static void iov_iter_bvec_advance(struct iov_iter *i, size_t size)
|
||||
{
|
||||
|
|
mm/Kconfig (34 lines changed)
|
@ -993,6 +993,40 @@ config CMA_AREAS
|
|||
|
||||
If unsure, leave the default value "8" in UMA and "20" in NUMA.
|
||||
|
||||
#
|
||||
# Select this config option from the architecture Kconfig, if available, to set
|
||||
# the max page order for physically contiguous allocations.
|
||||
#
|
||||
config ARCH_FORCE_MAX_ORDER
|
||||
int
|
||||
|
||||
#
|
||||
# When ARCH_FORCE_MAX_ORDER is not defined,
|
||||
# the default page block order is MAX_PAGE_ORDER (10) as per
|
||||
# include/linux/mmzone.h.
|
||||
#
|
||||
config PAGE_BLOCK_ORDER
|
||||
int "Page Block Order"
|
||||
range 1 10 if ARCH_FORCE_MAX_ORDER = 0
|
||||
default 10 if ARCH_FORCE_MAX_ORDER = 0
|
||||
range 1 ARCH_FORCE_MAX_ORDER if ARCH_FORCE_MAX_ORDER != 0
|
||||
default ARCH_FORCE_MAX_ORDER if ARCH_FORCE_MAX_ORDER != 0
|
||||
help
|
||||
The page block order refers to the power of two number of pages that
|
||||
are physically contiguous and can have a migrate type associated to
|
||||
them. The maximum size of the page block order is limited by
|
||||
ARCH_FORCE_MAX_ORDER.
|
||||
|
||||
This config allows overriding the default page block order when the
|
||||
page block order is required to be smaller than ARCH_FORCE_MAX_ORDER
|
||||
or MAX_PAGE_ORDER.
|
||||
|
||||
Reducing pageblock order can negatively impact THP generation
|
||||
success rate. If your workloads uses THP heavily, please use this
|
||||
option with caution.
|
||||
|
||||
Don't change if unsure.
|
||||
|
||||
config MEM_SOFT_DIRTY
|
||||
bool "Track memory changes"
|
||||
depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY && PROC_FS
|
||||
|
|
|
@ -4,6 +4,7 @@ menu "Data Access Monitoring"
|
|||
|
||||
config DAMON
|
||||
bool "DAMON: Data Access Monitoring Framework"
|
||||
default y
|
||||
help
|
||||
This builds a framework that allows kernel subsystems to monitor
|
||||
access frequency of each memory region. The information can be useful
|
||||
|
@ -28,6 +29,7 @@ config DAMON_VADDR
|
|||
bool "Data access monitoring operations for virtual address spaces"
|
||||
depends on DAMON && MMU
|
||||
select PAGE_IDLE_FLAG
|
||||
default DAMON
|
||||
help
|
||||
This builds the default data access monitoring operations for DAMON
|
||||
that work for virtual address spaces.
|
||||
|
@ -36,6 +38,7 @@ config DAMON_PADDR
|
|||
bool "Data access monitoring operations for the physical address space"
|
||||
depends on DAMON && MMU
|
||||
select PAGE_IDLE_FLAG
|
||||
default DAMON
|
||||
help
|
||||
This builds the default data access monitoring operations for DAMON
|
||||
that works for the physical address space.
|
||||
|
@ -55,6 +58,7 @@ config DAMON_VADDR_KUNIT_TEST
|
|||
config DAMON_SYSFS
|
||||
bool "DAMON sysfs interface"
|
||||
depends on DAMON && SYSFS
|
||||
default DAMON
|
||||
help
|
||||
This builds the sysfs interface for DAMON. The user space can use
|
||||
the interface for arbitrary data access monitoring.
|
||||
|
|
|
@ -1093,9 +1093,17 @@ static int damon_commit_targets(
|
|||
if (err)
|
||||
return err;
|
||||
} else {
|
||||
struct damos *s;
|
||||
|
||||
if (damon_target_has_pid(dst))
|
||||
put_pid(dst_target->pid);
|
||||
damon_destroy_target(dst_target);
|
||||
damon_for_each_scheme(s, dst) {
|
||||
if (s->quota.charge_target_from == dst_target) {
|
||||
s->quota.charge_target_from = NULL;
|
||||
s->quota.charge_addr_from = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -142,7 +142,7 @@ static void page_cache_delete(struct address_space *mapping,
|
|||
xas_init_marks(&xas);
|
||||
|
||||
folio->mapping = NULL;
|
||||
/* Leave page->index set: truncation lookup relies upon it */
|
||||
/* Leave folio->index set: truncation lookup relies upon it */
|
||||
mapping->nrpages -= nr;
|
||||
}
|
||||
|
||||
|
@ -949,7 +949,7 @@ unlock:
|
|||
return 0;
|
||||
error:
|
||||
folio->mapping = NULL;
|
||||
/* Leave page->index set: truncation relies upon it */
|
||||
/* Leave folio->index set: truncation relies upon it */
|
||||
folio_put_refs(folio, nr);
|
||||
return xas_error(&xas);
|
||||
}
|
||||
|
|
mm/gup.c (2 lines changed)
@@ -3299,7 +3299,7 @@ static unsigned long gup_fast(unsigned long start, unsigned long end,
 	 * include/asm-generic/tlb.h for more details.
 	 *
 	 * We do not adopt an rcu_read_lock() here as we also want to block IPIs
-	 * that come from THPs splitting.
+	 * that come from callers of tlb_remove_table_sync_one().
 	 */
 	local_irq_save(flags);
 	gup_fast_pgd_range(start, end, gup_flags, pages, &nr_pinned);
@ -3741,7 +3741,7 @@ static void __init report_hugepages(void)
|
|||
|
||||
string_get_size(huge_page_size(h), 1, STRING_UNITS_2, buf, 32);
|
||||
pr_info("HugeTLB: registered %s page size, pre-allocated %ld pages\n",
|
||||
buf, h->free_huge_pages);
|
||||
buf, h->nr_huge_pages);
|
||||
if (nrinvalid)
|
||||
pr_info("HugeTLB: %s page size: %lu invalid page%s discarded\n",
|
||||
buf, nrinvalid, nrinvalid > 1 ? "s" : "");
|
||||
|
|
|
@ -548,19 +548,6 @@ static void release_pte_pages(pte_t *pte, pte_t *_pte,
|
|||
}
|
||||
}
|
||||
|
||||
static bool is_refcount_suitable(struct folio *folio)
|
||||
{
|
||||
int expected_refcount = folio_mapcount(folio);
|
||||
|
||||
if (!folio_test_anon(folio) || folio_test_swapcache(folio))
|
||||
expected_refcount += folio_nr_pages(folio);
|
||||
|
||||
if (folio_test_private(folio))
|
||||
expected_refcount++;
|
||||
|
||||
return folio_ref_count(folio) == expected_refcount;
|
||||
}
|
||||
|
||||
static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
|
||||
unsigned long address,
|
||||
pte_t *pte,
|
||||
|
@ -652,7 +639,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
|
|||
* but not from this process. The other process cannot write to
|
||||
* the page, only trigger CoW.
|
||||
*/
|
||||
if (!is_refcount_suitable(folio)) {
|
||||
if (folio_expected_ref_count(folio) != folio_ref_count(folio)) {
|
||||
folio_unlock(folio);
|
||||
result = SCAN_PAGE_COUNT;
|
||||
goto out;
|
||||
|
@ -1402,7 +1389,7 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm,
|
|||
* has excessive GUP pins (i.e. 512). Anyway the same check
|
||||
* will be done again later the risk seems low.
|
||||
*/
|
||||
if (!is_refcount_suitable(folio)) {
|
||||
if (folio_expected_ref_count(folio) != folio_ref_count(folio)) {
|
||||
result = SCAN_PAGE_COUNT;
|
||||
goto out_unmap;
|
||||
}
|
||||
|
@ -2293,6 +2280,17 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr,
|
|||
continue;
|
||||
}
|
||||
|
||||
if (!folio_try_get(folio)) {
|
||||
xas_reset(&xas);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (unlikely(folio != xas_reload(&xas))) {
|
||||
folio_put(folio);
|
||||
xas_reset(&xas);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (folio_order(folio) == HPAGE_PMD_ORDER &&
|
||||
folio->index == start) {
|
||||
/* Maybe PMD-mapped */
|
||||
|
@ -2303,23 +2301,27 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr,
|
|||
* it's safe to skip LRU and refcount checks before
|
||||
* returning.
|
||||
*/
|
||||
folio_put(folio);
|
||||
break;
|
||||
}
|
||||
|
||||
node = folio_nid(folio);
|
||||
if (hpage_collapse_scan_abort(node, cc)) {
|
||||
result = SCAN_SCAN_ABORT;
|
||||
folio_put(folio);
|
||||
break;
|
||||
}
|
||||
cc->node_load[node]++;
|
||||
|
||||
if (!folio_test_lru(folio)) {
|
||||
result = SCAN_PAGE_LRU;
|
||||
folio_put(folio);
|
||||
break;
|
||||
}
|
||||
|
||||
if (!is_refcount_suitable(folio)) {
|
||||
if (folio_expected_ref_count(folio) + 1 != folio_ref_count(folio)) {
|
||||
result = SCAN_PAGE_COUNT;
|
||||
folio_put(folio);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -2331,6 +2333,7 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr,
|
|||
*/
|
||||
|
||||
present += folio_nr_pages(folio);
|
||||
folio_put(folio);
|
||||
|
||||
if (need_resched()) {
|
||||
xas_pause(&xas);
|
||||
|
|
mm/memcontrol.c (127 lines changed)
|
@ -474,6 +474,8 @@ static const unsigned int memcg_vm_event_stat[] = {
|
|||
NUMA_PAGE_MIGRATE,
|
||||
NUMA_PTE_UPDATES,
|
||||
NUMA_HINT_FAULTS,
|
||||
NUMA_TASK_MIGRATE,
|
||||
NUMA_TASK_SWAP,
|
||||
#endif
|
||||
};
|
||||
|
||||
|
@ -531,7 +533,7 @@ struct memcg_vmstats {
|
|||
unsigned long events_pending[NR_MEMCG_EVENTS];
|
||||
|
||||
/* Stats updates since the last flush */
|
||||
atomic64_t stats_updates;
|
||||
atomic_t stats_updates;
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -557,7 +559,7 @@ static u64 flush_last_time;
|
|||
|
||||
static bool memcg_vmstats_needs_flush(struct memcg_vmstats *vmstats)
|
||||
{
|
||||
return atomic64_read(&vmstats->stats_updates) >
|
||||
return atomic_read(&vmstats->stats_updates) >
|
||||
MEMCG_CHARGE_BATCH * num_online_cpus();
|
||||
}
|
||||
|
||||
|
@ -571,7 +573,9 @@ static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val,
|
|||
if (!val)
|
||||
return;
|
||||
|
||||
css_rstat_updated(&memcg->css, cpu);
|
||||
/* TODO: add to cgroup update tree once it is nmi-safe. */
|
||||
if (!in_nmi())
|
||||
css_rstat_updated(&memcg->css, cpu);
|
||||
statc_pcpu = memcg->vmstats_percpu;
|
||||
for (; statc_pcpu; statc_pcpu = statc->parent_pcpu) {
|
||||
statc = this_cpu_ptr(statc_pcpu);
|
||||
|
@ -589,7 +593,7 @@ static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val,
|
|||
continue;
|
||||
|
||||
stats_updates = this_cpu_xchg(statc_pcpu->stats_updates, 0);
|
||||
atomic64_add(stats_updates, &statc->vmstats->stats_updates);
|
||||
atomic_add(stats_updates, &statc->vmstats->stats_updates);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -597,7 +601,7 @@ static void __mem_cgroup_flush_stats(struct mem_cgroup *memcg, bool force)
|
|||
{
|
||||
bool needs_flush = memcg_vmstats_needs_flush(memcg->vmstats);
|
||||
|
||||
trace_memcg_flush_stats(memcg, atomic64_read(&memcg->vmstats->stats_updates),
|
||||
trace_memcg_flush_stats(memcg, atomic_read(&memcg->vmstats->stats_updates),
|
||||
force, needs_flush);
|
||||
|
||||
if (!force && !needs_flush)
|
||||
|
@ -2513,17 +2517,47 @@ static void commit_charge(struct folio *folio, struct mem_cgroup *memcg)
|
|||
folio->memcg_data = (unsigned long)memcg;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC
|
||||
static inline void account_slab_nmi_safe(struct mem_cgroup *memcg,
|
||||
struct pglist_data *pgdat,
|
||||
enum node_stat_item idx, int nr)
|
||||
{
|
||||
struct lruvec *lruvec;
|
||||
|
||||
if (likely(!in_nmi())) {
|
||||
lruvec = mem_cgroup_lruvec(memcg, pgdat);
|
||||
mod_memcg_lruvec_state(lruvec, idx, nr);
|
||||
} else {
|
||||
struct mem_cgroup_per_node *pn = memcg->nodeinfo[pgdat->node_id];
|
||||
|
||||
/* TODO: add to cgroup update tree once it is nmi-safe. */
|
||||
if (idx == NR_SLAB_RECLAIMABLE_B)
|
||||
atomic_add(nr, &pn->slab_reclaimable);
|
||||
else
|
||||
atomic_add(nr, &pn->slab_unreclaimable);
|
||||
}
|
||||
}
|
||||
#else
|
||||
static inline void account_slab_nmi_safe(struct mem_cgroup *memcg,
|
||||
struct pglist_data *pgdat,
|
||||
enum node_stat_item idx, int nr)
|
||||
{
|
||||
struct lruvec *lruvec;
|
||||
|
||||
lruvec = mem_cgroup_lruvec(memcg, pgdat);
|
||||
mod_memcg_lruvec_state(lruvec, idx, nr);
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline void mod_objcg_mlstate(struct obj_cgroup *objcg,
|
||||
struct pglist_data *pgdat,
|
||||
enum node_stat_item idx, int nr)
|
||||
{
|
||||
struct mem_cgroup *memcg;
|
||||
struct lruvec *lruvec;
|
||||
|
||||
rcu_read_lock();
|
||||
memcg = obj_cgroup_memcg(objcg);
|
||||
lruvec = mem_cgroup_lruvec(memcg, pgdat);
|
||||
mod_memcg_lruvec_state(lruvec, idx, nr);
|
||||
account_slab_nmi_safe(memcg, pgdat, idx, nr);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
|
@ -2648,6 +2682,9 @@ __always_inline struct obj_cgroup *current_obj_cgroup(void)
|
|||
struct mem_cgroup *memcg;
|
||||
struct obj_cgroup *objcg;
|
||||
|
||||
if (IS_ENABLED(CONFIG_MEMCG_NMI_UNSAFE) && in_nmi())
|
||||
return NULL;
|
||||
|
||||
if (in_task()) {
|
||||
memcg = current->active_memcg;
|
||||
if (unlikely(memcg))
|
||||
|
@ -2710,6 +2747,23 @@ struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio)
|
|||
return objcg;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC
|
||||
static inline void account_kmem_nmi_safe(struct mem_cgroup *memcg, int val)
|
||||
{
|
||||
if (likely(!in_nmi())) {
|
||||
mod_memcg_state(memcg, MEMCG_KMEM, val);
|
||||
} else {
|
||||
/* TODO: add to cgroup update tree once it is nmi-safe. */
|
||||
atomic_add(val, &memcg->kmem_stat);
|
||||
}
|
||||
}
|
||||
#else
|
||||
static inline void account_kmem_nmi_safe(struct mem_cgroup *memcg, int val)
|
||||
{
|
||||
mod_memcg_state(memcg, MEMCG_KMEM, val);
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* obj_cgroup_uncharge_pages: uncharge a number of kernel pages from a objcg
|
||||
* @objcg: object cgroup to uncharge
|
||||
|
@ -2722,7 +2776,7 @@ static void obj_cgroup_uncharge_pages(struct obj_cgroup *objcg,
|
|||
|
||||
memcg = get_mem_cgroup_from_objcg(objcg);
|
||||
|
||||
mod_memcg_state(memcg, MEMCG_KMEM, -nr_pages);
|
||||
account_kmem_nmi_safe(memcg, -nr_pages);
|
||||
memcg1_account_kmem(memcg, -nr_pages);
|
||||
if (!mem_cgroup_is_root(memcg))
|
||||
refill_stock(memcg, nr_pages);
|
||||
|
@ -2750,7 +2804,7 @@ static int obj_cgroup_charge_pages(struct obj_cgroup *objcg, gfp_t gfp,
|
|||
if (ret)
|
||||
goto out;
|
||||
|
||||
mod_memcg_state(memcg, MEMCG_KMEM, nr_pages);
|
||||
account_kmem_nmi_safe(memcg, nr_pages);
|
||||
memcg1_account_kmem(memcg, nr_pages);
|
||||
out:
|
||||
css_put(&memcg->css);
|
||||
|
@ -3961,6 +4015,53 @@ static void mem_cgroup_stat_aggregate(struct aggregate_control *ac)
|
|||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC
|
||||
static void flush_nmi_stats(struct mem_cgroup *memcg, struct mem_cgroup *parent,
|
||||
int cpu)
|
||||
{
|
||||
int nid;
|
||||
|
||||
if (atomic_read(&memcg->kmem_stat)) {
|
||||
int kmem = atomic_xchg(&memcg->kmem_stat, 0);
|
||||
int index = memcg_stats_index(MEMCG_KMEM);
|
||||
|
||||
memcg->vmstats->state[index] += kmem;
|
||||
if (parent)
|
||||
parent->vmstats->state_pending[index] += kmem;
|
||||
}
|
||||
|
||||
for_each_node_state(nid, N_MEMORY) {
|
||||
struct mem_cgroup_per_node *pn = memcg->nodeinfo[nid];
|
||||
struct lruvec_stats *lstats = pn->lruvec_stats;
|
||||
struct lruvec_stats *plstats = NULL;
|
||||
|
||||
if (parent)
|
||||
plstats = parent->nodeinfo[nid]->lruvec_stats;
|
||||
|
||||
if (atomic_read(&pn->slab_reclaimable)) {
|
||||
int slab = atomic_xchg(&pn->slab_reclaimable, 0);
|
||||
int index = memcg_stats_index(NR_SLAB_RECLAIMABLE_B);
|
||||
|
||||
lstats->state[index] += slab;
|
||||
if (plstats)
|
||||
plstats->state_pending[index] += slab;
|
||||
}
|
||||
if (atomic_read(&pn->slab_unreclaimable)) {
|
||||
int slab = atomic_xchg(&pn->slab_unreclaimable, 0);
|
||||
int index = memcg_stats_index(NR_SLAB_UNRECLAIMABLE_B);
|
||||
|
||||
lstats->state[index] += slab;
|
||||
if (plstats)
|
||||
plstats->state_pending[index] += slab;
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
static void flush_nmi_stats(struct mem_cgroup *memcg, struct mem_cgroup *parent,
|
||||
int cpu)
|
||||
{}
|
||||
#endif
|
||||
|
||||
static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
|
||||
{
|
||||
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
|
||||
|
@ -3969,6 +4070,8 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
|
|||
struct aggregate_control ac;
|
||||
int nid;
|
||||
|
||||
flush_nmi_stats(memcg, parent, cpu);
|
||||
|
||||
statc = per_cpu_ptr(memcg->vmstats_percpu, cpu);
|
||||
|
||||
ac = (struct aggregate_control) {
|
||||
|
@ -4018,8 +4121,8 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
|
|||
}
|
||||
WRITE_ONCE(statc->stats_updates, 0);
|
||||
/* We are in a per-cpu loop here, only do the atomic write once */
|
||||
if (atomic64_read(&memcg->vmstats->stats_updates))
|
||||
atomic64_set(&memcg->vmstats->stats_updates, 0);
|
||||
if (atomic_read(&memcg->vmstats->stats_updates))
|
||||
atomic_set(&memcg->vmstats->stats_updates, 0);
|
||||
}
|
||||
|
||||
static void mem_cgroup_fork(struct task_struct *task)
|
||||
|
|
|
@ -358,6 +358,8 @@ void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
|
|||
{
|
||||
struct unlink_vma_file_batch vb;
|
||||
|
||||
tlb_free_vmas(tlb);
|
||||
|
||||
do {
|
||||
unsigned long addr = vma->vm_start;
|
||||
struct vm_area_struct *next;
|
||||
|
@ -4668,8 +4670,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
|
|||
|
||||
/*
|
||||
* KSM sometimes has to copy on read faults, for example, if
|
||||
* page->index of !PageKSM() pages would be nonlinear inside the
|
||||
* anon VMA -- PageKSM() is lost on actual swapout.
|
||||
* folio->index of non-ksm folios would be nonlinear inside the
|
||||
* anon VMA -- the ksm flag is lost on actual swapout.
|
||||
*/
|
||||
folio = ksm_might_need_to_copy(folio, vma, vmf->address);
|
||||
if (unlikely(!folio)) {
|
||||
|
|
|
@ -1509,7 +1509,7 @@ static inline void setup_usemap(struct zone *zone) {}
|
|||
/* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */
|
||||
void __init set_pageblock_order(void)
|
||||
{
|
||||
unsigned int order = MAX_PAGE_ORDER;
|
||||
unsigned int order = PAGE_BLOCK_ORDER;
|
||||
|
||||
/* Check that pageblock_nr_pages has not already been setup */
|
||||
if (pageblock_order)
|
||||
|
|
|
@ -424,6 +424,7 @@ static void __tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
|
|||
#ifdef CONFIG_MMU_GATHER_PAGE_SIZE
|
||||
tlb->page_size = 0;
|
||||
#endif
|
||||
tlb->vma_pfn = 0;
|
||||
|
||||
__tlb_reset_range(tlb);
|
||||
inc_tlb_flush_pending(tlb->mm);
|
||||
|
|
|
@ -2565,11 +2565,11 @@ struct folio *writeback_iter(struct address_space *mapping,
|
|||
if (!folio) {
|
||||
/*
|
||||
* To avoid deadlocks between range_cyclic writeback and callers
|
||||
* that hold pages in PageWriteback to aggregate I/O until
|
||||
* that hold folios in writeback to aggregate I/O until
|
||||
* the writeback iteration finishes, we do not loop back to the
|
||||
* start of the file. Doing so causes a page lock/page
|
||||
* start of the file. Doing so causes a folio lock/folio
|
||||
* writeback access order inversion - we should only ever lock
|
||||
* multiple pages in ascending page->index order, and looping
|
||||
* multiple folios in ascending folio->index order, and looping
|
||||
* back to the start of the file violates that rule and causes
|
||||
* deadlocks.
|
||||
*/
|
||||
|
|
mm/shmem.c (23 lines changed)
|
@ -1446,8 +1446,6 @@ static int shmem_unuse_swap_entries(struct inode *inode,
|
|||
for (i = 0; i < folio_batch_count(fbatch); i++) {
|
||||
struct folio *folio = fbatch->folios[i];
|
||||
|
||||
if (!xa_is_value(folio))
|
||||
continue;
|
||||
error = shmem_swapin_folio(inode, indices[i], &folio, SGP_CACHE,
|
||||
mapping_gfp_mask(mapping), NULL, NULL);
|
||||
if (error == 0) {
|
||||
|
@ -1505,6 +1503,7 @@ int shmem_unuse(unsigned int type)
|
|||
return 0;
|
||||
|
||||
mutex_lock(&shmem_swaplist_mutex);
|
||||
start_over:
|
||||
list_for_each_entry_safe(info, next, &shmem_swaplist, swaplist) {
|
||||
if (!info->swapped) {
|
||||
list_del_init(&info->swaplist);
|
||||
|
@ -1523,13 +1522,15 @@ int shmem_unuse(unsigned int type)
|
|||
cond_resched();
|
||||
|
||||
mutex_lock(&shmem_swaplist_mutex);
|
||||
next = list_next_entry(info, swaplist);
|
||||
if (!info->swapped)
|
||||
list_del_init(&info->swaplist);
|
||||
if (atomic_dec_and_test(&info->stop_eviction))
|
||||
wake_up_var(&info->stop_eviction);
|
||||
if (error)
|
||||
break;
|
||||
if (list_empty(&info->swaplist))
|
||||
goto start_over;
|
||||
next = list_next_entry(info, swaplist);
|
||||
if (!info->swapped)
|
||||
list_del_init(&info->swaplist);
|
||||
}
|
||||
mutex_unlock(&shmem_swaplist_mutex);
|
||||
|
||||
|
@ -1643,8 +1644,8 @@ try_split:
|
|||
BUG_ON(folio_mapped(folio));
|
||||
return swap_writeout(folio, wbc);
|
||||
}
|
||||
|
||||
list_del_init(&info->swaplist);
|
||||
if (!info->swapped)
|
||||
list_del_init(&info->swaplist);
|
||||
mutex_unlock(&shmem_swaplist_mutex);
|
||||
if (nr_pages > 1)
|
||||
goto try_split;
|
||||
|
@ -2331,6 +2332,8 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
|
|||
*/
|
||||
split_order = shmem_split_large_entry(inode, index, swap, gfp);
|
||||
if (split_order < 0) {
|
||||
folio_put(folio);
|
||||
folio = NULL;
|
||||
error = split_order;
|
||||
goto failed;
|
||||
}
|
||||
|
@ -5805,12 +5808,12 @@ static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name,
|
|||
if (size < 0 || size > MAX_LFS_FILESIZE)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
if (shmem_acct_size(flags, size))
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
if (is_idmapped_mnt(mnt))
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
if (shmem_acct_size(flags, size))
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
inode = shmem_get_inode(&nop_mnt_idmap, mnt->mnt_sb, NULL,
|
||||
S_IFREG | S_IRWXUGO, 0, flags);
|
||||
if (IS_ERR(inode)) {
|
||||
|
|
|
@ -425,7 +425,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
|
|||
for (i = 0; i < folio_batch_count(&fbatch); i++) {
|
||||
struct folio *folio = fbatch.folios[i];
|
||||
|
||||
/* We rely upon deletion not changing page->index */
|
||||
/* We rely upon deletion not changing folio->index */
|
||||
|
||||
if (xa_is_value(folio))
|
||||
continue;
|
||||
|
|
|
@ -1347,6 +1347,8 @@ const char * const vmstat_text[] = {
|
|||
"numa_hint_faults",
|
||||
"numa_hint_faults_local",
|
||||
"numa_pages_migrated",
|
||||
"numa_task_migrated",
|
||||
"numa_task_swapped",
|
||||
#endif
|
||||
#ifdef CONFIG_MIGRATION
|
||||
"pgmigrate_success",
|
||||
|
|
|
@ -54,8 +54,8 @@ struct zpdesc {
|
|||
ZPDESC_MATCH(flags, flags);
|
||||
ZPDESC_MATCH(lru, lru);
|
||||
ZPDESC_MATCH(mapping, movable_ops);
|
||||
ZPDESC_MATCH(index, next);
|
||||
ZPDESC_MATCH(index, handle);
|
||||
ZPDESC_MATCH(__folio_index, next);
|
||||
ZPDESC_MATCH(__folio_index, handle);
|
||||
ZPDESC_MATCH(private, zspage);
|
||||
ZPDESC_MATCH(page_type, first_obj_offset);
|
||||
ZPDESC_MATCH(_refcount, _refcount);
|
||||
|
|
|
@ -492,6 +492,7 @@ $(obj)/core.o: $(RUST_LIB_SRC)/core/src/lib.rs \
|
|||
ifneq ($(or $(CONFIG_X86_64),$(CONFIG_X86_32)),)
|
||||
$(obj)/core.o: scripts/target.json
|
||||
endif
|
||||
KCOV_INSTRUMENT_core.o := n
|
||||
|
||||
$(obj)/compiler_builtins.o: private skip_gendwarfksyms = 1
|
||||
$(obj)/compiler_builtins.o: private rustc_objcopy = -w -W '__*'
|
||||
|
|
|
@ -10,7 +10,6 @@
|
|||
//! control what happens when userspace reads or writes to that region of memory.
|
||||
//!
|
||||
//! C header: [`include/linux/mm.h`](srctree/include/linux/mm.h)
|
||||
#![cfg(CONFIG_MMU)]
|
||||
|
||||
use crate::{
|
||||
bindings,
|
||||
|
@ -21,6 +20,10 @@ use core::{ops::Deref, ptr::NonNull};
|
|||
pub mod virt;
|
||||
use virt::VmaRef;
|
||||
|
||||
#[cfg(CONFIG_MMU)]
|
||||
pub use mmput_async::MmWithUserAsync;
|
||||
mod mmput_async;
|
||||
|
||||
/// A wrapper for the kernel's `struct mm_struct`.
|
||||
///
|
||||
/// This represents the address space of a userspace process, so each process has one `Mm`
|
||||
|
@ -111,50 +114,6 @@ impl Deref for MmWithUser {
|
|||
}
|
||||
}
|
||||
|
||||
/// A wrapper for the kernel's `struct mm_struct`.
|
||||
///
|
||||
/// This type is identical to `MmWithUser` except that it uses `mmput_async` when dropping a
|
||||
/// refcount. This means that the destructor of `ARef<MmWithUserAsync>` is safe to call in atomic
|
||||
/// context.
|
||||
///
|
||||
/// # Invariants
|
||||
///
|
||||
/// Values of this type are always refcounted using `mmget`. The value of `mm_users` is non-zero.
|
||||
#[repr(transparent)]
|
||||
pub struct MmWithUserAsync {
|
||||
mm: MmWithUser,
|
||||
}
|
||||
|
||||
// SAFETY: It is safe to call `mmput_async` on another thread than where `mmget` was called.
|
||||
unsafe impl Send for MmWithUserAsync {}
|
||||
// SAFETY: All methods on `MmWithUserAsync` can be called in parallel from several threads.
|
||||
unsafe impl Sync for MmWithUserAsync {}
|
||||
|
||||
// SAFETY: By the type invariants, this type is always refcounted.
|
||||
unsafe impl AlwaysRefCounted for MmWithUserAsync {
|
||||
#[inline]
|
||||
fn inc_ref(&self) {
|
||||
// SAFETY: The pointer is valid since self is a reference.
|
||||
unsafe { bindings::mmget(self.as_raw()) };
|
||||
}
|
||||
|
||||
#[inline]
|
||||
unsafe fn dec_ref(obj: NonNull<Self>) {
|
||||
// SAFETY: The caller is giving up their refcount.
|
||||
unsafe { bindings::mmput_async(obj.cast().as_ptr()) };
|
||||
}
|
||||
}
|
||||
|
||||
// Make all `MmWithUser` methods available on `MmWithUserAsync`.
|
||||
impl Deref for MmWithUserAsync {
|
||||
type Target = MmWithUser;
|
||||
|
||||
#[inline]
|
||||
fn deref(&self) -> &MmWithUser {
|
||||
&self.mm
|
||||
}
|
||||
}
|
||||
|
||||
// These methods are safe to call even if `mm_users` is zero.
|
||||
impl Mm {
|
||||
/// Returns a raw pointer to the inner `mm_struct`.
|
||||
|
@ -206,13 +165,6 @@ impl MmWithUser {
|
|||
unsafe { &*ptr.cast() }
|
||||
}
|
||||
|
||||
/// Use `mmput_async` when dropping this refcount.
|
||||
#[inline]
|
||||
pub fn into_mmput_async(me: ARef<MmWithUser>) -> ARef<MmWithUserAsync> {
|
||||
// SAFETY: The layouts and invariants are compatible.
|
||||
unsafe { ARef::from_raw(ARef::into_raw(me).cast()) }
|
||||
}
|
||||
|
||||
/// Attempt to access a vma using the vma read lock.
|
||||
///
|
||||
/// This is an optimistic trylock operation, so it may fail if there is contention. In that
|
||||
|
|
rust/kernel/mm/mmput_async.rs (new file, 68 lines)
|
@@ -0,0 +1,68 @@
// SPDX-License-Identifier: GPL-2.0

// Copyright (C) 2024 Google LLC.

//! Version of `MmWithUser` using `mmput_async`.
//!
//! This is a separate file from `mm.rs` due to the dependency on `CONFIG_MMU=y`.
#![cfg(CONFIG_MMU)]

use crate::{
    bindings,
    mm::MmWithUser,
    types::{ARef, AlwaysRefCounted},
};
use core::{ops::Deref, ptr::NonNull};

/// A wrapper for the kernel's `struct mm_struct`.
///
/// This type is identical to `MmWithUser` except that it uses `mmput_async` when dropping a
/// refcount. This means that the destructor of `ARef<MmWithUserAsync>` is safe to call in atomic
/// context.
///
/// # Invariants
///
/// Values of this type are always refcounted using `mmget`. The value of `mm_users` is non-zero.
#[repr(transparent)]
pub struct MmWithUserAsync {
    mm: MmWithUser,
}

// SAFETY: It is safe to call `mmput_async` on another thread than where `mmget` was called.
unsafe impl Send for MmWithUserAsync {}
// SAFETY: All methods on `MmWithUserAsync` can be called in parallel from several threads.
unsafe impl Sync for MmWithUserAsync {}

// SAFETY: By the type invariants, this type is always refcounted.
unsafe impl AlwaysRefCounted for MmWithUserAsync {
    #[inline]
    fn inc_ref(&self) {
        // SAFETY: The pointer is valid since self is a reference.
        unsafe { bindings::mmget(self.as_raw()) };
    }

    #[inline]
    unsafe fn dec_ref(obj: NonNull<Self>) {
        // SAFETY: The caller is giving up their refcount.
        unsafe { bindings::mmput_async(obj.cast().as_ptr()) };
    }
}

// Make all `MmWithUser` methods available on `MmWithUserAsync`.
impl Deref for MmWithUserAsync {
    type Target = MmWithUser;

    #[inline]
    fn deref(&self) -> &MmWithUser {
        &self.mm
    }
}

impl MmWithUser {
    /// Use `mmput_async` when dropping this refcount.
    #[inline]
    pub fn into_mmput_async(me: ARef<MmWithUser>) -> ARef<MmWithUserAsync> {
        // SAFETY: The layouts and invariants are compatible.
        unsafe { ARef::from_raw(ARef::into_raw(me).cast()) }
    }
}
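Side note on why the async variant exists: dropping the last `mm_users` reference with plain `mmput()` may sleep, because it can tear down the whole address space, so it must not happen in atomic context; `mmput_async()` punts that final teardown to a workqueue. Below is a minimal kernel-side C sketch of the pattern the Rust wrapper above mirrors; `get_task_mm()` and `mmput_async()` are the existing C helpers, while the surrounding function and its caller are hypothetical.

```c
#include <linux/sched.h>	/* struct task_struct */
#include <linux/sched/mm.h>	/* get_task_mm(), mmput_async() */

/*
 * Hypothetical example: pin a task's mm while inspecting it, then drop the
 * reference from a context that must not sleep. mmput() could sleep if this
 * were the last reference, so mmput_async() defers the teardown to a
 * workqueue instead.
 */
static void example_put_mm_atomic(struct task_struct *task)
{
	struct mm_struct *mm = get_task_mm(task);	/* takes an mm_users reference */

	if (!mm)
		return;		/* kernel thread or task already exited */

	/* ... look at *mm while the reference is held ... */

	mmput_async(mm);	/* safe to call even in atomic context */
}
```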
@@ -2,4 +2,10 @@
kcov-flags-y += -fsanitize-coverage=trace-pc
kcov-flags-$(CONFIG_KCOV_ENABLE_COMPARISONS) += -fsanitize-coverage=trace-cmp

kcov-rflags-y += -Cpasses=sancov-module
kcov-rflags-y += -Cllvm-args=-sanitizer-coverage-level=3
kcov-rflags-y += -Cllvm-args=-sanitizer-coverage-trace-pc
kcov-rflags-$(CONFIG_KCOV_ENABLE_COMPARISONS) += -Cllvm-args=-sanitizer-coverage-trace-compares

export CFLAGS_KCOV := $(kcov-flags-y)
export RUSTFLAGS_KCOV := $(kcov-rflags-y)
@@ -169,6 +169,9 @@ ifeq ($(CONFIG_KCOV),y)
_c_flags += $(if $(patsubst n%,, \
	$(KCOV_INSTRUMENT_$(target-stem).o)$(KCOV_INSTRUMENT)$(if $(is-kernel-object),$(CONFIG_KCOV_INSTRUMENT_ALL))), \
	$(CFLAGS_KCOV))
_rust_flags += $(if $(patsubst n%,, \
	$(KCOV_INSTRUMENT_$(target-stem).o)$(KCOV_INSTRUMENT)$(if $(is-kernel-object),$(CONFIG_KCOV_INSTRUMENT_ALL))), \
	$(RUSTFLAGS_KCOV))
endif

#
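With `CFLAGS_KCOV` and `RUSTFLAGS_KCOV` wired into `_c_flags`/`_rust_flags` above, Rust objects are instrumented for KCOV the same way C objects are, and coverage is read back through the usual kcov debugfs interface. The following self-contained sketch follows the long-standing usage pattern from Documentation/dev-tools/kcov.rst; it is not part of this diff and the traced `read()` call is only a placeholder for whatever syscall you want covered.

```c
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>

#define KCOV_INIT_TRACE	_IOR('c', 1, unsigned long)
#define KCOV_ENABLE	_IO('c', 100)
#define KCOV_DISABLE	_IO('c', 101)
#define KCOV_TRACE_PC	0
#define COVER_SIZE	(64 << 10)	/* number of recorded PCs */

int main(void)
{
	unsigned long *cover, n, i;
	int fd = open("/sys/kernel/debug/kcov", O_RDWR);

	if (fd < 0) { perror("open kcov"); return 1; }
	if (ioctl(fd, KCOV_INIT_TRACE, COVER_SIZE)) { perror("init"); return 1; }
	cover = mmap(NULL, COVER_SIZE * sizeof(unsigned long),
		     PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (cover == MAP_FAILED) { perror("mmap"); return 1; }

	/* Trace this thread's kernel PCs, covering C and (now) Rust code alike. */
	if (ioctl(fd, KCOV_ENABLE, KCOV_TRACE_PC)) { perror("enable"); return 1; }
	__atomic_store_n(&cover[0], 0, __ATOMIC_RELAXED);

	read(-1, NULL, 0);	/* the syscall whose coverage we want */

	n = __atomic_load_n(&cover[0], __ATOMIC_RELAXED);
	for (i = 0; i < n; i++)
		printf("0x%lx\n", cover[i + 1]);

	ioctl(fd, KCOV_DISABLE, 0);
	munmap(cover, COVER_SIZE * sizeof(unsigned long));
	close(fd);
	return 0;
}
```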
@@ -15,6 +15,10 @@ if sysfs_root is None:
    print('Seems sysfs not mounted?')
    exit(ksft_skip)

if not os.path.exists(sysfs_root):
    print('Seems DAMON disabled?')
    exit(ksft_skip)

def write_file(path, string):
    "Returns error string if failed, or None otherwise"
    string = '%s' % string
||||
|
|
|
@ -112,9 +112,12 @@ struct comm_pipes {
|
|||
|
||||
static int setup_comm_pipes(struct comm_pipes *comm_pipes)
|
||||
{
|
||||
if (pipe(comm_pipes->child_ready) < 0)
|
||||
if (pipe(comm_pipes->child_ready) < 0) {
|
||||
ksft_perror("pipe()");
|
||||
return -errno;
|
||||
}
|
||||
if (pipe(comm_pipes->parent_ready) < 0) {
|
||||
ksft_perror("pipe()");
|
||||
close(comm_pipes->child_ready[0]);
|
||||
close(comm_pipes->child_ready[1]);
|
||||
return -errno;
|
||||
|
@ -207,13 +210,14 @@ static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect,
|
|||
|
||||
ret = setup_comm_pipes(&comm_pipes);
|
||||
if (ret) {
|
||||
ksft_test_result_fail("pipe() failed\n");
|
||||
log_test_result(KSFT_FAIL);
|
||||
return;
|
||||
}
|
||||
|
||||
ret = fork();
|
||||
if (ret < 0) {
|
||||
ksft_test_result_fail("fork() failed\n");
|
||||
ksft_perror("fork() failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto close_comm_pipes;
|
||||
} else if (!ret) {
|
||||
exit(fn(mem, size, &comm_pipes));
|
||||
|
@ -228,9 +232,18 @@ static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect,
|
|||
* write-faults by directly mapping pages writable.
|
||||
*/
|
||||
ret = mprotect(mem, size, PROT_READ);
|
||||
ret |= mprotect(mem, size, PROT_READ|PROT_WRITE);
|
||||
if (ret) {
|
||||
ksft_test_result_fail("mprotect() failed\n");
|
||||
ksft_perror("mprotect() failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
write(comm_pipes.parent_ready[1], "0", 1);
|
||||
wait(&ret);
|
||||
goto close_comm_pipes;
|
||||
}
|
||||
|
||||
ret = mprotect(mem, size, PROT_READ|PROT_WRITE);
|
||||
if (ret) {
|
||||
ksft_perror("mprotect() failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
write(comm_pipes.parent_ready[1], "0", 1);
|
||||
wait(&ret);
|
||||
goto close_comm_pipes;
|
||||
|
@ -248,16 +261,16 @@ static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect,
|
|||
ret = -EINVAL;
|
||||
|
||||
if (!ret) {
|
||||
ksft_test_result_pass("No leak from parent into child\n");
|
||||
log_test_result(KSFT_PASS);
|
||||
} else if (xfail) {
|
||||
/*
|
||||
* With hugetlb, some vmsplice() tests are currently expected to
|
||||
* fail because (a) harder to fix and (b) nobody really cares.
|
||||
* Flag them as expected failure for now.
|
||||
*/
|
||||
ksft_test_result_xfail("Leak from parent into child\n");
|
||||
log_test_result(KSFT_XFAIL);
|
||||
} else {
|
||||
ksft_test_result_fail("Leak from parent into child\n");
|
||||
log_test_result(KSFT_FAIL);
|
||||
}
|
||||
close_comm_pipes:
|
||||
close_comm_pipes(&comm_pipes);
|
||||
|
@ -306,26 +319,29 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size,
|
|||
|
||||
ret = setup_comm_pipes(&comm_pipes);
|
||||
if (ret) {
|
||||
ksft_test_result_fail("pipe() failed\n");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto free;
|
||||
}
|
||||
|
||||
if (pipe(fds) < 0) {
|
||||
ksft_test_result_fail("pipe() failed\n");
|
||||
ksft_perror("pipe() failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto close_comm_pipes;
|
||||
}
|
||||
|
||||
if (before_fork) {
|
||||
transferred = vmsplice(fds[1], &iov, 1, 0);
|
||||
if (transferred <= 0) {
|
||||
ksft_test_result_fail("vmsplice() failed\n");
|
||||
ksft_print_msg("vmsplice() failed\n");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto close_pipe;
|
||||
}
|
||||
}
|
||||
|
||||
ret = fork();
|
||||
if (ret < 0) {
|
||||
ksft_test_result_fail("fork() failed\n");
|
||||
ksft_perror("fork() failed\n");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto close_pipe;
|
||||
} else if (!ret) {
|
||||
write(comm_pipes.child_ready[1], "0", 1);
|
||||
|
@ -339,7 +355,8 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size,
|
|||
if (!before_fork) {
|
||||
transferred = vmsplice(fds[1], &iov, 1, 0);
|
||||
if (transferred <= 0) {
|
||||
ksft_test_result_fail("vmsplice() failed\n");
|
||||
ksft_perror("vmsplice() failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
wait(&ret);
|
||||
goto close_pipe;
|
||||
}
|
||||
|
@ -348,7 +365,8 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size,
|
|||
while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
|
||||
;
|
||||
if (munmap(mem, size) < 0) {
|
||||
ksft_test_result_fail("munmap() failed\n");
|
||||
ksft_perror("munmap() failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto close_pipe;
|
||||
}
|
||||
write(comm_pipes.parent_ready[1], "0", 1);
|
||||
|
@ -356,7 +374,8 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size,
|
|||
/* Wait until the child is done writing. */
|
||||
wait(&ret);
|
||||
if (!WIFEXITED(ret)) {
|
||||
ksft_test_result_fail("wait() failed\n");
|
||||
ksft_perror("wait() failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto close_pipe;
|
||||
}
|
||||
|
||||
|
@ -364,22 +383,23 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size,
|
|||
for (total = 0; total < transferred; total += cur) {
|
||||
cur = read(fds[0], new + total, transferred - total);
|
||||
if (cur < 0) {
|
||||
ksft_test_result_fail("read() failed\n");
|
||||
ksft_perror("read() failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto close_pipe;
|
||||
}
|
||||
}
|
||||
|
||||
if (!memcmp(old, new, transferred)) {
|
||||
ksft_test_result_pass("No leak from child into parent\n");
|
||||
log_test_result(KSFT_PASS);
|
||||
} else if (xfail) {
|
||||
/*
|
||||
* With hugetlb, some vmsplice() tests are currently expected to
|
||||
* fail because (a) harder to fix and (b) nobody really cares.
|
||||
* Flag them as expected failure for now.
|
||||
*/
|
||||
ksft_test_result_xfail("Leak from child into parent\n");
|
||||
log_test_result(KSFT_XFAIL);
|
||||
} else {
|
||||
ksft_test_result_fail("Leak from child into parent\n");
|
||||
log_test_result(KSFT_FAIL);
|
||||
}
|
||||
close_pipe:
|
||||
close(fds[0]);
|
||||
|
@ -416,13 +436,14 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork)
|
|||
|
||||
ret = setup_comm_pipes(&comm_pipes);
|
||||
if (ret) {
|
||||
ksft_test_result_fail("pipe() failed\n");
|
||||
log_test_result(KSFT_FAIL);
|
||||
return;
|
||||
}
|
||||
|
||||
file = tmpfile();
|
||||
if (!file) {
|
||||
ksft_test_result_fail("tmpfile() failed\n");
|
||||
ksft_perror("tmpfile() failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto close_comm_pipes;
|
||||
}
|
||||
fd = fileno(file);
|
||||
|
@ -430,14 +451,16 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork)
|
|||
|
||||
tmp = malloc(size);
|
||||
if (!tmp) {
|
||||
ksft_test_result_fail("malloc() failed\n");
|
||||
ksft_print_msg("malloc() failed\n");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto close_file;
|
||||
}
|
||||
|
||||
/* Skip on errors, as we might just lack kernel support. */
|
||||
ret = io_uring_queue_init(1, &ring, 0);
|
||||
if (ret < 0) {
|
||||
ksft_test_result_skip("io_uring_queue_init() failed\n");
|
||||
ksft_print_msg("io_uring_queue_init() failed\n");
|
||||
log_test_result(KSFT_SKIP);
|
||||
goto free_tmp;
|
||||
}
|
||||
|
||||
|
@ -452,7 +475,8 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork)
|
|||
iov.iov_len = size;
|
||||
ret = io_uring_register_buffers(&ring, &iov, 1);
|
||||
if (ret) {
|
||||
ksft_test_result_skip("io_uring_register_buffers() failed\n");
|
||||
ksft_print_msg("io_uring_register_buffers() failed\n");
|
||||
log_test_result(KSFT_SKIP);
|
||||
goto queue_exit;
|
||||
}
|
||||
|
||||
|
@ -463,7 +487,8 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork)
|
|||
*/
|
||||
ret = fork();
|
||||
if (ret < 0) {
|
||||
ksft_test_result_fail("fork() failed\n");
|
||||
ksft_perror("fork() failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto unregister_buffers;
|
||||
} else if (!ret) {
|
||||
write(comm_pipes.child_ready[1], "0", 1);
|
||||
|
@ -483,10 +508,17 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork)
|
|||
* if the page is mapped R/O vs. R/W).
|
||||
*/
|
||||
ret = mprotect(mem, size, PROT_READ);
|
||||
clear_softdirty();
|
||||
ret |= mprotect(mem, size, PROT_READ | PROT_WRITE);
|
||||
if (ret) {
|
||||
ksft_test_result_fail("mprotect() failed\n");
|
||||
ksft_perror("mprotect() failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto unregister_buffers;
|
||||
}
|
||||
|
||||
clear_softdirty();
|
||||
ret = mprotect(mem, size, PROT_READ | PROT_WRITE);
|
||||
if (ret) {
|
||||
ksft_perror("mprotect() failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto unregister_buffers;
|
||||
}
|
||||
}
|
||||
|
@ -498,25 +530,29 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork)
|
|||
memset(mem, 0xff, size);
|
||||
sqe = io_uring_get_sqe(&ring);
|
||||
if (!sqe) {
|
||||
ksft_test_result_fail("io_uring_get_sqe() failed\n");
|
||||
ksft_print_msg("io_uring_get_sqe() failed\n");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto quit_child;
|
||||
}
|
||||
io_uring_prep_write_fixed(sqe, fd, mem, size, 0, 0);
|
||||
|
||||
ret = io_uring_submit(&ring);
|
||||
if (ret < 0) {
|
||||
ksft_test_result_fail("io_uring_submit() failed\n");
|
||||
ksft_print_msg("io_uring_submit() failed\n");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto quit_child;
|
||||
}
|
||||
|
||||
ret = io_uring_wait_cqe(&ring, &cqe);
|
||||
if (ret < 0) {
|
||||
ksft_test_result_fail("io_uring_wait_cqe() failed\n");
|
||||
ksft_print_msg("io_uring_wait_cqe() failed\n");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto quit_child;
|
||||
}
|
||||
|
||||
if (cqe->res != size) {
|
||||
ksft_test_result_fail("write_fixed failed\n");
|
||||
ksft_print_msg("write_fixed failed\n");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto quit_child;
|
||||
}
|
||||
io_uring_cqe_seen(&ring, cqe);
|
||||
|
@ -526,15 +562,18 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork)
|
|||
while (total < size) {
|
||||
cur = pread(fd, tmp + total, size - total, total);
|
||||
if (cur < 0) {
|
||||
ksft_test_result_fail("pread() failed\n");
|
||||
ksft_print_msg("pread() failed\n");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto quit_child;
|
||||
}
|
||||
total += cur;
|
||||
}
|
||||
|
||||
/* Finally, check if we read what we expected. */
|
||||
ksft_test_result(!memcmp(mem, tmp, size),
|
||||
"Longterm R/W pin is reliable\n");
|
||||
if (!memcmp(mem, tmp, size))
|
||||
log_test_result(KSFT_PASS);
|
||||
else
|
||||
log_test_result(KSFT_FAIL);
|
||||
|
||||
quit_child:
|
||||
if (use_fork) {
|
||||
|
@ -582,19 +621,21 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
|
|||
int ret;
|
||||
|
||||
if (gup_fd < 0) {
|
||||
ksft_test_result_skip("gup_test not available\n");
|
||||
ksft_print_msg("gup_test not available\n");
|
||||
log_test_result(KSFT_SKIP);
|
||||
return;
|
||||
}
|
||||
|
||||
tmp = malloc(size);
|
||||
if (!tmp) {
|
||||
ksft_test_result_fail("malloc() failed\n");
|
||||
ksft_print_msg("malloc() failed\n");
|
||||
log_test_result(KSFT_FAIL);
|
||||
return;
|
||||
}
|
||||
|
||||
ret = setup_comm_pipes(&comm_pipes);
|
||||
if (ret) {
|
||||
ksft_test_result_fail("pipe() failed\n");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto free_tmp;
|
||||
}
|
||||
|
||||
|
@ -609,7 +650,8 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
|
|||
*/
|
||||
ret = fork();
|
||||
if (ret < 0) {
|
||||
ksft_test_result_fail("fork() failed\n");
|
||||
ksft_perror("fork() failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto close_comm_pipes;
|
||||
} else if (!ret) {
|
||||
write(comm_pipes.child_ready[1], "0", 1);
|
||||
|
@ -646,7 +688,8 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
|
|||
clear_softdirty();
|
||||
ret |= mprotect(mem, size, PROT_READ | PROT_WRITE);
|
||||
if (ret) {
|
||||
ksft_test_result_fail("mprotect() failed\n");
|
||||
ksft_perror("mprotect() failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto close_comm_pipes;
|
||||
}
|
||||
break;
|
||||
|
@ -661,9 +704,11 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
|
|||
ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args);
|
||||
if (ret) {
|
||||
if (errno == EINVAL)
|
||||
ksft_test_result_skip("PIN_LONGTERM_TEST_START failed\n");
|
||||
ret = KSFT_SKIP;
|
||||
else
|
||||
ksft_test_result_fail("PIN_LONGTERM_TEST_START failed\n");
|
||||
ret = KSFT_FAIL;
|
||||
ksft_perror("PIN_LONGTERM_TEST_START failed");
|
||||
log_test_result(ret);
|
||||
goto wait;
|
||||
}
|
||||
|
||||
|
@ -676,22 +721,26 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
|
|||
*/
|
||||
tmp_val = (__u64)(uintptr_t)tmp;
|
||||
ret = ioctl(gup_fd, PIN_LONGTERM_TEST_READ, &tmp_val);
|
||||
if (ret)
|
||||
ksft_test_result_fail("PIN_LONGTERM_TEST_READ failed\n");
|
||||
else
|
||||
ksft_test_result(!memcmp(mem, tmp, size),
|
||||
"Longterm R/O pin is reliable\n");
|
||||
if (ret) {
|
||||
ksft_perror("PIN_LONGTERM_TEST_READ failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
} else {
|
||||
if (!memcmp(mem, tmp, size))
|
||||
log_test_result(KSFT_PASS);
|
||||
else
|
||||
log_test_result(KSFT_FAIL);
|
||||
}
|
||||
|
||||
ret = ioctl(gup_fd, PIN_LONGTERM_TEST_STOP);
|
||||
if (ret)
|
||||
ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed\n");
|
||||
ksft_perror("PIN_LONGTERM_TEST_STOP failed");
|
||||
wait:
|
||||
switch (test) {
|
||||
case RO_PIN_TEST_SHARED:
|
||||
write(comm_pipes.parent_ready[1], "0", 1);
|
||||
wait(&ret);
|
||||
if (!WIFEXITED(ret))
|
||||
ksft_print_msg("[INFO] wait() failed\n");
|
||||
ksft_perror("wait() failed");
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
|
@ -746,14 +795,16 @@ static void do_run_with_base_page(test_fn fn, bool swapout)
|
|||
mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||
if (mem == MAP_FAILED) {
|
||||
ksft_test_result_fail("mmap() failed\n");
|
||||
ksft_perror("mmap() failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
return;
|
||||
}
|
||||
|
||||
ret = madvise(mem, pagesize, MADV_NOHUGEPAGE);
|
||||
/* Ignore if not around on a kernel. */
|
||||
if (ret && errno != EINVAL) {
|
||||
ksft_test_result_fail("MADV_NOHUGEPAGE failed\n");
|
||||
ksft_perror("MADV_NOHUGEPAGE failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto munmap;
|
||||
}
|
||||
|
||||
|
@ -763,7 +814,8 @@ static void do_run_with_base_page(test_fn fn, bool swapout)
|
|||
if (swapout) {
|
||||
madvise(mem, pagesize, MADV_PAGEOUT);
|
||||
if (!pagemap_is_swapped(pagemap_fd, mem)) {
|
||||
ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n");
|
||||
ksft_print_msg("MADV_PAGEOUT did not work, is swap enabled?\n");
|
||||
log_test_result(KSFT_SKIP);
|
||||
goto munmap;
|
||||
}
|
||||
}
|
||||
|
@ -775,13 +827,13 @@ munmap:
|
|||
|
||||
static void run_with_base_page(test_fn fn, const char *desc)
|
||||
{
|
||||
ksft_print_msg("[RUN] %s ... with base page\n", desc);
|
||||
log_test_start("%s ... with base page", desc);
|
||||
do_run_with_base_page(fn, false);
|
||||
}
|
||||
|
||||
static void run_with_base_page_swap(test_fn fn, const char *desc)
|
||||
{
|
||||
ksft_print_msg("[RUN] %s ... with swapped out base page\n", desc);
|
||||
log_test_start("%s ... with swapped out base page", desc);
|
||||
do_run_with_base_page(fn, true);
|
||||
}
|
||||
|
||||
|
@ -807,7 +859,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
|
|||
mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||
if (mmap_mem == MAP_FAILED) {
|
||||
ksft_test_result_fail("mmap() failed\n");
|
||||
ksft_perror("mmap() failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -816,7 +869,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
|
|||
|
||||
ret = madvise(mem, thpsize, MADV_HUGEPAGE);
|
||||
if (ret) {
|
||||
ksft_test_result_fail("MADV_HUGEPAGE failed\n");
|
||||
ksft_perror("MADV_HUGEPAGE failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto munmap;
|
||||
}
|
||||
|
||||
|
@ -826,7 +880,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
|
|||
*/
|
||||
mem[0] = 1;
|
||||
if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) {
|
||||
ksft_test_result_skip("Did not get a THP populated\n");
|
||||
ksft_print_msg("Did not get a THP populated\n");
|
||||
log_test_result(KSFT_SKIP);
|
||||
goto munmap;
|
||||
}
|
||||
memset(mem, 1, thpsize);
|
||||
|
@ -846,12 +901,14 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
|
|||
*/
|
||||
ret = mprotect(mem + pagesize, pagesize, PROT_READ);
|
||||
if (ret) {
|
||||
ksft_test_result_fail("mprotect() failed\n");
|
||||
ksft_perror("mprotect() failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto munmap;
|
||||
}
|
||||
ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
|
||||
if (ret) {
|
||||
ksft_test_result_fail("mprotect() failed\n");
|
||||
ksft_perror("mprotect() failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto munmap;
|
||||
}
|
||||
break;
|
||||
|
@ -863,7 +920,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
|
|||
*/
|
||||
ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTNEED);
|
||||
if (ret) {
|
||||
ksft_test_result_fail("MADV_DONTNEED failed\n");
|
||||
ksft_perror("MADV_DONTNEED failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto munmap;
|
||||
}
|
||||
size = pagesize;
|
||||
|
@ -877,13 +935,15 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
|
|||
mremap_mem = mmap(NULL, mremap_size, PROT_NONE,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||
if (mremap_mem == MAP_FAILED) {
|
||||
ksft_test_result_fail("mmap() failed\n");
|
||||
ksft_perror("mmap() failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto munmap;
|
||||
}
|
||||
tmp = mremap(mem + mremap_size, mremap_size, mremap_size,
|
||||
MREMAP_MAYMOVE | MREMAP_FIXED, mremap_mem);
|
||||
if (tmp != mremap_mem) {
|
||||
ksft_test_result_fail("mremap() failed\n");
|
||||
ksft_perror("mremap() failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto munmap;
|
||||
}
|
||||
size = mremap_size;
|
||||
|
@ -896,12 +956,14 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
|
|||
*/
|
||||
ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTFORK);
|
||||
if (ret) {
|
||||
ksft_test_result_fail("MADV_DONTFORK failed\n");
|
||||
ksft_perror("MADV_DONTFORK failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto munmap;
|
||||
}
|
||||
ret = fork();
|
||||
if (ret < 0) {
|
||||
ksft_test_result_fail("fork() failed\n");
|
||||
ksft_perror("fork() failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto munmap;
|
||||
} else if (!ret) {
|
||||
exit(0);
|
||||
|
@ -910,7 +972,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
|
|||
/* Allow for sharing all pages again. */
|
||||
ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DOFORK);
|
||||
if (ret) {
|
||||
ksft_test_result_fail("MADV_DOFORK failed\n");
|
||||
ksft_perror("MADV_DOFORK failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto munmap;
|
||||
}
|
||||
break;
|
||||
|
@ -924,7 +987,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
|
|||
case THP_RUN_SINGLE_PTE_SWAPOUT:
|
||||
madvise(mem, size, MADV_PAGEOUT);
|
||||
if (!range_is_swapped(mem, size)) {
|
||||
ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n");
|
||||
ksft_print_msg("MADV_PAGEOUT did not work, is swap enabled?\n");
|
||||
log_test_result(KSFT_SKIP);
|
||||
goto munmap;
|
||||
}
|
||||
break;
|
||||
|
@ -941,56 +1005,56 @@ munmap:
|
|||
|
||||
static void run_with_thp(test_fn fn, const char *desc, size_t size)
|
||||
{
|
||||
ksft_print_msg("[RUN] %s ... with THP (%zu kB)\n",
|
||||
log_test_start("%s ... with THP (%zu kB)",
|
||||
desc, size / 1024);
|
||||
do_run_with_thp(fn, THP_RUN_PMD, size);
|
||||
}
|
||||
|
||||
static void run_with_thp_swap(test_fn fn, const char *desc, size_t size)
|
||||
{
|
||||
ksft_print_msg("[RUN] %s ... with swapped-out THP (%zu kB)\n",
|
||||
log_test_start("%s ... with swapped-out THP (%zu kB)",
|
||||
desc, size / 1024);
|
||||
do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT, size);
|
||||
}
|
||||
|
||||
static void run_with_pte_mapped_thp(test_fn fn, const char *desc, size_t size)
|
||||
{
|
||||
ksft_print_msg("[RUN] %s ... with PTE-mapped THP (%zu kB)\n",
|
||||
log_test_start("%s ... with PTE-mapped THP (%zu kB)",
|
||||
desc, size / 1024);
|
||||
do_run_with_thp(fn, THP_RUN_PTE, size);
|
||||
}
|
||||
|
||||
static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc, size_t size)
|
||||
{
|
||||
ksft_print_msg("[RUN] %s ... with swapped-out, PTE-mapped THP (%zu kB)\n",
|
||||
log_test_start("%s ... with swapped-out, PTE-mapped THP (%zu kB)",
|
||||
desc, size / 1024);
|
||||
do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT, size);
|
||||
}
|
||||
|
||||
static void run_with_single_pte_of_thp(test_fn fn, const char *desc, size_t size)
|
||||
{
|
||||
ksft_print_msg("[RUN] %s ... with single PTE of THP (%zu kB)\n",
|
||||
log_test_start("%s ... with single PTE of THP (%zu kB)",
|
||||
desc, size / 1024);
|
||||
do_run_with_thp(fn, THP_RUN_SINGLE_PTE, size);
|
||||
}
|
||||
|
||||
static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc, size_t size)
|
||||
{
|
||||
ksft_print_msg("[RUN] %s ... with single PTE of swapped-out THP (%zu kB)\n",
|
||||
log_test_start("%s ... with single PTE of swapped-out THP (%zu kB)",
|
||||
desc, size / 1024);
|
||||
do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT, size);
|
||||
}
|
||||
|
||||
static void run_with_partial_mremap_thp(test_fn fn, const char *desc, size_t size)
|
||||
{
|
||||
ksft_print_msg("[RUN] %s ... with partially mremap()'ed THP (%zu kB)\n",
|
||||
log_test_start("%s ... with partially mremap()'ed THP (%zu kB)",
|
||||
desc, size / 1024);
|
||||
do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP, size);
|
||||
}
|
||||
|
||||
static void run_with_partial_shared_thp(test_fn fn, const char *desc, size_t size)
|
||||
{
|
||||
ksft_print_msg("[RUN] %s ... with partially shared THP (%zu kB)\n",
|
||||
log_test_start("%s ... with partially shared THP (%zu kB)",
|
||||
desc, size / 1024);
|
||||
do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED, size);
|
||||
}
|
||||
|
@ -1000,14 +1064,15 @@ static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize)
|
|||
int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB;
|
||||
char *mem, *dummy;
|
||||
|
||||
ksft_print_msg("[RUN] %s ... with hugetlb (%zu kB)\n", desc,
|
||||
log_test_start("%s ... with hugetlb (%zu kB)", desc,
|
||||
hugetlbsize / 1024);
|
||||
|
||||
flags |= __builtin_ctzll(hugetlbsize) << MAP_HUGE_SHIFT;
|
||||
|
||||
mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
|
||||
if (mem == MAP_FAILED) {
|
||||
ksft_test_result_skip("need more free huge pages\n");
|
||||
ksft_perror("need more free huge pages");
|
||||
log_test_result(KSFT_SKIP);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1020,7 +1085,8 @@ static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize)
|
|||
*/
|
||||
dummy = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
|
||||
if (dummy == MAP_FAILED) {
|
||||
ksft_test_result_skip("need more free huge pages\n");
|
||||
ksft_perror("need more free huge pages");
|
||||
log_test_result(KSFT_SKIP);
|
||||
goto munmap;
|
||||
}
|
||||
munmap(dummy, hugetlbsize);
|
||||
|
@ -1226,7 +1292,7 @@ static void do_test_anon_thp_collapse(char *mem, size_t size,
|
|||
|
||||
ret = setup_comm_pipes(&comm_pipes);
|
||||
if (ret) {
|
||||
ksft_test_result_fail("pipe() failed\n");
|
||||
log_test_result(KSFT_FAIL);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1236,12 +1302,14 @@ static void do_test_anon_thp_collapse(char *mem, size_t size,
|
|||
*/
|
||||
ret = mprotect(mem + pagesize, pagesize, PROT_READ);
|
||||
if (ret) {
|
||||
ksft_test_result_fail("mprotect() failed\n");
|
||||
ksft_perror("mprotect() failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto close_comm_pipes;
|
||||
}
|
||||
ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
|
||||
if (ret) {
|
||||
ksft_test_result_fail("mprotect() failed\n");
|
||||
ksft_perror("mprotect() failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto close_comm_pipes;
|
||||
}
|
||||
|
||||
|
@ -1250,8 +1318,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size,
|
|||
/* Collapse before actually COW-sharing the page. */
|
||||
ret = madvise(mem, size, MADV_COLLAPSE);
|
||||
if (ret) {
|
||||
ksft_test_result_skip("MADV_COLLAPSE failed: %s\n",
|
||||
strerror(errno));
|
||||
ksft_perror("MADV_COLLAPSE failed");
|
||||
log_test_result(KSFT_SKIP);
|
||||
goto close_comm_pipes;
|
||||
}
|
||||
break;
|
||||
|
@ -1262,7 +1330,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size,
|
|||
/* Don't COW-share the upper part of the THP. */
|
||||
ret = madvise(mem + size / 2, size / 2, MADV_DONTFORK);
|
||||
if (ret) {
|
||||
ksft_test_result_fail("MADV_DONTFORK failed\n");
|
||||
ksft_perror("MADV_DONTFORK failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto close_comm_pipes;
|
||||
}
|
||||
break;
|
||||
|
@ -1270,7 +1339,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size,
|
|||
/* Don't COW-share the lower part of the THP. */
|
||||
ret = madvise(mem, size / 2, MADV_DONTFORK);
|
||||
if (ret) {
|
||||
ksft_test_result_fail("MADV_DONTFORK failed\n");
|
||||
ksft_perror("MADV_DONTFORK failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto close_comm_pipes;
|
||||
}
|
||||
break;
|
||||
|
@ -1280,7 +1350,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size,
|
|||
|
||||
ret = fork();
|
||||
if (ret < 0) {
|
||||
ksft_test_result_fail("fork() failed\n");
|
||||
ksft_perror("fork() failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto close_comm_pipes;
|
||||
} else if (!ret) {
|
||||
switch (test) {
|
||||
|
@ -1314,7 +1385,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size,
|
|||
*/
|
||||
ret = madvise(mem, size, MADV_DOFORK);
|
||||
if (ret) {
|
||||
ksft_test_result_fail("MADV_DOFORK failed\n");
|
||||
ksft_perror("MADV_DOFORK failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
write(comm_pipes.parent_ready[1], "0", 1);
|
||||
wait(&ret);
|
||||
goto close_comm_pipes;
|
||||
|
@ -1324,8 +1396,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size,
|
|||
/* Collapse before anyone modified the COW-shared page. */
|
||||
ret = madvise(mem, size, MADV_COLLAPSE);
|
||||
if (ret) {
|
||||
ksft_test_result_skip("MADV_COLLAPSE failed: %s\n",
|
||||
strerror(errno));
|
||||
ksft_perror("MADV_COLLAPSE failed");
|
||||
log_test_result(KSFT_SKIP);
|
||||
write(comm_pipes.parent_ready[1], "0", 1);
|
||||
wait(&ret);
|
||||
goto close_comm_pipes;
|
||||
|
@ -1345,7 +1417,10 @@ static void do_test_anon_thp_collapse(char *mem, size_t size,
|
|||
else
|
||||
ret = -EINVAL;
|
||||
|
||||
ksft_test_result(!ret, "No leak from parent into child\n");
|
||||
if (!ret)
|
||||
log_test_result(KSFT_PASS);
|
||||
else
|
||||
log_test_result(KSFT_FAIL);
|
||||
close_comm_pipes:
|
||||
close_comm_pipes(&comm_pipes);
|
||||
}
|
||||
|
@ -1430,7 +1505,7 @@ static void run_anon_thp_test_cases(void)
|
|||
for (i = 0; i < ARRAY_SIZE(anon_thp_test_cases); i++) {
|
||||
struct test_case const *test_case = &anon_thp_test_cases[i];
|
||||
|
||||
ksft_print_msg("[RUN] %s\n", test_case->desc);
|
||||
log_test_start("%s", test_case->desc);
|
||||
do_run_with_thp(test_case->fn, THP_RUN_PMD, pmdsize);
|
||||
}
|
||||
}
|
||||
|
@ -1453,8 +1528,10 @@ static void test_cow(char *mem, const char *smem, size_t size)
|
|||
memset(mem, 0xff, size);
|
||||
|
||||
/* See if we still read the old values via the other mapping. */
|
||||
ksft_test_result(!memcmp(smem, old, size),
|
||||
"Other mapping not modified\n");
|
||||
if (!memcmp(smem, old, size))
|
||||
log_test_result(KSFT_PASS);
|
||||
else
|
||||
log_test_result(KSFT_FAIL);
|
||||
free(old);
|
||||
}
|
||||
|
||||
|
@ -1472,18 +1549,20 @@ static void run_with_zeropage(non_anon_test_fn fn, const char *desc)
|
|||
{
|
||||
char *mem, *smem, tmp;
|
||||
|
||||
ksft_print_msg("[RUN] %s ... with shared zeropage\n", desc);
|
||||
log_test_start("%s ... with shared zeropage", desc);
|
||||
|
||||
mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_ANON, -1, 0);
|
||||
if (mem == MAP_FAILED) {
|
||||
ksft_test_result_fail("mmap() failed\n");
|
||||
ksft_perror("mmap() failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
return;
|
||||
}
|
||||
|
||||
smem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0);
|
||||
if (smem == MAP_FAILED) {
|
||||
ksft_test_result_fail("mmap() failed\n");
|
||||
ksft_perror("mmap() failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto munmap;
|
||||
}
|
||||
|
||||
|
@ -1504,10 +1583,11 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc)
|
|||
size_t mmap_size;
|
||||
int ret;
|
||||
|
||||
ksft_print_msg("[RUN] %s ... with huge zeropage\n", desc);
|
||||
log_test_start("%s ... with huge zeropage", desc);
|
||||
|
||||
if (!has_huge_zeropage) {
|
||||
ksft_test_result_skip("Huge zeropage not enabled\n");
|
||||
ksft_print_msg("Huge zeropage not enabled\n");
|
||||
log_test_result(KSFT_SKIP);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1516,13 +1596,15 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc)
|
|||
mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||
if (mmap_mem == MAP_FAILED) {
|
||||
ksft_test_result_fail("mmap() failed\n");
|
||||
ksft_perror("mmap() failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
return;
|
||||
}
|
||||
mmap_smem = mmap(NULL, mmap_size, PROT_READ,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||
if (mmap_smem == MAP_FAILED) {
|
||||
ksft_test_result_fail("mmap() failed\n");
|
||||
ksft_perror("mmap() failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto munmap;
|
||||
}
|
||||
|
||||
|
@ -1531,9 +1613,15 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc)
|
|||
smem = (char *)(((uintptr_t)mmap_smem + pmdsize) & ~(pmdsize - 1));
|
||||
|
||||
ret = madvise(mem, pmdsize, MADV_HUGEPAGE);
|
||||
if (ret != 0) {
|
||||
ksft_perror("madvise()");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto munmap;
|
||||
}
|
||||
ret |= madvise(smem, pmdsize, MADV_HUGEPAGE);
|
||||
if (ret) {
|
||||
ksft_test_result_fail("MADV_HUGEPAGE failed\n");
|
||||
if (ret != 0) {
|
||||
ksft_perror("madvise()");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto munmap;
|
||||
}
|
||||
|
||||
|
@ -1562,29 +1650,33 @@ static void run_with_memfd(non_anon_test_fn fn, const char *desc)
|
|||
char *mem, *smem, tmp;
|
||||
int fd;
|
||||
|
||||
ksft_print_msg("[RUN] %s ... with memfd\n", desc);
|
||||
log_test_start("%s ... with memfd", desc);
|
||||
|
||||
fd = memfd_create("test", 0);
|
||||
if (fd < 0) {
|
||||
ksft_test_result_fail("memfd_create() failed\n");
|
||||
ksft_perror("memfd_create() failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
return;
|
||||
}
|
||||
|
||||
/* File consists of a single page filled with zeroes. */
|
||||
if (fallocate(fd, 0, 0, pagesize)) {
|
||||
ksft_test_result_fail("fallocate() failed\n");
|
||||
ksft_perror("fallocate() failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto close;
|
||||
}
|
||||
|
||||
/* Create a private mapping of the memfd. */
|
||||
mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
|
||||
if (mem == MAP_FAILED) {
|
||||
ksft_test_result_fail("mmap() failed\n");
|
||||
ksft_perror("mmap() failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto close;
|
||||
}
|
||||
smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
|
||||
if (smem == MAP_FAILED) {
|
||||
ksft_test_result_fail("mmap() failed\n");
|
||||
ksft_perror("mmap() failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto munmap;
|
||||
}
|
||||
|
||||
|
@ -1607,35 +1699,40 @@ static void run_with_tmpfile(non_anon_test_fn fn, const char *desc)
|
|||
FILE *file;
|
||||
int fd;
|
||||
|
||||
ksft_print_msg("[RUN] %s ... with tmpfile\n", desc);
|
||||
log_test_start("%s ... with tmpfile", desc);
|
||||
|
||||
file = tmpfile();
|
||||
if (!file) {
|
||||
ksft_test_result_fail("tmpfile() failed\n");
|
||||
ksft_perror("tmpfile() failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
return;
|
||||
}
|
||||
|
||||
fd = fileno(file);
|
||||
if (fd < 0) {
|
||||
ksft_test_result_skip("fileno() failed\n");
|
||||
ksft_perror("fileno() failed");
|
||||
log_test_result(KSFT_SKIP);
|
||||
return;
|
||||
}
|
||||
|
||||
/* File consists of a single page filled with zeroes. */
|
||||
if (fallocate(fd, 0, 0, pagesize)) {
|
||||
ksft_test_result_fail("fallocate() failed\n");
|
||||
ksft_perror("fallocate() failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto close;
|
||||
}
|
||||
|
||||
/* Create a private mapping of the memfd. */
|
||||
mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
|
||||
if (mem == MAP_FAILED) {
|
||||
ksft_test_result_fail("mmap() failed\n");
|
||||
ksft_perror("mmap() failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto close;
|
||||
}
|
||||
smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
|
||||
if (smem == MAP_FAILED) {
|
||||
ksft_test_result_fail("mmap() failed\n");
|
||||
ksft_perror("mmap() failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto munmap;
|
||||
}
|
||||
|
||||
|
@ -1659,20 +1756,22 @@ static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc,
|
|||
char *mem, *smem, tmp;
|
||||
int fd;
|
||||
|
||||
ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc,
|
||||
log_test_start("%s ... with memfd hugetlb (%zu kB)", desc,
|
||||
hugetlbsize / 1024);
|
||||
|
||||
flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT;
|
||||
|
||||
fd = memfd_create("test", flags);
|
||||
if (fd < 0) {
|
||||
ksft_test_result_skip("memfd_create() failed\n");
|
||||
ksft_perror("memfd_create() failed");
|
||||
log_test_result(KSFT_SKIP);
|
||||
return;
|
||||
}
|
||||
|
||||
/* File consists of a single page filled with zeroes. */
|
||||
if (fallocate(fd, 0, 0, hugetlbsize)) {
|
||||
ksft_test_result_skip("need more free huge pages\n");
|
||||
ksft_perror("need more free huge pages");
|
||||
log_test_result(KSFT_SKIP);
|
||||
goto close;
|
||||
}
|
||||
|
||||
|
@ -1680,12 +1779,14 @@ static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc,
|
|||
mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd,
|
||||
0);
|
||||
if (mem == MAP_FAILED) {
|
||||
ksft_test_result_skip("need more free huge pages\n");
|
||||
ksft_perror("need more free huge pages");
|
||||
log_test_result(KSFT_SKIP);
|
||||
goto close;
|
||||
}
|
||||
smem = mmap(NULL, hugetlbsize, PROT_READ, MAP_SHARED, fd, 0);
|
||||
if (smem == MAP_FAILED) {
|
||||
ksft_test_result_fail("mmap() failed\n");
|
||||
ksft_perror("mmap() failed");
|
||||
log_test_result(KSFT_FAIL);
|
||||
goto munmap;
|
||||
}
|
||||
|
||||
|
@ -1771,7 +1872,6 @@ static int tests_per_non_anon_test_case(void)
|
|||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int err;
|
||||
struct thp_settings default_settings;
|
||||
|
||||
ksft_print_header();
|
||||
|
@ -1811,9 +1911,5 @@ int main(int argc, char **argv)
|
|||
thp_restore_settings();
|
||||
}
|
||||
|
||||
err = ksft_get_fail_cnt();
|
||||
if (err)
|
||||
ksft_exit_fail_msg("%d out of %d tests failed\n",
|
||||
err, ksft_test_num());
|
||||
ksft_exit_pass();
|
||||
ksft_finished();
|
||||
}
|
||||
|
|
|
@@ -1453,8 +1453,21 @@ TEST_F(guard_regions, uffd)

	/* Set up uffd. */
	uffd = userfaultfd(0);
	if (uffd == -1 && errno == EPERM)
		ksft_exit_skip("No userfaultfd permissions, try running as root.\n");
	if (uffd == -1) {
		switch (errno) {
		case EPERM:
			SKIP(return, "No userfaultfd permissions, try running as root.");
			break;
		case ENOSYS:
			SKIP(return, "userfaultfd is not supported/not enabled.");
			break;
		default:
			ksft_exit_fail_msg("userfaultfd failed with %s\n",
					   strerror(errno));
			break;
		}
	}

	ASSERT_NE(uffd, -1);

	ASSERT_EQ(ioctl(uffd, UFFDIO_API, &api), 0);
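The hunk above makes the uffd fixture skip cleanly when userfaultfd is unavailable instead of failing the run. For reference, here is a standalone C sketch of the same probe outside the kselftest harness; the syscall and errno values are the real ones, the program itself is only an illustration.

```c
#define _GNU_SOURCE
#include <errno.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#define KSFT_SKIP 4	/* kselftest skip exit code */

/* Probe userfaultfd availability the way the fixture now does. */
int main(void)
{
	long uffd = syscall(__NR_userfaultfd, 0);

	if (uffd == -1) {
		switch (errno) {
		case EPERM:
			printf("skip: no userfaultfd permissions, try running as root\n");
			return KSFT_SKIP;
		case ENOSYS:
			printf("skip: userfaultfd is not supported/not enabled\n");
			return KSFT_SKIP;
		default:
			perror("userfaultfd");
			return 1;
		}
	}

	printf("userfaultfd is available (fd %ld)\n", uffd);
	close(uffd);
	return 0;
}
```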
@ -93,33 +93,48 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
|
|||
__fsword_t fs_type = get_fs_type(fd);
|
||||
bool should_work;
|
||||
char *mem;
|
||||
int result = KSFT_PASS;
|
||||
int ret;
|
||||
|
||||
if (fd < 0) {
|
||||
result = KSFT_FAIL;
|
||||
goto report;
|
||||
}
|
||||
|
||||
if (ftruncate(fd, size)) {
|
||||
if (errno == ENOENT) {
|
||||
skip_test_dodgy_fs("ftruncate()");
|
||||
} else {
|
||||
ksft_test_result_fail("ftruncate() failed (%s)\n", strerror(errno));
|
||||
ksft_print_msg("ftruncate() failed (%s)\n",
|
||||
strerror(errno));
|
||||
result = KSFT_FAIL;
|
||||
goto report;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (fallocate(fd, 0, 0, size)) {
|
||||
if (size == pagesize)
|
||||
ksft_test_result_fail("fallocate() failed (%s)\n", strerror(errno));
|
||||
else
|
||||
ksft_test_result_skip("need more free huge pages\n");
|
||||
return;
|
||||
if (size == pagesize) {
|
||||
ksft_print_msg("fallocate() failed (%s)\n", strerror(errno));
|
||||
result = KSFT_FAIL;
|
||||
} else {
|
||||
ksft_print_msg("need more free huge pages\n");
|
||||
result = KSFT_SKIP;
|
||||
}
|
||||
goto report;
|
||||
}
|
||||
|
||||
mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
|
||||
shared ? MAP_SHARED : MAP_PRIVATE, fd, 0);
|
||||
if (mem == MAP_FAILED) {
|
||||
if (size == pagesize || shared)
|
||||
ksft_test_result_fail("mmap() failed (%s)\n", strerror(errno));
|
||||
else
|
||||
ksft_test_result_skip("need more free huge pages\n");
|
||||
return;
|
||||
if (size == pagesize || shared) {
|
||||
ksft_print_msg("mmap() failed (%s)\n", strerror(errno));
|
||||
result = KSFT_FAIL;
|
||||
} else {
|
||||
ksft_print_msg("need more free huge pages\n");
|
||||
result = KSFT_SKIP;
|
||||
}
|
||||
goto report;
|
||||
}
|
||||
|
||||
/* Fault in the page such that GUP-fast can pin it directly. */
|
||||
|
@ -134,7 +149,8 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
|
|||
*/
|
||||
ret = mprotect(mem, size, PROT_READ);
|
||||
if (ret) {
|
||||
ksft_test_result_fail("mprotect() failed (%s)\n", strerror(errno));
|
||||
ksft_print_msg("mprotect() failed (%s)\n", strerror(errno));
|
||||
result = KSFT_FAIL;
|
||||
goto munmap;
|
||||
}
|
||||
/* FALLTHROUGH */
|
||||
|
@ -147,12 +163,14 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
|
|||
type == TEST_TYPE_RW_FAST;
|
||||
|
||||
if (gup_fd < 0) {
|
||||
ksft_test_result_skip("gup_test not available\n");
|
||||
ksft_print_msg("gup_test not available\n");
|
||||
result = KSFT_SKIP;
|
||||
break;
|
||||
}
|
||||
|
||||
if (rw && shared && fs_is_unknown(fs_type)) {
|
||||
ksft_test_result_skip("Unknown filesystem\n");
|
||||
ksft_print_msg("Unknown filesystem\n");
|
||||
result = KSFT_SKIP;
|
||||
return;
|
||||
}
|
||||
/*
|
||||
|
@ -169,14 +187,19 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
|
|||
args.flags |= rw ? PIN_LONGTERM_TEST_FLAG_USE_WRITE : 0;
|
||||
ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args);
|
||||
if (ret && errno == EINVAL) {
|
||||
ksft_test_result_skip("PIN_LONGTERM_TEST_START failed (EINVAL)n");
|
||||
ksft_print_msg("PIN_LONGTERM_TEST_START failed (EINVAL)n");
|
||||
result = KSFT_SKIP;
|
||||
break;
|
||||
} else if (ret && errno == EFAULT) {
|
||||
ksft_test_result(!should_work, "Should have failed\n");
|
||||
if (should_work)
|
||||
result = KSFT_FAIL;
|
||||
else
|
||||
result = KSFT_PASS;
|
||||
break;
|
||||
} else if (ret) {
|
||||
ksft_test_result_fail("PIN_LONGTERM_TEST_START failed (%s)\n",
|
||||
strerror(errno));
|
||||
ksft_print_msg("PIN_LONGTERM_TEST_START failed (%s)\n",
|
||||
strerror(errno));
|
||||
result = KSFT_FAIL;
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -189,7 +212,10 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
|
|||
* some previously unsupported filesystems, we might want to
|
||||
* perform some additional tests for possible data corruptions.
|
||||
*/
|
||||
ksft_test_result(should_work, "Should have worked\n");
|
||||
if (should_work)
|
||||
result = KSFT_PASS;
|
||||
else
|
||||
result = KSFT_FAIL;
|
||||
break;
|
||||
}
|
||||
#ifdef LOCAL_CONFIG_HAVE_LIBURING
|
||||
|
@ -199,8 +225,9 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
|
|||
|
||||
/* io_uring always pins pages writable. */
|
||||
if (shared && fs_is_unknown(fs_type)) {
|
||||
ksft_test_result_skip("Unknown filesystem\n");
|
||||
return;
|
||||
ksft_print_msg("Unknown filesystem\n");
|
||||
result = KSFT_SKIP;
|
||||
goto report;
|
||||
}
|
||||
should_work = !shared ||
|
||||
fs_supports_writable_longterm_pinning(fs_type);
|
||||
|
@ -208,8 +235,9 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
|
|||
/* Skip on errors, as we might just lack kernel support. */
|
||||
ret = io_uring_queue_init(1, &ring, 0);
|
||||
if (ret < 0) {
|
||||
ksft_test_result_skip("io_uring_queue_init() failed (%s)\n",
|
||||
strerror(-ret));
|
||||
ksft_print_msg("io_uring_queue_init() failed (%s)\n",
|
||||
strerror(-ret));
|
||||
result = KSFT_SKIP;
|
||||
break;
|
||||
}
|
||||
/*
|
||||
|
@ -222,17 +250,28 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
|
|||
/* Only new kernels return EFAULT. */
|
||||
if (ret && (errno == ENOSPC || errno == EOPNOTSUPP ||
|
||||
errno == EFAULT)) {
|
||||
ksft_test_result(!should_work, "Should have failed (%s)\n",
|
||||
strerror(errno));
|
||||
if (should_work) {
|
||||
ksft_print_msg("Should have failed (%s)\n",
|
||||
strerror(errno));
|
||||
result = KSFT_FAIL;
|
||||
} else {
|
||||
result = KSFT_PASS;
|
||||
}
|
||||
} else if (ret) {
|
||||
/*
|
||||
* We might just lack support or have insufficient
|
||||
* MEMLOCK limits.
|
||||
*/
|
||||
ksft_test_result_skip("io_uring_register_buffers() failed (%s)\n",
|
||||
strerror(-ret));
|
||||
ksft_print_msg("io_uring_register_buffers() failed (%s)\n",
|
||||
strerror(-ret));
|
||||
result = KSFT_SKIP;
|
||||
} else {
|
||||
ksft_test_result(should_work, "Should have worked\n");
|
||||
if (should_work) {
|
||||
result = KSFT_PASS;
|
||||
} else {
|
||||
ksft_print_msg("Should have worked\n");
|
||||
result = KSFT_FAIL;
|
||||
}
|
||||
io_uring_unregister_buffers(&ring);
|
||||
}
|
||||
|
||||
|
@ -246,6 +285,8 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
|
|||
|
||||
munmap:
|
||||
munmap(mem, size);
|
||||
report:
|
||||
log_test_result(result);
|
||||
}
|
||||
|
||||
typedef void (*test_fn)(int fd, size_t size);
|
||||
|
@ -254,13 +295,11 @@ static void run_with_memfd(test_fn fn, const char *desc)
|
|||
{
|
||||
int fd;
|
||||
|
||||
ksft_print_msg("[RUN] %s ... with memfd\n", desc);
|
||||
log_test_start("%s ... with memfd", desc);
|
||||
|
||||
fd = memfd_create("test", 0);
|
||||
if (fd < 0) {
|
||||
ksft_test_result_fail("memfd_create() failed (%s)\n", strerror(errno));
|
||||
return;
|
||||
}
|
||||
if (fd < 0)
|
||||
ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno));
|
||||
|
||||
fn(fd, pagesize);
|
||||
close(fd);
|
||||
|
@ -271,23 +310,23 @@ static void run_with_tmpfile(test_fn fn, const char *desc)
|
|||
FILE *file;
|
||||
int fd;
|
||||
|
||||
ksft_print_msg("[RUN] %s ... with tmpfile\n", desc);
|
||||
log_test_start("%s ... with tmpfile", desc);
|
||||
|
||||
file = tmpfile();
|
||||
if (!file) {
|
||||
ksft_test_result_fail("tmpfile() failed (%s)\n", strerror(errno));
|
||||
return;
|
||||
}
|
||||
|
||||
fd = fileno(file);
|
||||
if (fd < 0) {
|
||||
ksft_test_result_fail("fileno() failed (%s)\n", strerror(errno));
|
||||
goto close;
|
||||
ksft_print_msg("tmpfile() failed (%s)\n", strerror(errno));
|
||||
fd = -1;
|
||||
} else {
|
||||
fd = fileno(file);
|
||||
if (fd < 0) {
|
||||
ksft_print_msg("fileno() failed (%s)\n", strerror(errno));
|
||||
}
|
||||
}
|
||||
|
||||
fn(fd, pagesize);
|
||||
close:
|
||||
fclose(file);
|
||||
|
||||
if (file)
|
||||
fclose(file);
|
||||
}
|
||||
|
||||
static void run_with_local_tmpfile(test_fn fn, const char *desc)
|
||||
|
@ -295,22 +334,22 @@ static void run_with_local_tmpfile(test_fn fn, const char *desc)
|
|||
char filename[] = __FILE__"_tmpfile_XXXXXX";
|
||||
int fd;
|
||||
|
||||
ksft_print_msg("[RUN] %s ... with local tmpfile\n", desc);
|
||||
log_test_start("%s ... with local tmpfile", desc);
|
||||
|
||||
fd = mkstemp(filename);
|
||||
if (fd < 0) {
|
||||
ksft_test_result_fail("mkstemp() failed (%s)\n", strerror(errno));
|
||||
return;
|
||||
}
|
||||
if (fd < 0)
|
||||
ksft_print_msg("mkstemp() failed (%s)\n", strerror(errno));
|
||||
|
||||
if (unlink(filename)) {
|
||||
ksft_test_result_fail("unlink() failed (%s)\n", strerror(errno));
|
||||
goto close;
|
||||
ksft_print_msg("unlink() failed (%s)\n", strerror(errno));
|
||||
close(fd);
|
||||
fd = -1;
|
||||
}
|
||||
|
||||
fn(fd, pagesize);
|
||||
close:
|
||||
close(fd);
|
||||
|
||||
if (fd >= 0)
|
||||
close(fd);
|
||||
}
|
||||
|
||||
static void run_with_memfd_hugetlb(test_fn fn, const char *desc,
|
||||
|
@ -319,15 +358,14 @@ static void run_with_memfd_hugetlb(test_fn fn, const char *desc,
|
|||
int flags = MFD_HUGETLB;
|
||||
int fd;
|
||||
|
||||
ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc,
|
||||
log_test_start("%s ... with memfd hugetlb (%zu kB)", desc,
|
||||
hugetlbsize / 1024);
|
||||
|
||||
flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT;
|
||||
|
||||
fd = memfd_create("test", flags);
|
||||
if (fd < 0) {
|
||||
ksft_test_result_skip("memfd_create() failed (%s)\n", strerror(errno));
|
||||
return;
|
||||
ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno));
|
||||
}
|
||||
|
||||
fn(fd, hugetlbsize);
|
||||
|
@ -455,7 +493,7 @@ static int tests_per_test_case(void)
|
|||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int i, err;
|
||||
int i;
|
||||
|
||||
pagesize = getpagesize();
|
||||
nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes,
|
||||
|
@ -469,9 +507,5 @@ int main(int argc, char **argv)
|
|||
for (i = 0; i < ARRAY_SIZE(test_cases); i++)
|
||||
run_test_case(&test_cases[i]);
|
||||
|
||||
err = ksft_get_fail_cnt();
|
||||
if (err)
|
||||
ksft_exit_fail_msg("%d out of %d tests failed\n",
|
||||
err, ksft_test_num());
|
||||
ksft_exit_pass();
|
||||
ksft_finished();
|
||||
}
|
||||
|
|
|
@ -172,12 +172,12 @@ static void test_populate_read(void)
|
|||
if (addr == MAP_FAILED)
|
||||
ksft_exit_fail_msg("mmap failed\n");
|
||||
ksft_test_result(range_is_not_populated(addr, SIZE),
|
||||
"range initially not populated\n");
|
||||
"read range initially not populated\n");
|
||||
|
||||
ret = madvise(addr, SIZE, MADV_POPULATE_READ);
|
||||
ksft_test_result(!ret, "MADV_POPULATE_READ\n");
|
||||
ksft_test_result(range_is_populated(addr, SIZE),
|
||||
"range is populated\n");
|
||||
"read range is populated\n");
|
||||
|
||||
munmap(addr, SIZE);
|
||||
}
|
||||
|
@ -194,12 +194,12 @@ static void test_populate_write(void)
|
|||
if (addr == MAP_FAILED)
|
||||
ksft_exit_fail_msg("mmap failed\n");
|
||||
ksft_test_result(range_is_not_populated(addr, SIZE),
|
||||
"range initially not populated\n");
|
||||
"write range initially not populated\n");
|
||||
|
||||
ret = madvise(addr, SIZE, MADV_POPULATE_WRITE);
|
||||
ksft_test_result(!ret, "MADV_POPULATE_WRITE\n");
|
||||
ksft_test_result(range_is_populated(addr, SIZE),
|
||||
"range is populated\n");
|
||||
"write range is populated\n");
|
||||
|
||||
munmap(addr, SIZE);
|
||||
}
|
||||
|
@ -247,19 +247,19 @@ static void test_softdirty(void)
|
|||
/* Clear any softdirty bits. */
|
||||
clear_softdirty();
|
||||
ksft_test_result(range_is_not_softdirty(addr, SIZE),
|
||||
"range is not softdirty\n");
|
||||
"cleared range is not softdirty\n");
|
||||
|
||||
/* Populating READ should set softdirty. */
|
||||
ret = madvise(addr, SIZE, MADV_POPULATE_READ);
|
||||
ksft_test_result(!ret, "MADV_POPULATE_READ\n");
|
||||
ksft_test_result(!ret, "softdirty MADV_POPULATE_READ\n");
|
||||
ksft_test_result(range_is_not_softdirty(addr, SIZE),
|
||||
"range is not softdirty\n");
|
||||
"range is not softdirty after MADV_POPULATE_READ\n");
|
||||
|
||||
/* Populating WRITE should set softdirty. */
|
||||
ret = madvise(addr, SIZE, MADV_POPULATE_WRITE);
|
||||
ksft_test_result(!ret, "MADV_POPULATE_WRITE\n");
|
||||
ksft_test_result(!ret, "softdirty MADV_POPULATE_WRITE\n");
|
||||
ksft_test_result(range_is_softdirty(addr, SIZE),
|
||||
"range is softdirty\n");
|
||||
"range is softdirty after MADV_POPULATE_WRITE \n");
|
||||
|
||||
munmap(addr, SIZE);
|
||||
}
|
||||
|
|
|
@ -196,7 +196,7 @@ static void test_mlock_lock(void)
|
|||
ksft_exit_fail_msg("munlock(): %s\n", strerror(errno));
|
||||
}
|
||||
|
||||
ksft_test_result(!unlock_lock_check(map), "%s: Locked\n", __func__);
|
||||
ksft_test_result(!unlock_lock_check(map), "%s: Unlocked\n", __func__);
|
||||
munmap(map, 2 * page_size);
|
||||
}
|
||||
|
||||
|
|
|
@ -12,6 +12,8 @@
|
|||
#include <stdint.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
#include <fcntl.h>
|
||||
#include <signal.h>
|
||||
#include <setjmp.h>
|
||||
|
@ -43,14 +45,62 @@ static int test_read_access(char *addr, size_t size, size_t pagesize)
|
|||
/* Force a read that the compiler cannot optimize out. */
|
||||
*((volatile char *)(addr + offs));
|
||||
}
|
||||
if (signal(SIGSEGV, signal_handler) == SIG_ERR)
|
||||
if (signal(SIGSEGV, SIG_DFL) == SIG_ERR)
|
||||
return -EINVAL;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int find_ram_target(off_t *phys_addr,
|
||||
unsigned long long pagesize)
|
||||
{
|
||||
unsigned long long start, end;
|
||||
char line[80], *end_ptr;
|
||||
FILE *file;
|
||||
|
||||
/* Search /proc/iomem for the first suitable "System RAM" range. */
|
||||
file = fopen("/proc/iomem", "r");
|
||||
if (!file)
|
||||
return -errno;
|
||||
|
||||
while (fgets(line, sizeof(line), file)) {
|
||||
/* Ignore any child nodes. */
|
||||
if (!isalnum(line[0]))
|
||||
continue;
|
||||
|
||||
if (!strstr(line, "System RAM\n"))
|
||||
continue;
|
||||
|
||||
start = strtoull(line, &end_ptr, 16);
|
||||
/* Skip over the "-" */
|
||||
end_ptr++;
|
||||
/* Make end "exclusive". */
|
||||
end = strtoull(end_ptr, NULL, 16) + 1;
|
||||
|
||||
/* Actual addresses are not exported */
|
||||
if (!start && !end)
|
||||
break;
|
||||
|
||||
/* We need full pages. */
|
||||
start = (start + pagesize - 1) & ~(pagesize - 1);
|
||||
end &= ~(pagesize - 1);
|
||||
|
||||
if (start != (off_t)start)
|
||||
break;
|
||||
|
||||
/* We need two pages. */
|
||||
if (end > start + 2 * pagesize) {
|
||||
fclose(file);
|
||||
*phys_addr = start;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
FIXTURE(pfnmap)
|
||||
{
|
||||
off_t phys_addr;
|
||||
size_t pagesize;
|
||||
int dev_mem_fd;
|
||||
char *addr1;
|
||||
|
@ -63,14 +113,17 @@ FIXTURE_SETUP(pfnmap)
|
|||
{
|
||||
self->pagesize = getpagesize();
|
||||
|
||||
/* We'll require two physical pages throughout our tests ... */
|
||||
if (find_ram_target(&self->phys_addr, self->pagesize))
|
||||
SKIP(return, "Cannot find ram target in '/proc/iomem'\n");
|
||||
|
||||
self->dev_mem_fd = open("/dev/mem", O_RDONLY);
|
||||
if (self->dev_mem_fd < 0)
|
||||
SKIP(return, "Cannot open '/dev/mem'\n");
|
||||
|
||||
/* We'll require the first two pages throughout our tests ... */
|
||||
self->size1 = self->pagesize * 2;
|
||||
self->addr1 = mmap(NULL, self->size1, PROT_READ, MAP_SHARED,
|
||||
self->dev_mem_fd, 0);
|
||||
self->dev_mem_fd, self->phys_addr);
|
||||
if (self->addr1 == MAP_FAILED)
|
||||
SKIP(return, "Cannot mmap '/dev/mem'\n");
|
||||
|
||||
|
@ -129,7 +182,7 @@ TEST_F(pfnmap, munmap_split)
|
|||
*/
|
||||
self->size2 = self->pagesize;
|
||||
self->addr2 = mmap(NULL, self->pagesize, PROT_READ, MAP_SHARED,
|
||||
self->dev_mem_fd, 0);
|
||||
self->dev_mem_fd, self->phys_addr);
|
||||
ASSERT_NE(self->addr2, MAP_FAILED);
|
||||
}
|
||||
|
||||
|
|
|
@ -127,7 +127,7 @@ void test_mmap(unsigned long size, unsigned flags)
|
|||
|
||||
show(size);
|
||||
ksft_test_result(size == getpagesize() || (before - after) == NUM_PAGES,
|
||||
"%s mmap %lu\n", __func__, size);
|
||||
"%s mmap %lu %x\n", __func__, size, flags);
|
||||
|
||||
if (munmap(map, size * NUM_PAGES))
|
||||
ksft_exit_fail_msg("%s: unmap %s\n", __func__, strerror(errno));
|
||||
|
@ -165,7 +165,7 @@ void test_shmget(unsigned long size, unsigned flags)
|
|||
|
||||
show(size);
|
||||
ksft_test_result(size == getpagesize() || (before - after) == NUM_PAGES,
|
||||
"%s: mmap %lu\n", __func__, size);
|
||||
"%s: mmap %lu %x\n", __func__, size, flags);
|
||||
if (shmdt(map))
|
||||
ksft_exit_fail_msg("%s: shmdt: %s\n", __func__, strerror(errno));
|
||||
}
|
||||
|
|
|
@ -7,23 +7,20 @@
|
|||
# real test to check that the kernel is configured to support at least 5
|
||||
# pagetable levels.
|
||||
|
||||
# 1 means the test failed
|
||||
exitcode=1
|
||||
|
||||
# Kselftest framework requirement - SKIP code is 4.
|
||||
ksft_skip=4
|
||||
|
||||
fail()
|
||||
skip()
|
||||
{
|
||||
echo "$1"
|
||||
exit $exitcode
|
||||
exit $ksft_skip
|
||||
}
|
||||
|
||||
check_supported_x86_64()
|
||||
{
|
||||
local config="/proc/config.gz"
|
||||
[[ -f "${config}" ]] || config="/boot/config-$(uname -r)"
|
||||
[[ -f "${config}" ]] || fail "Cannot find kernel config in /proc or /boot"
|
||||
[[ -f "${config}" ]] || skip "Cannot find kernel config in /proc or /boot"
|
||||
|
||||
# gzip -dcfq automatically handles both compressed and plaintext input.
|
||||
# See man 1 gzip under '-f'.
|
||||
|
@ -33,11 +30,9 @@ check_supported_x86_64()
|
|||
else {print 1}; exit}' /proc/cpuinfo 2>/dev/null)
|
||||
|
||||
if [[ "${pg_table_levels}" -lt 5 ]]; then
|
||||
echo "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test"
|
||||
exit $ksft_skip
|
||||
skip "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test"
|
||||
elif [[ "${cpu_supports_pl5}" -ne 0 ]]; then
|
||||
echo "$0: CPU does not have the necessary la57 flag to support page table level 5"
|
||||
exit $ksft_skip
|
||||
skip "$0: CPU does not have the necessary la57 flag to support page table level 5"
|
||||
fi
|
||||
}
|
||||
|
||||
|
@ -45,24 +40,21 @@ check_supported_ppc64()
|
|||
{
|
||||
local config="/proc/config.gz"
|
||||
[[ -f "${config}" ]] || config="/boot/config-$(uname -r)"
|
||||
[[ -f "${config}" ]] || fail "Cannot find kernel config in /proc or /boot"
|
||||
[[ -f "${config}" ]] || skip "Cannot find kernel config in /proc or /boot"
|
||||
|
||||
local pg_table_levels=$(gzip -dcfq "${config}" | grep PGTABLE_LEVELS | cut -d'=' -f 2)
|
||||
if [[ "${pg_table_levels}" -lt 5 ]]; then
|
||||
echo "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test"
|
||||
exit $ksft_skip
|
||||
skip "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test"
|
||||
fi
|
||||
|
||||
local mmu_support=$(grep -m1 "mmu" /proc/cpuinfo | awk '{print $3}')
|
||||
if [[ "$mmu_support" != "radix" ]]; then
|
||||
echo "$0: System does not use Radix MMU, required for 5-level paging"
|
||||
exit $ksft_skip
|
||||
skip "$0: System does not use Radix MMU, required for 5-level paging"
|
||||
fi
|
||||
|
||||
local hugepages_total=$(awk '/HugePages_Total/ {print $2}' /proc/meminfo)
|
||||
if [[ "${hugepages_total}" -eq 0 ]]; then
|
||||
echo "$0: HugePages are not enabled, required for some tests"
|
||||
exit $ksft_skip
|
||||
skip "$0: HugePages are not enabled, required for some tests"
|
||||
fi
|
||||
}
|
||||
|
||||
|
|
|
@@ -439,7 +439,7 @@ int open_procmap(pid_t pid, struct procmap_fd *procmap_out)
	sprintf(path, "/proc/%d/maps", pid);
	procmap_out->query.size = sizeof(procmap_out->query);
	procmap_out->fd = open(path, O_RDONLY);
	if (procmap_out < 0)
	if (procmap_out->fd < 0)
		ret = -errno;

	return ret;
@@ -3,6 +3,7 @@
#include <stdbool.h>
#include <sys/mman.h>
#include <err.h>
#include <stdarg.h>
#include <strings.h> /* ffsl() */
#include <unistd.h> /* _SC_PAGESIZE */
#include "../kselftest.h"

@@ -95,6 +96,25 @@ static inline int open_self_procmap(struct procmap_fd *procmap_out)
	return open_procmap(pid, procmap_out);
}

/* These helpers need to be inline to match the kselftest.h idiom. */
static char test_name[1024];

static inline void log_test_start(const char *name, ...)
{
	va_list args;
	va_start(args, name);

	vsnprintf(test_name, sizeof(test_name), name, args);
	ksft_print_msg("[RUN] %s\n", test_name);

	va_end(args);
}

static inline void log_test_result(int result)
{
	ksft_test_result_report(result, "%s\n", test_name);
}

/*
 * On ppc64 this will only work with radix 2M hugepage size
 */
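These helpers split what `ksft_test_result_*()` used to do in a single call: `log_test_start()` records a unique test name up front and `log_test_result()` reports the verdict later under that same name, which is how the converted cow and gup_longterm tests now emit one distinct result per started test. A short sketch of the calling pattern follows; the surrounding function is hypothetical, while `log_test_start()`, `log_test_result()`, `ksft_perror()` and the `KSFT_*` codes are the existing kselftest/vm_util interfaces.

```c
#include <sys/mman.h>

#include "../kselftest.h"
#include "vm_util.h"	/* log_test_start() / log_test_result() from the hunk above */

/* Hypothetical test body: exactly one result is reported per started test. */
static void run_example(const char *desc, size_t size)
{
	char *mem;

	log_test_start("%s ... with anonymous memory (%zu kB)", desc, size / 1024);

	mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mem == MAP_FAILED) {
		ksft_perror("mmap() failed");	/* diagnostics are just messages... */
		log_test_result(KSFT_FAIL);	/* ...the verdict reuses the stored name */
		return;
	}

	/* ... exercise the mapping here ... */

	log_test_result(KSFT_PASS);
	munmap(mem, size);
}
```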
@@ -1461,4 +1461,9 @@ static inline int __call_mmap_prepare(struct file *file,
	return file->f_op->mmap_prepare(desc);
}

static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma)
{
	(void)vma;
}

#endif /* __MM_VMA_INTERNAL_H */