mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-10-31 16:54:21 +00:00 
			
		
		
		
	hugetlb: take PMD sharing into account when flushing tlb/caches
When fixing an issue with PMD sharing and migration, it was discovered via code inspection that other callers of huge_pmd_unshare potentially have an issue with cache and tlb flushing. Use the routine adjust_range_if_pmd_sharing_possible() to calculate worst case ranges for mmu notifiers. Ensure that this range is flushed if huge_pmd_unshare succeeds and unmaps a PUD_SIZE area. Link: http://lkml.kernel.org/r/20180823205917.16297-3-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com> Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: Michal Hocko <mhocko@kernel.org> Cc: Jerome Glisse <jglisse@redhat.com> Cc: Mike Kravetz <mike.kravetz@oracle.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
		
							parent
							
								
									017b1660df
								
							
						
					
					
						commit
						dff11abe28
					
				
					 1 changed files with 44 additions and 9 deletions
				
			
		
							
								
								
									
										53
									
								
								mm/hugetlb.c
									
										
									
									
									
								
							
							
						
						
									
										53
									
								
								mm/hugetlb.c
									
										
									
									
									
								
							|  | @ -3326,8 +3326,8 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, | |||
| 	struct page *page; | ||||
| 	struct hstate *h = hstate_vma(vma); | ||||
| 	unsigned long sz = huge_page_size(h); | ||||
| 	const unsigned long mmun_start = start;	/* For mmu_notifiers */ | ||||
| 	const unsigned long mmun_end   = end;	/* For mmu_notifiers */ | ||||
| 	unsigned long mmun_start = start;	/* For mmu_notifiers */ | ||||
| 	unsigned long mmun_end   = end;		/* For mmu_notifiers */ | ||||
| 
 | ||||
| 	WARN_ON(!is_vm_hugetlb_page(vma)); | ||||
| 	BUG_ON(start & ~huge_page_mask(h)); | ||||
|  | @ -3339,6 +3339,11 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, | |||
| 	 */ | ||||
| 	tlb_remove_check_page_size_change(tlb, sz); | ||||
| 	tlb_start_vma(tlb, vma); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * If sharing possible, alert mmu notifiers of worst case. | ||||
| 	 */ | ||||
| 	adjust_range_if_pmd_sharing_possible(vma, &mmun_start, &mmun_end); | ||||
| 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); | ||||
| 	address = start; | ||||
| 	for (; address < end; address += sz) { | ||||
|  | @ -3349,6 +3354,10 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, | |||
| 		ptl = huge_pte_lock(h, mm, ptep); | ||||
| 		if (huge_pmd_unshare(mm, &address, ptep)) { | ||||
| 			spin_unlock(ptl); | ||||
| 			/*
 | ||||
| 			 * We just unmapped a page of PMDs by clearing a PUD. | ||||
| 			 * The caller's TLB flush range should cover this area. | ||||
| 			 */ | ||||
| 			continue; | ||||
| 		} | ||||
| 
 | ||||
|  | @ -3431,12 +3440,23 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, | |||
| { | ||||
| 	struct mm_struct *mm; | ||||
| 	struct mmu_gather tlb; | ||||
| 	unsigned long tlb_start = start; | ||||
| 	unsigned long tlb_end = end; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * If shared PMDs were possibly used within this vma range, adjust | ||||
| 	 * start/end for worst case tlb flushing. | ||||
| 	 * Note that we can not be sure if PMDs are shared until we try to | ||||
| 	 * unmap pages.  However, we want to make sure TLB flushing covers | ||||
| 	 * the largest possible range. | ||||
| 	 */ | ||||
| 	adjust_range_if_pmd_sharing_possible(vma, &tlb_start, &tlb_end); | ||||
| 
 | ||||
| 	mm = vma->vm_mm; | ||||
| 
 | ||||
| 	tlb_gather_mmu(&tlb, mm, start, end); | ||||
| 	tlb_gather_mmu(&tlb, mm, tlb_start, tlb_end); | ||||
| 	__unmap_hugepage_range(&tlb, vma, start, end, ref_page); | ||||
| 	tlb_finish_mmu(&tlb, start, end); | ||||
| 	tlb_finish_mmu(&tlb, tlb_start, tlb_end); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  | @ -4298,11 +4318,21 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, | |||
| 	pte_t pte; | ||||
| 	struct hstate *h = hstate_vma(vma); | ||||
| 	unsigned long pages = 0; | ||||
| 	unsigned long f_start = start; | ||||
| 	unsigned long f_end = end; | ||||
| 	bool shared_pmd = false; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * In the case of shared PMDs, the area to flush could be beyond | ||||
| 	 * start/end.  Set f_start/f_end to cover the maximum possible | ||||
| 	 * range if PMD sharing is possible. | ||||
| 	 */ | ||||
| 	adjust_range_if_pmd_sharing_possible(vma, &f_start, &f_end); | ||||
| 
 | ||||
| 	BUG_ON(address >= end); | ||||
| 	flush_cache_range(vma, address, end); | ||||
| 	flush_cache_range(vma, f_start, f_end); | ||||
| 
 | ||||
| 	mmu_notifier_invalidate_range_start(mm, start, end); | ||||
| 	mmu_notifier_invalidate_range_start(mm, f_start, f_end); | ||||
| 	i_mmap_lock_write(vma->vm_file->f_mapping); | ||||
| 	for (; address < end; address += huge_page_size(h)) { | ||||
| 		spinlock_t *ptl; | ||||
|  | @ -4313,6 +4343,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, | |||
| 		if (huge_pmd_unshare(mm, &address, ptep)) { | ||||
| 			pages++; | ||||
| 			spin_unlock(ptl); | ||||
| 			shared_pmd = true; | ||||
| 			continue; | ||||
| 		} | ||||
| 		pte = huge_ptep_get(ptep); | ||||
|  | @ -4348,9 +4379,13 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, | |||
| 	 * Must flush TLB before releasing i_mmap_rwsem: x86's huge_pmd_unshare | ||||
| 	 * may have cleared our pud entry and done put_page on the page table: | ||||
| 	 * once we release i_mmap_rwsem, another task can do the final put_page | ||||
| 	 * and that page table be reused and filled with junk. | ||||
| 	 * and that page table be reused and filled with junk.  If we actually | ||||
| 	 * did unshare a page of pmds, flush the range corresponding to the pud. | ||||
| 	 */ | ||||
| 	flush_hugetlb_tlb_range(vma, start, end); | ||||
| 	if (shared_pmd) | ||||
| 		flush_hugetlb_tlb_range(vma, f_start, f_end); | ||||
| 	else | ||||
| 		flush_hugetlb_tlb_range(vma, start, end); | ||||
| 	/*
 | ||||
| 	 * No need to call mmu_notifier_invalidate_range() we are downgrading | ||||
| 	 * page table protection not changing it to point to a new page. | ||||
|  | @ -4358,7 +4393,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, | |||
| 	 * See Documentation/vm/mmu_notifier.rst | ||||
| 	 */ | ||||
| 	i_mmap_unlock_write(vma->vm_file->f_mapping); | ||||
| 	mmu_notifier_invalidate_range_end(mm, start, end); | ||||
| 	mmu_notifier_invalidate_range_end(mm, f_start, f_end); | ||||
| 
 | ||||
| 	return pages << h->order; | ||||
| } | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Mike Kravetz
						Mike Kravetz