Mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git (synced 2025-09-18 22:14:16 +00:00)
	Merge branch 'akpm' (fixes from Andrew Morton)
Merge patch-bomb from Andrew Morton:
 - part of OCFS2 (review is laggy again)
 - procfs
 - slab
 - all of MM
 - zram, zbud
 - various other random things: arch, filesystems.
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (164 commits)
  nosave: consolidate __nosave_{begin,end} in <asm/sections.h>
  include/linux/screen_info.h: remove unused ORIG_* macros
  kernel/sys.c: compat sysinfo syscall: fix undefined behavior
  kernel/sys.c: whitespace fixes
  acct: eliminate compile warning
  kernel/async.c: switch to pr_foo()
  include/linux/blkdev.h: use NULL instead of zero
  include/linux/kernel.h: deduplicate code implementing clamp* macros
  include/linux/kernel.h: rewrite min3, max3 and clamp using min and max
  alpha: use Kbuild logic to include <asm-generic/sections.h>
  frv: remove deprecated IRQF_DISABLED
  frv: remove unused cpuinfo_frv and friends to fix future build error
  zbud: avoid accessing last unused freelist
  zsmalloc: simplify init_zspage free obj linking
  mm/zsmalloc.c: correct comment for fullness group computation
  zram: use notify_free to account all free notifications
  zram: report maximum used memory
  zram: zram memory size limitation
  zsmalloc: change return value unit of zs_get_total_size_bytes
  zsmalloc: move pages_allocated to zs_pool
  ...
			
			
commit 0cf744bc7a
177 changed files with 4109 additions and 2872 deletions
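To inspect the merge locally (a sketch, not part of the original page; it assumes a clone of the mirrored tree above in which the short hash resolves):

	# show the merge commit message
	git log -1 0cf744bc7a

	# diffstat against the first parent (the 177-file summary above)
	git diff --stat 0cf744bc7a^ 0cf744bc7a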
				
			
@@ -85,14 +85,6 @@ Description:
 		will be compacted. When it completes, memory will be freed
 		into blocks which have as many contiguous pages as possible
 
-What:		/sys/devices/system/node/nodeX/scan_unevictable_pages
-Date:		October 2008
-Contact:	Lee Schermerhorn <lee.schermerhorn@hp.com>
-Description:
-		When set, it triggers scanning the node's unevictable lists
-		and move any pages that have become evictable onto the respective
-		zone's inactive list. See mm/vmscan.c
-
 What:		/sys/devices/system/node/nodeX/hugepages/hugepages-<size>/
 Date:		December 2009
 Contact:	Lee Schermerhorn <lee.schermerhorn@hp.com>
@@ -77,11 +77,14 @@ What:		/sys/block/zram<id>/notify_free
 Date:		August 2010
 Contact:	Nitin Gupta <ngupta@vflare.org>
 Description:
-		The notify_free file is read-only and specifies the number of
-		swap slot free notifications received by this device. These
-		notifications are sent to a swap block device when a swap slot
-		is freed. This statistic is applicable only when this disk is
-		being used as a swap disk.
+		The notify_free file is read-only. Depending on device usage
+		scenario it may account a) the number of pages freed because
+		of swap slot free notifications or b) the number of pages freed
+		because of REQ_DISCARD requests sent by bio. The former ones
+		are sent to a swap block device when a swap slot is freed, which
+		implies that this disk is being used as a swap disk. The latter
+		ones are sent by filesystem mounted with discard option,
+		whenever some data blocks are getting discarded.
 
 What:		/sys/block/zram<id>/zero_pages
 Date:		August 2010
@@ -119,3 +122,22 @@ Description:
 		efficiency can be calculated using compr_data_size and this
 		statistic.
 		Unit: bytes
+
+What:		/sys/block/zram<id>/mem_used_max
+Date:		August 2014
+Contact:	Minchan Kim <minchan@kernel.org>
+Description:
+		The mem_used_max file is read/write and specifies the amount
+		of maximum memory zram have consumed to store compressed data.
+		For resetting the value, you should write "0". Otherwise,
+		you could see -EINVAL.
+		Unit: bytes
+
+What:		/sys/block/zram<id>/mem_limit
+Date:		August 2014
+Contact:	Minchan Kim <minchan@kernel.org>
+Description:
+		The mem_limit file is read/write and specifies the maximum
+		amount of memory ZRAM can use to store the compressed data.  The
+		limit could be changed in run time and "0" means disable the
+		limit.  No limit is the initial state.  Unit: bytes
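For orientation only (an editor's sketch, not part of the patch), the two attributes documented above are typically exercised like this, assuming /dev/zram0 has already been initialised:

	# cap the memory used for compressed data at 64M; writing 0 removes the limit
	echo 64M > /sys/block/zram0/mem_limit

	# read the peak amount of memory zram has used, then reset the counter by writing "0"
	cat /sys/block/zram0/mem_used_max
	echo 0 > /sys/block/zram0/mem_used_max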
@@ -61,6 +61,14 @@ Users:		hotplug memory remove tools
 		http://www.ibm.com/developerworks/wikis/display/LinuxP/powerpc-utils
 
 
+What:           /sys/devices/system/memory/memoryX/valid_zones
+Date:           July 2014
+Contact:	Zhang Zhen <zhenzhang.zhang@huawei.com>
+Description:
+		The file /sys/devices/system/memory/memoryX/valid_zones	is
+		read-only and is designed to show which zone this memory
+		block can be onlined to.
+
 What:		/sys/devices/system/memoryX/nodeY
 Date:		October 2009
 Contact:	Linux Memory Management list <linux-mm@kvack.org>
@@ -74,14 +74,30 @@ There is little point creating a zram of greater than twice the size of memory
 since we expect a 2:1 compression ratio. Note that zram uses about 0.1% of the
 size of the disk when not in use so a huge zram is wasteful.
 
-5) Activate:
+5) Set memory limit: Optional
+	Set memory limit by writing the value to sysfs node 'mem_limit'.
+	The value can be either in bytes or you can use mem suffixes.
+	In addition, you could change the value in runtime.
+	Examples:
+	    # limit /dev/zram0 with 50MB memory
+	    echo $((50*1024*1024)) > /sys/block/zram0/mem_limit
+
+	    # Using mem suffixes
+	    echo 256K > /sys/block/zram0/mem_limit
+	    echo 512M > /sys/block/zram0/mem_limit
+	    echo 1G > /sys/block/zram0/mem_limit
+
+	    # To disable memory limit
+	    echo 0 > /sys/block/zram0/mem_limit
+
+6) Activate:
 	mkswap /dev/zram0
 	swapon /dev/zram0
 
 	mkfs.ext4 /dev/zram1
 	mount /dev/zram1 /tmp
 
-6) Stats:
+7) Stats:
 	Per-device statistics are exported as various nodes under
 	/sys/block/zram<id>/
 		disksize
@@ -95,12 +111,13 @@ size of the disk when not in use so a huge zram is wasteful.
 		orig_data_size
 		compr_data_size
 		mem_used_total
+		mem_used_max
 
-7) Deactivate:
+8) Deactivate:
 	swapoff /dev/zram0
 	umount /dev/zram1
 
-8) Reset:
+9) Reset:
 	Write any positive value to 'reset' sysfs node
 	echo 1 > /sys/block/zram0/reset
 	echo 1 > /sys/block/zram1/reset
@@ -656,7 +656,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			Sets the size of kernel global memory area for
 			contiguous memory allocations and optionally the
 			placement constraint by the physical address range of
-			memory allocations. For more information, see
+			memory allocations. A value of 0 disables CMA
+			altogether. For more information, see
 			include/linux/dma-contiguous.h
 
 	cmo_free_hint=	[PPC] Format: { yes | no }
@@ -3158,6 +3159,13 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
 	slram=		[HW,MTD]
 
+	slab_nomerge	[MM]
+			Disable merging of slabs with similar size. May be
+			necessary if there is some reason to distinguish
+			allocs to different slabs. Debug options disable
+			merging on their own.
+			For more information see Documentation/vm/slub.txt.
+
 	slab_max_order=	[MM, SLAB]
 			Determines the maximum allowed order for slabs.
 			A high setting may cause OOMs due to memory
@@ -3193,11 +3201,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			For more information see Documentation/vm/slub.txt.
 
 	slub_nomerge	[MM, SLUB]
-			Disable merging of slabs with similar size. May be
-			necessary if there is some reason to distinguish
-			allocs to different slabs. Debug options disable
-			merging on their own.
-			For more information see Documentation/vm/slub.txt.
+			Same with slab_nomerge. This is supported for legacy.
+			See slab_nomerge for more information.
 
 	smart2=		[HW]
 			Format: <io1>[,<io2>[,...,<io8>]]
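As an illustration (not from the patch), a GRUB-style boot entry exercising the clarified cma= semantics and the new slab_nomerge switch could look like the line below; the kernel image path and root device are placeholders:

	linux /boot/vmlinuz root=/dev/sda1 ro cma=0 slab_nomerge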
@@ -155,6 +155,7 @@ Under each memory block, you can see 4 files:
 /sys/devices/system/memory/memoryXXX/phys_device
 /sys/devices/system/memory/memoryXXX/state
 /sys/devices/system/memory/memoryXXX/removable
+/sys/devices/system/memory/memoryXXX/valid_zones
 
 'phys_index'      : read-only and contains memory block id, same as XXX.
 'state'           : read-write
@@ -170,6 +171,15 @@ Under each memory block, you can see 4 files:
                     block is removable and a value of 0 indicates that
                     it is not removable. A memory block is removable only if
                     every section in the block is removable.
+'valid_zones'     : read-only: designed to show which zones this memory block
+		    can be onlined to.
+		    The first column shows it's default zone.
+		    "memory6/valid_zones: Normal Movable" shows this memoryblock
+		    can be onlined to ZONE_NORMAL by default and to ZONE_MOVABLE
+		    by online_movable.
+		    "memory7/valid_zones: Movable Normal" shows this memoryblock
+		    can be onlined to ZONE_MOVABLE by default and to ZONE_NORMAL
+		    by online_kernel.
 
 NOTE:
   These directories/files appear after physical memory hotplug phase.
@@ -408,7 +418,6 @@ node if necessary.
   - allowing memory hot-add to ZONE_MOVABLE. maybe we need some switch like
     sysctl or new control file.
   - showing memory block and physical device relationship.
-  - showing memory block is under ZONE_MOVABLE or not
   - test and make it better memory offlining.
   - support HugeTLB page migration and offlining.
   - memmap removing at memory offline.
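To illustrate the new interface (an editor's sketch, not part of the patch; the block number and zone names are hypothetical), reading valid_zones and then onlining an offline block accordingly could look like:

	cat /sys/devices/system/memory/memory6/valid_zones
	    Normal Movable
	# the second column allows ZONE_MOVABLE, so request that placement explicitly
	echo online_movable > /sys/devices/system/memory/memory6/state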
@@ -8,4 +8,5 @@ generic-y += irq_work.h
 generic-y += mcs_spinlock.h
 generic-y += preempt.h
 generic-y += scatterlist.h
+generic-y += sections.h
 generic-y += trace_clock.h
@@ -1,7 +0,0 @@
-#ifndef _ALPHA_SECTIONS_H
-#define _ALPHA_SECTIONS_H
-
-/* nothing to see, move along */
-#include <asm-generic/sections.h>
-
-#endif
@@ -14,6 +14,7 @@ config ARM
 	select CLONE_BACKWARDS
 	select CPU_PM if (SUSPEND || CPU_IDLE)
 	select DCACHE_WORD_ACCESS if HAVE_EFFICIENT_UNALIGNED_ACCESS
+	select GENERIC_ALLOCATOR
 	select GENERIC_ATOMIC64 if (CPU_V7M || CPU_V6 || !CPU_32v6K || !AEABI)
 	select GENERIC_CLOCKEVENTS_BROADCAST if SMP
 	select GENERIC_IDLE_POLL_SETUP
@@ -61,6 +62,7 @@ config ARM
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
+	select HAVE_RCU_TABLE_FREE if (SMP && ARM_LPAE)
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_UID16
@@ -1659,6 +1661,10 @@ config ARCH_SELECT_MEMORY_MODEL
 config HAVE_ARCH_PFN_VALID
 	def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM
 
+config HAVE_GENERIC_RCU_GUP
+	def_bool y
+	depends on ARM_LPAE
+
 config HIGHMEM
 	bool "High Memory Support"
 	depends on MMU
@@ -182,6 +182,8 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
 #define pmd_addr_end(addr,end) (end)
 
 #define set_pte_ext(ptep,pte,ext) cpu_set_pte_ext(ptep,pte,ext)
+#define pte_special(pte)	(0)
+static inline pte_t pte_mkspecial(pte_t pte) { return pte; }
 
 /*
  * We don't have huge page support for short descriptors, for the moment
@@ -213,10 +213,19 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
 #define pmd_isclear(pmd, val)	(!(pmd_val(pmd) & (val)))
 
 #define pmd_young(pmd)		(pmd_isset((pmd), PMD_SECT_AF))
+#define pte_special(pte)	(pte_isset((pte), L_PTE_SPECIAL))
+static inline pte_t pte_mkspecial(pte_t pte)
+{
+	pte_val(pte) |= L_PTE_SPECIAL;
+	return pte;
+}
+#define	__HAVE_ARCH_PTE_SPECIAL
 
 #define __HAVE_ARCH_PMD_WRITE
 #define pmd_write(pmd)		(pmd_isclear((pmd), L_PMD_SECT_RDONLY))
 #define pmd_dirty(pmd)		(pmd_isset((pmd), L_PMD_SECT_DIRTY))
+#define pud_page(pud)		pmd_page(__pmd(pud_val(pud)))
+#define pud_write(pud)		pmd_write(__pmd(pud_val(pud)))
 
 #define pmd_hugewillfault(pmd)	(!pmd_young(pmd) || !pmd_write(pmd))
 #define pmd_thp_or_huge(pmd)	(pmd_huge(pmd) || pmd_trans_huge(pmd))
@@ -224,6 +233,12 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 #define pmd_trans_huge(pmd)	(pmd_val(pmd) && !pmd_table(pmd))
 #define pmd_trans_splitting(pmd) (pmd_isset((pmd), L_PMD_SECT_SPLITTING))
+
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH
+void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
+			  pmd_t *pmdp);
+#endif
 #endif
 
 #define PMD_BIT_FUNC(fn,op) \
@@ -226,7 +226,6 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd)
 #define pte_dirty(pte)		(pte_isset((pte), L_PTE_DIRTY))
 #define pte_young(pte)		(pte_isset((pte), L_PTE_YOUNG))
 #define pte_exec(pte)		(pte_isclear((pte), L_PTE_XN))
-#define pte_special(pte)	(0)
 
 #define pte_valid_user(pte)	\
 	(pte_valid(pte) && pte_isset((pte), L_PTE_USER) && pte_young(pte))
@@ -245,6 +244,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 	unsigned long ext = 0;
 
 	if (addr < TASK_SIZE && pte_valid_user(pteval)) {
+		if (!pte_special(pteval))
 			__sync_icache_dcache(pteval);
 		ext |= PTE_EXT_NG;
 	}
@@ -264,8 +264,6 @@ PTE_BIT_FUNC(mkyoung,   |= L_PTE_YOUNG);
 PTE_BIT_FUNC(mkexec,   &= ~L_PTE_XN);
 PTE_BIT_FUNC(mknexec,   |= L_PTE_XN);
 
-static inline pte_t pte_mkspecial(pte_t pte) { return pte; }
-
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
 	const pteval_t mask = L_PTE_XN | L_PTE_RDONLY | L_PTE_USER |
@@ -35,12 +35,39 @@
 
 #define MMU_GATHER_BUNDLE	8
 
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+static inline void __tlb_remove_table(void *_table)
+{
+	free_page_and_swap_cache((struct page *)_table);
+}
+
+struct mmu_table_batch {
+	struct rcu_head		rcu;
+	unsigned int		nr;
+	void			*tables[0];
+};
+
+#define MAX_TABLE_BATCH		\
+	((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *))
+
+extern void tlb_table_flush(struct mmu_gather *tlb);
+extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
+
+#define tlb_remove_entry(tlb, entry)	tlb_remove_table(tlb, entry)
+#else
+#define tlb_remove_entry(tlb, entry)	tlb_remove_page(tlb, entry)
+#endif /* CONFIG_HAVE_RCU_TABLE_FREE */
+
 /*
  * TLB handling.  This allows us to remove pages from the page
  * tables, and efficiently handle the TLB issues.
  */
 struct mmu_gather {
 	struct mm_struct	*mm;
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+	struct mmu_table_batch	*batch;
+	unsigned int		need_flush;
+#endif
 	unsigned int		fullmm;
 	struct vm_area_struct	*vma;
 	unsigned long		start, end;
@@ -101,6 +128,9 @@ static inline void __tlb_alloc_page(struct mmu_gather *tlb)
 static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
 {
 	tlb_flush(tlb);
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+	tlb_table_flush(tlb);
+#endif
 }
 
 static inline void tlb_flush_mmu_free(struct mmu_gather *tlb)
@@ -129,6 +159,10 @@ tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start
 	tlb->pages = tlb->local;
 	tlb->nr = 0;
 	__tlb_alloc_page(tlb);
+
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+	tlb->batch = NULL;
+#endif
 }
 
 static inline void
@@ -205,7 +239,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
 	tlb_add_flush(tlb, addr + SZ_1M);
 #endif
 
-	tlb_remove_page(tlb, pte);
+	tlb_remove_entry(tlb, pte);
 }
 
 static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
@@ -213,7 +247,7 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
 {
 #ifdef CONFIG_ARM_LPAE
 	tlb_add_flush(tlb, addr);
-	tlb_remove_page(tlb, virt_to_page(pmdp));
+	tlb_remove_entry(tlb, virt_to_page(pmdp));
 #endif
 }
 
@@ -21,8 +21,7 @@
 #include <asm/idmap.h>
 #include <asm/suspend.h>
 #include <asm/memory.h>
-
-extern const void __nosave_begin, __nosave_end;
+#include <asm/sections.h>
 
 int pfn_is_nosave(unsigned long pfn)
 {
@@ -12,6 +12,7 @@
 #include <linux/bootmem.h>
 #include <linux/module.h>
 #include <linux/mm.h>
+#include <linux/genalloc.h>
 #include <linux/gfp.h>
 #include <linux/errno.h>
 #include <linux/list.h>
@@ -298,57 +299,29 @@ static void *
 __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot,
 	const void *caller)
 {
-	struct vm_struct *area;
-	unsigned long addr;
-
 	/*
 	 * DMA allocation can be mapped to user space, so lets
 	 * set VM_USERMAP flags too.
 	 */
-	area = get_vm_area_caller(size, VM_ARM_DMA_CONSISTENT | VM_USERMAP,
-				  caller);
-	if (!area)
-		return NULL;
-	addr = (unsigned long)area->addr;
-	area->phys_addr = __pfn_to_phys(page_to_pfn(page));
-
-	if (ioremap_page_range(addr, addr + size, area->phys_addr, prot)) {
-		vunmap((void *)addr);
-		return NULL;
-	}
-	return (void *)addr;
+	return dma_common_contiguous_remap(page, size,
+			VM_ARM_DMA_CONSISTENT | VM_USERMAP,
+			prot, caller);
 }
 
 static void __dma_free_remap(void *cpu_addr, size_t size)
 {
-	unsigned int flags = VM_ARM_DMA_CONSISTENT | VM_USERMAP;
-	struct vm_struct *area = find_vm_area(cpu_addr);
-	if (!area || (area->flags & flags) != flags) {
-		WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr);
-		return;
-	}
-	unmap_kernel_range((unsigned long)cpu_addr, size);
-	vunmap(cpu_addr);
+	dma_common_free_remap(cpu_addr, size,
+			VM_ARM_DMA_CONSISTENT | VM_USERMAP);
 }
 
 #define DEFAULT_DMA_COHERENT_POOL_SIZE	SZ_256K
+static struct gen_pool *atomic_pool;
 
-struct dma_pool {
-	size_t size;
-	spinlock_t lock;
-	unsigned long *bitmap;
-	unsigned long nr_pages;
-	void *vaddr;
-	struct page **pages;
-};
-
-static struct dma_pool atomic_pool = {
-	.size = DEFAULT_DMA_COHERENT_POOL_SIZE,
-};
+static size_t atomic_pool_size = DEFAULT_DMA_COHERENT_POOL_SIZE;
 
 static int __init early_coherent_pool(char *p)
 {
-	atomic_pool.size = memparse(p, &p);
+	atomic_pool_size = memparse(p, &p);
 	return 0;
 }
 early_param("coherent_pool", early_coherent_pool);
@@ -358,14 +331,14 @@ void __init init_dma_coherent_pool_size(unsigned long size)
 	/*
 	 * Catch any attempt to set the pool size too late.
 	 */
-	BUG_ON(atomic_pool.vaddr);
+	BUG_ON(atomic_pool);
 
 	/*
 	 * Set architecture specific coherent pool size only if
 	 * it has not been changed by kernel command line parameter.
 	 */
-	if (atomic_pool.size == DEFAULT_DMA_COHERENT_POOL_SIZE)
-		atomic_pool.size = size;
+	if (atomic_pool_size == DEFAULT_DMA_COHERENT_POOL_SIZE)
+		atomic_pool_size = size;
 }
 
 /*
@@ -373,52 +346,44 @@
  */
 static int __init atomic_pool_init(void)
 {
-	struct dma_pool *pool = &atomic_pool;
 	pgprot_t prot = pgprot_dmacoherent(PAGE_KERNEL);
 	gfp_t gfp = GFP_KERNEL | GFP_DMA;
-	unsigned long nr_pages = pool->size >> PAGE_SHIFT;
-	unsigned long *bitmap;
 	struct page *page;
-	struct page **pages;
 	void *ptr;
-	int bitmap_size = BITS_TO_LONGS(nr_pages) * sizeof(long);
 
-	bitmap = kzalloc(bitmap_size, GFP_KERNEL);
-	if (!bitmap)
-		goto no_bitmap;
-
-	pages = kzalloc(nr_pages * sizeof(struct page *), GFP_KERNEL);
-	if (!pages)
-		goto no_pages;
+	atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
+	if (!atomic_pool)
+		goto out;
 
 	if (dev_get_cma_area(NULL))
-		ptr = __alloc_from_contiguous(NULL, pool->size, prot, &page,
-					      atomic_pool_init);
+		ptr = __alloc_from_contiguous(NULL, atomic_pool_size, prot,
+					      &page, atomic_pool_init);
 	else
-		ptr = __alloc_remap_buffer(NULL, pool->size, gfp, prot, &page,
-					   atomic_pool_init);
+		ptr = __alloc_remap_buffer(NULL, atomic_pool_size, gfp, prot,
+					   &page, atomic_pool_init);
 	if (ptr) {
-		int i;
+		int ret;
 
-		for (i = 0; i < nr_pages; i++)
-			pages[i] = page + i;
+		ret = gen_pool_add_virt(atomic_pool, (unsigned long)ptr,
+					page_to_phys(page),
+					atomic_pool_size, -1);
+		if (ret)
+			goto destroy_genpool;
 
-		spin_lock_init(&pool->lock);
-		pool->vaddr = ptr;
-		pool->pages = pages;
-		pool->bitmap = bitmap;
-		pool->nr_pages = nr_pages;
-		pr_info("DMA: preallocated %u KiB pool for atomic coherent allocations\n",
-		       (unsigned)pool->size / 1024);
+		gen_pool_set_algo(atomic_pool,
+				gen_pool_first_fit_order_align,
+				(void *)PAGE_SHIFT);
+		pr_info("DMA: preallocated %zd KiB pool for atomic coherent allocations\n",
+		       atomic_pool_size / 1024);
 		return 0;
 	}
 
-	kfree(pages);
-no_pages:
-	kfree(bitmap);
-no_bitmap:
-	pr_err("DMA: failed to allocate %u KiB pool for atomic coherent allocation\n",
-	       (unsigned)pool->size / 1024);
+destroy_genpool:
+	gen_pool_destroy(atomic_pool);
+	atomic_pool = NULL;
+out:
+	pr_err("DMA: failed to allocate %zx KiB pool for atomic coherent allocation\n",
+	       atomic_pool_size / 1024);
 	return -ENOMEM;
 }
 /*
@@ -522,76 +487,36 @@ static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
 
 static void *__alloc_from_pool(size_t size, struct page **ret_page)
 {
-	struct dma_pool *pool = &atomic_pool;
-	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
-	unsigned int pageno;
-	unsigned long flags;
+	unsigned long val;
 	void *ptr = NULL;
-	unsigned long align_mask;
 
-	if (!pool->vaddr) {
+	if (!atomic_pool) {
 		WARN(1, "coherent pool not initialised!\n");
 		return NULL;
 	}
 
-	/*
-	 * Align the region allocation - allocations from pool are rather
-	 * small, so align them to their order in pages, minimum is a page
-	 * size. This helps reduce fragmentation of the DMA space.
-	 */
-	align_mask = (1 << get_order(size)) - 1;
+	val = gen_pool_alloc(atomic_pool, size);
+	if (val) {
+		phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val);
 
-	spin_lock_irqsave(&pool->lock, flags);
-	pageno = bitmap_find_next_zero_area(pool->bitmap, pool->nr_pages,
-					    0, count, align_mask);
-	if (pageno < pool->nr_pages) {
-		bitmap_set(pool->bitmap, pageno, count);
-		ptr = pool->vaddr + PAGE_SIZE * pageno;
-		*ret_page = pool->pages[pageno];
-	} else {
-		pr_err_once("ERROR: %u KiB atomic DMA coherent pool is too small!\n"
-			    "Please increase it with coherent_pool= kernel parameter!\n",
-			    (unsigned)pool->size / 1024);
+		*ret_page = phys_to_page(phys);
+		ptr = (void *)val;
 	}
-	spin_unlock_irqrestore(&pool->lock, flags);
 
 	return ptr;
 }
 
 static bool __in_atomic_pool(void *start, size_t size)
 {
-	struct dma_pool *pool = &atomic_pool;
-	void *end = start + size;
-	void *pool_start = pool->vaddr;
-	void *pool_end = pool->vaddr + pool->size;
-
-	if (start < pool_start || start >= pool_end)
-		return false;
-
-	if (end <= pool_end)
-		return true;
-
-	WARN(1, "Wrong coherent size(%p-%p) from atomic pool(%p-%p)\n",
-	     start, end - 1, pool_start, pool_end - 1);
-
-	return false;
+	return addr_in_gen_pool(atomic_pool, (unsigned long)start, size);
 }
 
 static int __free_from_pool(void *start, size_t size)
 {
-	struct dma_pool *pool = &atomic_pool;
-	unsigned long pageno, count;
-	unsigned long flags;
-
 	if (!__in_atomic_pool(start, size))
 		return 0;
 
-	pageno = (start - pool->vaddr) >> PAGE_SHIFT;
-	count = size >> PAGE_SHIFT;
-
-	spin_lock_irqsave(&pool->lock, flags);
-	bitmap_clear(pool->bitmap, pageno, count);
-	spin_unlock_irqrestore(&pool->lock, flags);
+	gen_pool_free(atomic_pool, (unsigned long)start, size);
 
 	return 1;
 }
@@ -1271,29 +1196,8 @@ static void *
 __iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot,
 		    const void *caller)
 {
-	unsigned int i, nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
-	struct vm_struct *area;
-	unsigned long p;
-
-	area = get_vm_area_caller(size, VM_ARM_DMA_CONSISTENT | VM_USERMAP,
-				  caller);
-	if (!area)
-		return NULL;
-
-	area->pages = pages;
-	area->nr_pages = nr_pages;
-	p = (unsigned long)area->addr;
-
-	for (i = 0; i < nr_pages; i++) {
-		phys_addr_t phys = __pfn_to_phys(page_to_pfn(pages[i]));
-		if (ioremap_page_range(p, p + PAGE_SIZE, phys, prot))
-			goto err;
-		p += PAGE_SIZE;
-	}
-	return area->addr;
-err:
-	unmap_kernel_range((unsigned long)area->addr, size);
-	vunmap(area->addr);
+	return dma_common_pages_remap(pages, size,
+			VM_ARM_DMA_CONSISTENT | VM_USERMAP, prot, caller);
 	return NULL;
 }
 
@@ -1355,11 +1259,13 @@ static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t si
 
 static struct page **__atomic_get_pages(void *addr)
 {
-	struct dma_pool *pool = &atomic_pool;
-	struct page **pages = pool->pages;
-	int offs = (addr - pool->vaddr) >> PAGE_SHIFT;
+	struct page *page;
+	phys_addr_t phys;
 
-	return pages + offs;
+	phys = gen_pool_virt_to_phys(atomic_pool, (unsigned long)addr);
+	page = phys_to_page(phys);
+
+	return (struct page **)page;
 }
 
 static struct page **__iommu_get_pages(void *cpu_addr, struct dma_attrs *attrs)
@@ -1501,8 +1407,8 @@ void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
 	}
 
 	if (!dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs)) {
-		unmap_kernel_range((unsigned long)cpu_addr, size);
-		vunmap(cpu_addr);
+		dma_common_free_remap(cpu_addr, size,
+			VM_ARM_DMA_CONSISTENT | VM_USERMAP);
 	}
 
 	__iommu_remove_mapping(dev, handle, size);
@@ -400,3 +400,18 @@ void __flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned l
 	 */
 	__cpuc_flush_dcache_area(page_address(page), PAGE_SIZE);
 }
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
+			  pmd_t *pmdp)
+{
+	pmd_t pmd = pmd_mksplitting(*pmdp);
+	VM_BUG_ON(address & ~PMD_MASK);
+	set_pmd_at(vma->vm_mm, address, pmdp, pmd);
+
+	/* dummy IPI to serialise against fast_gup */
+	kick_all_cpus_sync();
+}
+#endif /* CONFIG_HAVE_RCU_TABLE_FREE */
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
@@ -322,7 +322,7 @@ void __init arm_memblock_init(const struct machine_desc *mdesc)
 	 * reserve memory for DMA contigouos allocations,
 	 * must come from DMA area inside low memory
 	 */
-	dma_contiguous_reserve(min(arm_dma_limit, arm_lowmem_limit));
+	dma_contiguous_reserve(arm_dma_limit);
 
 	arm_memblock_steal_permitted = false;
 	memblock_dump_all();
@@ -18,6 +18,7 @@ config ARM64
 	select COMMON_CLK
 	select CPU_PM if (SUSPEND || CPU_IDLE)
 	select DCACHE_WORD_ACCESS
+	select GENERIC_ALLOCATOR
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_CLOCKEVENTS_BROADCAST if SMP
 	select GENERIC_CPU_AUTOPROBE
@@ -56,6 +57,7 @@ config ARM64
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
+	select HAVE_RCU_TABLE_FREE
 	select HAVE_SYSCALL_TRACEPOINTS
 	select IRQ_DOMAIN
 	select MODULES_USE_ELF_RELA
@@ -109,6 +111,9 @@ config GENERIC_CALIBRATE_DELAY
 config ZONE_DMA
 	def_bool y
 
+config HAVE_GENERIC_RCU_GUP
+	def_bool y
+
 config ARCH_DMA_ADDR_T_64BIT
 	def_bool y
 
@@ -244,6 +244,16 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 
 #define __HAVE_ARCH_PTE_SPECIAL
 
+static inline pte_t pud_pte(pud_t pud)
+{
+	return __pte(pud_val(pud));
+}
+
+static inline pmd_t pud_pmd(pud_t pud)
+{
+	return __pmd(pud_val(pud));
+}
+
 static inline pte_t pmd_pte(pmd_t pmd)
 {
 	return __pte(pmd_val(pmd));
@@ -261,7 +271,13 @@ static inline pmd_t pte_pmd(pte_t pte)
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 #define pmd_trans_huge(pmd)	(pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT))
 #define pmd_trans_splitting(pmd)	pte_special(pmd_pte(pmd))
-#endif
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH
+struct vm_area_struct;
+void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
+			  pmd_t *pmdp);
+#endif /* CONFIG_HAVE_RCU_TABLE_FREE */
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #define pmd_young(pmd)		pte_young(pmd_pte(pmd))
 #define pmd_wrprotect(pmd)	pte_pmd(pte_wrprotect(pmd_pte(pmd)))
@@ -282,6 +298,7 @@ static inline pmd_t pte_pmd(pte_t pte)
 #define mk_pmd(page,prot)	pfn_pmd(page_to_pfn(page),prot)
 
 #define pmd_page(pmd)           pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK))
+#define pud_write(pud)		pte_write(pud_pte(pud))
 #define pud_pfn(pud)		(((pud_val(pud) & PUD_MASK) & PHYS_MASK) >> PAGE_SHIFT)
 
 #define set_pmd_at(mm, addr, pmdp, pmd)	set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd))
@@ -383,6 +400,8 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
 	return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(addr);
 }
 
+#define pud_page(pud)           pmd_page(pud_pmd(pud))
+
 #endif	/* CONFIG_ARM64_PGTABLE_LEVELS > 2 */
 
 #if CONFIG_ARM64_PGTABLE_LEVELS > 3
@@ -23,6 +23,20 @@
 
 #include <asm-generic/tlb.h>
 
+#include <linux/pagemap.h>
+#include <linux/swap.h>
+
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+
+#define tlb_remove_entry(tlb, entry)	tlb_remove_table(tlb, entry)
+static inline void __tlb_remove_table(void *_table)
+{
+	free_page_and_swap_cache((struct page *)_table);
+}
+#else
+#define tlb_remove_entry(tlb, entry)	tlb_remove_page(tlb, entry)
+#endif /* CONFIG_HAVE_RCU_TABLE_FREE */
+
 /*
  * There's three ways the TLB shootdown code is used:
  *  1. Unmapping a range of vmas.  See zap_page_range(), unmap_region().
@@ -88,7 +102,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
 {
 	pgtable_page_dtor(pte);
 	tlb_add_flush(tlb, addr);
-	tlb_remove_page(tlb, pte);
+	tlb_remove_entry(tlb, pte);
 }
 
 #if CONFIG_ARM64_PGTABLE_LEVELS > 2
@@ -96,7 +110,7 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
 				  unsigned long addr)
 {
 	tlb_add_flush(tlb, addr);
-	tlb_remove_page(tlb, virt_to_page(pmdp));
+	tlb_remove_entry(tlb, virt_to_page(pmdp));
 }
 #endif
 
@@ -105,7 +119,7 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp,
 				  unsigned long addr)
 {
 	tlb_add_flush(tlb, addr);
-	tlb_remove_page(tlb, virt_to_page(pudp));
+	tlb_remove_entry(tlb, virt_to_page(pudp));
 }
 #endif
 
@@ -20,6 +20,7 @@
 #include <linux/gfp.h>
 #include <linux/export.h>
 #include <linux/slab.h>
+#include <linux/genalloc.h>
 #include <linux/dma-mapping.h>
 #include <linux/dma-contiguous.h>
 #include <linux/vmalloc.h>
@@ -38,6 +39,54 @@ static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot,
 	return prot;
 }
 
+static struct gen_pool *atomic_pool;
+
+#define DEFAULT_DMA_COHERENT_POOL_SIZE  SZ_256K
+static size_t atomic_pool_size = DEFAULT_DMA_COHERENT_POOL_SIZE;
+
+static int __init early_coherent_pool(char *p)
+{
+	atomic_pool_size = memparse(p, &p);
+	return 0;
+}
+early_param("coherent_pool", early_coherent_pool);
+
+static void *__alloc_from_pool(size_t size, struct page **ret_page)
+{
+	unsigned long val;
+	void *ptr = NULL;
+
+	if (!atomic_pool) {
+		WARN(1, "coherent pool not initialised!\n");
+		return NULL;
+	}
+
+	val = gen_pool_alloc(atomic_pool, size);
+	if (val) {
+		phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val);
+
+		*ret_page = phys_to_page(phys);
+		ptr = (void *)val;
+	}
+
+	return ptr;
+}
+
+static bool __in_atomic_pool(void *start, size_t size)
+{
+	return addr_in_gen_pool(atomic_pool, (unsigned long)start, size);
+}
+
+static int __free_from_pool(void *start, size_t size)
+{
+	if (!__in_atomic_pool(start, size))
+		return 0;
+
+	gen_pool_free(atomic_pool, (unsigned long)start, size);
+
+	return 1;
+}
+
 static void *__dma_alloc_coherent(struct device *dev, size_t size,
 				  dma_addr_t *dma_handle, gfp_t flags,
 				  struct dma_attrs *attrs)
@@ -50,7 +99,7 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size,
 	if (IS_ENABLED(CONFIG_ZONE_DMA) &&
 	    dev->coherent_dma_mask <= DMA_BIT_MASK(32))
 		flags |= GFP_DMA;
-	if (IS_ENABLED(CONFIG_DMA_CMA)) {
+	if (IS_ENABLED(CONFIG_DMA_CMA) && (flags & __GFP_WAIT)) {
 		struct page *page;
 
 		size = PAGE_ALIGN(size);
@@ -70,50 +119,54 @@ static void __dma_free_coherent(struct device *dev, size_t size,
 				void *vaddr, dma_addr_t dma_handle,
 				struct dma_attrs *attrs)
 {
+	bool freed;
+	phys_addr_t paddr = dma_to_phys(dev, dma_handle);
+
 	if (dev == NULL) {
 		WARN_ONCE(1, "Use an actual device structure for DMA allocation\n");
 		return;
 	}
 
-	if (IS_ENABLED(CONFIG_DMA_CMA)) {
-		phys_addr_t paddr = dma_to_phys(dev, dma_handle);
-
-		dma_release_from_contiguous(dev,
+	freed = dma_release_from_contiguous(dev,
 					phys_to_page(paddr),
 					size >> PAGE_SHIFT);
-	} else {
+	if (!freed)
 		swiotlb_free_coherent(dev, size, vaddr, dma_handle);
-	}
 }
 
 static void *__dma_alloc_noncoherent(struct device *dev, size_t size,
 				     dma_addr_t *dma_handle, gfp_t flags,
 				     struct dma_attrs *attrs)
 {
-	struct page *page, **map;
+	struct page *page;
 	void *ptr, *coherent_ptr;
-	int order, i;
 
 	size = PAGE_ALIGN(size);
-	order = get_order(size);
+
+	if (!(flags & __GFP_WAIT)) {
+		struct page *page = NULL;
+		void *addr = __alloc_from_pool(size, &page);
+
+		if (addr)
+			*dma_handle = phys_to_dma(dev, page_to_phys(page));
+
+		return addr;
+
+	}
 
 	ptr = __dma_alloc_coherent(dev, size, dma_handle, flags, attrs);
 	if (!ptr)
 		goto no_mem;
-	map = kmalloc(sizeof(struct page *) << order, flags & ~GFP_DMA);
-	if (!map)
-		goto no_map;
 
 	/* remove any dirty cache lines on the kernel alias */
 	__dma_flush_range(ptr, ptr + size);
 
 	/* create a coherent mapping */
 	page = virt_to_page(ptr);
-	for (i = 0; i < (size >> PAGE_SHIFT); i++)
-		map[i] = page + i;
-	coherent_ptr = vmap(map, size >> PAGE_SHIFT, VM_MAP,
-			    __get_dma_pgprot(attrs, __pgprot(PROT_NORMAL_NC), false));
-	kfree(map);
+	coherent_ptr = dma_common_contiguous_remap(page, size, VM_USERMAP,
+				__get_dma_pgprot(attrs,
+					__pgprot(PROT_NORMAL_NC), false),
+					NULL);
 	if (!coherent_ptr)
 		goto no_map;
 
@@ -132,6 +185,8 @@ static void __dma_free_noncoherent(struct device *dev, size_t size,
 {
 	void *swiotlb_addr = phys_to_virt(dma_to_phys(dev, dma_handle));
 
+	if (__free_from_pool(vaddr, size))
+		return;
 	vunmap(vaddr);
 	__dma_free_coherent(dev, size, swiotlb_addr, dma_handle, attrs);
 }
@@ -307,6 +362,67 @@ EXPORT_SYMBOL(coherent_swiotlb_dma_ops);
 
 extern int swiotlb_late_init_with_default_size(size_t default_size);
 
+static int __init atomic_pool_init(void)
+{
+	pgprot_t prot = __pgprot(PROT_NORMAL_NC);
+	unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT;
+	struct page *page;
+	void *addr;
+	unsigned int pool_size_order = get_order(atomic_pool_size);
+
+	if (dev_get_cma_area(NULL))
+		page = dma_alloc_from_contiguous(NULL, nr_pages,
+							pool_size_order);
+	else
+		page = alloc_pages(GFP_DMA, pool_size_order);
+
+	if (page) {
+		int ret;
+		void *page_addr = page_address(page);
+
+		memset(page_addr, 0, atomic_pool_size);
+		__dma_flush_range(page_addr, page_addr + atomic_pool_size);
+
+		atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
+		if (!atomic_pool)
+			goto free_page;
+
+		addr = dma_common_contiguous_remap(page, atomic_pool_size,
+					VM_USERMAP, prot, atomic_pool_init);
+
+		if (!addr)
+			goto destroy_genpool;
+
+		ret = gen_pool_add_virt(atomic_pool, (unsigned long)addr,
+					page_to_phys(page),
+					atomic_pool_size, -1);
+		if (ret)
+			goto remove_mapping;
+
+		gen_pool_set_algo(atomic_pool,
+				  gen_pool_first_fit_order_align,
+				  (void *)PAGE_SHIFT);
+
+		pr_info("DMA: preallocated %zu KiB pool for atomic allocations\n",
+			atomic_pool_size / 1024);
+		return 0;
+	}
+	goto out;
+
+remove_mapping:
+	dma_common_free_remap(addr, atomic_pool_size, VM_USERMAP);
+destroy_genpool:
+	gen_pool_destroy(atomic_pool);
+	atomic_pool = NULL;
+free_page:
+	if (!dma_release_from_contiguous(NULL, page, nr_pages))
+		__free_pages(page, pool_size_order);
+out:
+	pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n",
+		atomic_pool_size / 1024);
+	return -ENOMEM;
+}
+
 static int __init swiotlb_late_init(void)
 {
 	size_t swiotlb_size = min(SZ_64M, MAX_ORDER_NR_PAGES << PAGE_SHIFT);
|  | @ -315,7 +431,17 @@ static int __init swiotlb_late_init(void) | ||||||
| 
 | 
 | ||||||
| 	return swiotlb_late_init_with_default_size(swiotlb_size); | 	return swiotlb_late_init_with_default_size(swiotlb_size); | ||||||
| } | } | ||||||
| arch_initcall(swiotlb_late_init); | 
 | ||||||
|  | static int __init arm64_dma_init(void) | ||||||
|  | { | ||||||
|  | 	int ret = 0; | ||||||
|  | 
 | ||||||
|  | 	ret |= swiotlb_late_init(); | ||||||
|  | 	ret |= atomic_pool_init(); | ||||||
|  | 
 | ||||||
|  | 	return ret; | ||||||
|  | } | ||||||
|  | arch_initcall(arm64_dma_init); | ||||||
| 
 | 
 | ||||||
| #define PREALLOC_DMA_DEBUG_ENTRIES	4096 | #define PREALLOC_DMA_DEBUG_ENTRIES	4096 | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
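Editor's note: the arm64 hunk above pre-reserves an atomic pool at boot and hands out chunks from it when the caller cannot sleep (no __GFP_WAIT). Below is a minimal, self-contained sketch of the same reservation idea, using a fixed buffer plus a one-bit-per-page map instead of genalloc; all names are illustrative, not kernel API, and locking is omitted.

#include <stddef.h>
#include <stdint.h>

#define POOL_PAGES	64
#define PAGE_SZ		4096

static unsigned char pool[POOL_PAGES * PAGE_SZ];
static uint64_t pool_map;	/* one bit per page, 0 = free */

/* Hand out one page from the pre-reserved pool; usable where a real
 * allocator could not sleep. Returns NULL when the pool is exhausted. */
static void *pool_alloc_page(void)
{
	for (int i = 0; i < POOL_PAGES; i++) {
		if (!(pool_map & (1ULL << i))) {
			pool_map |= 1ULL << i;
			return pool + (size_t)i * PAGE_SZ;
		}
	}
	return NULL;
}

/* Return a page to the pool by clearing its bit. */
static void pool_free_page(void *p)
{
	size_t i = ((unsigned char *)p - pool) / PAGE_SZ;

	pool_map &= ~(1ULL << i);
}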
|  | @ -104,3 +104,19 @@ EXPORT_SYMBOL(flush_dcache_page); | ||||||
|  */ |  */ | ||||||
| EXPORT_SYMBOL(flush_cache_all); | EXPORT_SYMBOL(flush_cache_all); | ||||||
| EXPORT_SYMBOL(flush_icache_range); | EXPORT_SYMBOL(flush_icache_range); | ||||||
|  | 
 | ||||||
|  | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||||||
|  | #ifdef CONFIG_HAVE_RCU_TABLE_FREE | ||||||
|  | void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address, | ||||||
|  | 			  pmd_t *pmdp) | ||||||
|  | { | ||||||
|  | 	pmd_t pmd = pmd_mksplitting(*pmdp); | ||||||
|  | 
 | ||||||
|  | 	VM_BUG_ON(address & ~PMD_MASK); | ||||||
|  | 	set_pmd_at(vma->vm_mm, address, pmdp, pmd); | ||||||
|  | 
 | ||||||
|  | 	/* dummy IPI to serialise against fast_gup */ | ||||||
|  | 	kick_all_cpus_sync(); | ||||||
|  | } | ||||||
|  | #endif /* CONFIG_HAVE_RCU_TABLE_FREE */ | ||||||
|  | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ | ||||||
|  |  | ||||||
|  | @ -15,6 +15,7 @@ generic-y += mcs_spinlock.h | ||||||
| generic-y += module.h | generic-y += module.h | ||||||
| generic-y += preempt.h | generic-y += preempt.h | ||||||
| generic-y += scatterlist.h | generic-y += scatterlist.h | ||||||
|  | generic-y += sections.h | ||||||
| generic-y += trace_clock.h | generic-y += trace_clock.h | ||||||
| generic-y += vga.h | generic-y += vga.h | ||||||
| generic-y += xor.h | generic-y += xor.h | ||||||
|  |  | ||||||
|  | @ -1,7 +0,0 @@ | ||||||
| #ifndef _CRIS_SECTIONS_H |  | ||||||
| #define _CRIS_SECTIONS_H |  | ||||||
| 
 |  | ||||||
| /* nothing to see, move along */ |  | ||||||
| #include <asm-generic/sections.h> |  | ||||||
| 
 |  | ||||||
| #endif |  | ||||||
|  | @ -34,22 +34,6 @@ | ||||||
| /* Forward declaration, a strange C thing */ | /* Forward declaration, a strange C thing */ | ||||||
| struct task_struct; | struct task_struct; | ||||||
| 
 | 
 | ||||||
| /*
 |  | ||||||
|  *  CPU type and hardware bug flags. Kept separately for each CPU. |  | ||||||
|  */ |  | ||||||
| struct cpuinfo_frv { |  | ||||||
| #ifdef CONFIG_MMU |  | ||||||
| 	unsigned long	*pgd_quick; |  | ||||||
| 	unsigned long	*pte_quick; |  | ||||||
| 	unsigned long	pgtable_cache_sz; |  | ||||||
| #endif |  | ||||||
| } __cacheline_aligned; |  | ||||||
| 
 |  | ||||||
| extern struct cpuinfo_frv __nongprelbss boot_cpu_data; |  | ||||||
| 
 |  | ||||||
| #define cpu_data		(&boot_cpu_data) |  | ||||||
| #define current_cpu_data	boot_cpu_data |  | ||||||
| 
 |  | ||||||
| /*
 | /*
 | ||||||
|  * Bus types |  * Bus types | ||||||
|  */ |  */ | ||||||
|  |  | ||||||
|  | @ -107,25 +107,25 @@ static irqreturn_t fpga_interrupt(int irq, void *_mask) | ||||||
| static struct irqaction fpga_irq[4]  = { | static struct irqaction fpga_irq[4]  = { | ||||||
| 	[0] = { | 	[0] = { | ||||||
| 		.handler	= fpga_interrupt, | 		.handler	= fpga_interrupt, | ||||||
| 		.flags		= IRQF_DISABLED | IRQF_SHARED, | 		.flags		= IRQF_SHARED, | ||||||
| 		.name		= "fpga.0", | 		.name		= "fpga.0", | ||||||
| 		.dev_id		= (void *) 0x0028UL, | 		.dev_id		= (void *) 0x0028UL, | ||||||
| 	}, | 	}, | ||||||
| 	[1] = { | 	[1] = { | ||||||
| 		.handler	= fpga_interrupt, | 		.handler	= fpga_interrupt, | ||||||
| 		.flags		= IRQF_DISABLED | IRQF_SHARED, | 		.flags		= IRQF_SHARED, | ||||||
| 		.name		= "fpga.1", | 		.name		= "fpga.1", | ||||||
| 		.dev_id		= (void *) 0x0050UL, | 		.dev_id		= (void *) 0x0050UL, | ||||||
| 	}, | 	}, | ||||||
| 	[2] = { | 	[2] = { | ||||||
| 		.handler	= fpga_interrupt, | 		.handler	= fpga_interrupt, | ||||||
| 		.flags		= IRQF_DISABLED | IRQF_SHARED, | 		.flags		= IRQF_SHARED, | ||||||
| 		.name		= "fpga.2", | 		.name		= "fpga.2", | ||||||
| 		.dev_id		= (void *) 0x1c00UL, | 		.dev_id		= (void *) 0x1c00UL, | ||||||
| 	}, | 	}, | ||||||
| 	[3] = { | 	[3] = { | ||||||
| 		.handler	= fpga_interrupt, | 		.handler	= fpga_interrupt, | ||||||
| 		.flags		= IRQF_DISABLED | IRQF_SHARED, | 		.flags		= IRQF_SHARED, | ||||||
| 		.name		= "fpga.3", | 		.name		= "fpga.3", | ||||||
| 		.dev_id		= (void *) 0x6386UL, | 		.dev_id		= (void *) 0x6386UL, | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | @ -105,7 +105,6 @@ static irqreturn_t fpga_interrupt(int irq, void *_mask) | ||||||
| static struct irqaction fpga_irq[1]  = { | static struct irqaction fpga_irq[1]  = { | ||||||
| 	[0] = { | 	[0] = { | ||||||
| 		.handler	= fpga_interrupt, | 		.handler	= fpga_interrupt, | ||||||
| 		.flags		= IRQF_DISABLED, |  | ||||||
| 		.name		= "fpga.0", | 		.name		= "fpga.0", | ||||||
| 		.dev_id		= (void *) 0x0700UL, | 		.dev_id		= (void *) 0x0700UL, | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | @ -118,13 +118,13 @@ static irqreturn_t mb93493_interrupt(int irq, void *_piqsr) | ||||||
| static struct irqaction mb93493_irq[2]  = { | static struct irqaction mb93493_irq[2]  = { | ||||||
| 	[0] = { | 	[0] = { | ||||||
| 		.handler	= mb93493_interrupt, | 		.handler	= mb93493_interrupt, | ||||||
| 		.flags		= IRQF_DISABLED | IRQF_SHARED, | 		.flags		= IRQF_SHARED, | ||||||
| 		.name		= "mb93493.0", | 		.name		= "mb93493.0", | ||||||
| 		.dev_id		= (void *) __addr_MB93493_IQSR(0), | 		.dev_id		= (void *) __addr_MB93493_IQSR(0), | ||||||
| 	}, | 	}, | ||||||
| 	[1] = { | 	[1] = { | ||||||
| 		.handler	= mb93493_interrupt, | 		.handler	= mb93493_interrupt, | ||||||
| 		.flags		= IRQF_DISABLED | IRQF_SHARED, | 		.flags		= IRQF_SHARED, | ||||||
| 		.name		= "mb93493.1", | 		.name		= "mb93493.1", | ||||||
| 		.dev_id		= (void *) __addr_MB93493_IQSR(1), | 		.dev_id		= (void *) __addr_MB93493_IQSR(1), | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | @ -104,8 +104,6 @@ unsigned long __nongprelbss dma_coherent_mem_end; | ||||||
| unsigned long __initdata __sdram_old_base; | unsigned long __initdata __sdram_old_base; | ||||||
| unsigned long __initdata num_mappedpages; | unsigned long __initdata num_mappedpages; | ||||||
| 
 | 
 | ||||||
| struct cpuinfo_frv __nongprelbss boot_cpu_data; |  | ||||||
| 
 |  | ||||||
| char __initdata command_line[COMMAND_LINE_SIZE]; | char __initdata command_line[COMMAND_LINE_SIZE]; | ||||||
| char __initdata redboot_command_line[COMMAND_LINE_SIZE]; | char __initdata redboot_command_line[COMMAND_LINE_SIZE]; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -44,7 +44,6 @@ static irqreturn_t timer_interrupt(int irq, void *dummy); | ||||||
| 
 | 
 | ||||||
| static struct irqaction timer_irq  = { | static struct irqaction timer_irq  = { | ||||||
| 	.handler = timer_interrupt, | 	.handler = timer_interrupt, | ||||||
| 	.flags = IRQF_DISABLED, |  | ||||||
| 	.name = "timer", | 	.name = "timer", | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -8,4 +8,5 @@ generic-y += mcs_spinlock.h | ||||||
| generic-y += module.h | generic-y += module.h | ||||||
| generic-y += preempt.h | generic-y += preempt.h | ||||||
| generic-y += scatterlist.h | generic-y += scatterlist.h | ||||||
|  | generic-y += sections.h | ||||||
| generic-y += trace_clock.h | generic-y += trace_clock.h | ||||||
|  |  | ||||||
|  | @ -1,7 +0,0 @@ | ||||||
| #ifndef _M32R_SECTIONS_H |  | ||||||
| #define _M32R_SECTIONS_H |  | ||||||
| 
 |  | ||||||
| /* nothing to see, move along */ |  | ||||||
| #include <asm-generic/sections.h> |  | ||||||
| 
 |  | ||||||
| #endif	/* _M32R_SECTIONS_H */ |  | ||||||
|  | @ -134,7 +134,6 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id) | ||||||
| 
 | 
 | ||||||
| static struct irqaction irq0 = { | static struct irqaction irq0 = { | ||||||
| 	.handler = timer_interrupt, | 	.handler = timer_interrupt, | ||||||
| 	.flags = IRQF_DISABLED, |  | ||||||
| 	.name = "MFT2", | 	.name = "MFT2", | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -376,7 +376,6 @@ cache_flush_060 (unsigned long addr, int scope, int cache, unsigned long len) | ||||||
| asmlinkage int | asmlinkage int | ||||||
| sys_cacheflush (unsigned long addr, int scope, int cache, unsigned long len) | sys_cacheflush (unsigned long addr, int scope, int cache, unsigned long len) | ||||||
| { | { | ||||||
| 	struct vm_area_struct *vma; |  | ||||||
| 	int ret = -EINVAL; | 	int ret = -EINVAL; | ||||||
| 
 | 
 | ||||||
| 	if (scope < FLUSH_SCOPE_LINE || scope > FLUSH_SCOPE_ALL || | 	if (scope < FLUSH_SCOPE_LINE || scope > FLUSH_SCOPE_ALL || | ||||||
|  | @ -389,17 +388,21 @@ sys_cacheflush (unsigned long addr, int scope, int cache, unsigned long len) | ||||||
| 		if (!capable(CAP_SYS_ADMIN)) | 		if (!capable(CAP_SYS_ADMIN)) | ||||||
| 			goto out; | 			goto out; | ||||||
| 	} else { | 	} else { | ||||||
|  | 		struct vm_area_struct *vma; | ||||||
|  | 
 | ||||||
|  | 		/* Check for overflow.  */ | ||||||
|  | 		if (addr + len < addr) | ||||||
|  | 			goto out; | ||||||
|  | 
 | ||||||
| 		/*
 | 		/*
 | ||||||
| 		 * Verify that the specified address region actually belongs | 		 * Verify that the specified address region actually belongs | ||||||
| 		 * to this process. | 		 * to this process. | ||||||
| 		 */ | 		 */ | ||||||
| 		vma = find_vma (current->mm, addr); |  | ||||||
| 		ret = -EINVAL; | 		ret = -EINVAL; | ||||||
| 		/* Check for overflow.  */ | 		down_read(¤t->mm->mmap_sem); | ||||||
| 		if (addr + len < addr) | 		vma = find_vma(current->mm, addr); | ||||||
| 			goto out; | 		if (!vma || addr < vma->vm_start || addr + len > vma->vm_end) | ||||||
| 		if (vma == NULL || addr < vma->vm_start || addr + len > vma->vm_end) | 			goto out_unlock; | ||||||
| 			goto out; |  | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	if (CPU_IS_020_OR_030) { | 	if (CPU_IS_020_OR_030) { | ||||||
|  | @ -429,7 +432,7 @@ sys_cacheflush (unsigned long addr, int scope, int cache, unsigned long len) | ||||||
| 			__asm__ __volatile__ ("movec %0, %%cacr" : : "r" (cacr)); | 			__asm__ __volatile__ ("movec %0, %%cacr" : : "r" (cacr)); | ||||||
| 		} | 		} | ||||||
| 		ret = 0; | 		ret = 0; | ||||||
| 		goto out; | 		goto out_unlock; | ||||||
| 	} else { | 	} else { | ||||||
| 	    /*
 | 	    /*
 | ||||||
| 	     * 040 or 060: don't blindly trust 'scope', someone could | 	     * 040 or 060: don't blindly trust 'scope', someone could | ||||||
|  | @ -446,6 +449,8 @@ sys_cacheflush (unsigned long addr, int scope, int cache, unsigned long len) | ||||||
| 		ret = cache_flush_060 (addr, scope, cache, len); | 		ret = cache_flush_060 (addr, scope, cache, len); | ||||||
| 	    } | 	    } | ||||||
| 	} | 	} | ||||||
|  | out_unlock: | ||||||
|  | 	up_read(¤t->mm->mmap_sem); | ||||||
| out: | out: | ||||||
| 	return ret; | 	return ret; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -1,7 +0,0 @@ | ||||||
| #ifndef __ASM_SUSPEND_H |  | ||||||
| #define __ASM_SUSPEND_H |  | ||||||
| 
 |  | ||||||
| /* References to section boundaries */ |  | ||||||
| extern const void __nosave_begin, __nosave_end; |  | ||||||
| 
 |  | ||||||
| #endif /* __ASM_SUSPEND_H */ |  | ||||||
|  | @ -7,7 +7,7 @@ | ||||||
|  * Author: Hu Hongbing <huhb@lemote.com> |  * Author: Hu Hongbing <huhb@lemote.com> | ||||||
|  *	   Wu Zhangjin <wuzhangjin@gmail.com> |  *	   Wu Zhangjin <wuzhangjin@gmail.com> | ||||||
|  */ |  */ | ||||||
| #include <asm/suspend.h> | #include <asm/sections.h> | ||||||
| #include <asm/fpu.h> | #include <asm/fpu.h> | ||||||
| #include <asm/dsp.h> | #include <asm/dsp.h> | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -8,4 +8,5 @@ generic-y += irq_work.h | ||||||
| generic-y += mcs_spinlock.h | generic-y += mcs_spinlock.h | ||||||
| generic-y += preempt.h | generic-y += preempt.h | ||||||
| generic-y += scatterlist.h | generic-y += scatterlist.h | ||||||
|  | generic-y += sections.h | ||||||
| generic-y += trace_clock.h | generic-y += trace_clock.h | ||||||
|  |  | ||||||
|  | @ -1 +0,0 @@ | ||||||
| #include <asm-generic/sections.h> |  | ||||||
|  | @ -38,10 +38,9 @@ static inline int pte_none(pte_t pte)		{ return (pte_val(pte) & ~_PTE_NONE_MASK) | ||||||
| static inline pgprot_t pte_pgprot(pte_t pte)	{ return __pgprot(pte_val(pte) & PAGE_PROT_BITS); } | static inline pgprot_t pte_pgprot(pte_t pte)	{ return __pgprot(pte_val(pte) & PAGE_PROT_BITS); } | ||||||
| 
 | 
 | ||||||
| #ifdef CONFIG_NUMA_BALANCING | #ifdef CONFIG_NUMA_BALANCING | ||||||
| 
 |  | ||||||
| static inline int pte_present(pte_t pte) | static inline int pte_present(pte_t pte) | ||||||
| { | { | ||||||
| 	return pte_val(pte) & (_PAGE_PRESENT | _PAGE_NUMA); | 	return pte_val(pte) & _PAGE_NUMA_MASK; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| #define pte_present_nonuma pte_present_nonuma | #define pte_present_nonuma pte_present_nonuma | ||||||
|  | @ -50,37 +49,6 @@ static inline int pte_present_nonuma(pte_t pte) | ||||||
| 	return pte_val(pte) & (_PAGE_PRESENT); | 	return pte_val(pte) & (_PAGE_PRESENT); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| #define pte_numa pte_numa |  | ||||||
| static inline int pte_numa(pte_t pte) |  | ||||||
| { |  | ||||||
| 	return (pte_val(pte) & |  | ||||||
| 		(_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| #define pte_mknonnuma pte_mknonnuma |  | ||||||
| static inline pte_t pte_mknonnuma(pte_t pte) |  | ||||||
| { |  | ||||||
| 	pte_val(pte) &= ~_PAGE_NUMA; |  | ||||||
| 	pte_val(pte) |=  _PAGE_PRESENT | _PAGE_ACCESSED; |  | ||||||
| 	return pte; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| #define pte_mknuma pte_mknuma |  | ||||||
| static inline pte_t pte_mknuma(pte_t pte) |  | ||||||
| { |  | ||||||
| 	/*
 |  | ||||||
| 	 * We should not set _PAGE_NUMA on non present ptes. Also clear the |  | ||||||
| 	 * present bit so that hash_page will return 1 and we collect this |  | ||||||
| 	 * as numa fault. |  | ||||||
| 	 */ |  | ||||||
| 	if (pte_present(pte)) { |  | ||||||
| 		pte_val(pte) |= _PAGE_NUMA; |  | ||||||
| 		pte_val(pte) &= ~_PAGE_PRESENT; |  | ||||||
| 	} else |  | ||||||
| 		VM_BUG_ON(1); |  | ||||||
| 	return pte; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| #define ptep_set_numa ptep_set_numa | #define ptep_set_numa ptep_set_numa | ||||||
| static inline void ptep_set_numa(struct mm_struct *mm, unsigned long addr, | static inline void ptep_set_numa(struct mm_struct *mm, unsigned long addr, | ||||||
| 				 pte_t *ptep) | 				 pte_t *ptep) | ||||||
|  | @ -92,12 +60,6 @@ static inline void ptep_set_numa(struct mm_struct *mm, unsigned long addr, | ||||||
| 	return; | 	return; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| #define pmd_numa pmd_numa |  | ||||||
| static inline int pmd_numa(pmd_t pmd) |  | ||||||
| { |  | ||||||
| 	return pte_numa(pmd_pte(pmd)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| #define pmdp_set_numa pmdp_set_numa | #define pmdp_set_numa pmdp_set_numa | ||||||
| static inline void pmdp_set_numa(struct mm_struct *mm, unsigned long addr, | static inline void pmdp_set_numa(struct mm_struct *mm, unsigned long addr, | ||||||
| 				 pmd_t *pmdp) | 				 pmd_t *pmdp) | ||||||
|  | @ -109,16 +71,21 @@ static inline void pmdp_set_numa(struct mm_struct *mm, unsigned long addr, | ||||||
| 	return; | 	return; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| #define pmd_mknonnuma pmd_mknonnuma | /*
 | ||||||
| static inline pmd_t pmd_mknonnuma(pmd_t pmd) |  * Generic NUMA pte helpers expect pteval_t and pmdval_t types to exist | ||||||
|  |  * which were inherited from x86. For the purposes of powerpc, pte_basic_t and | ||||||
|  |  * pmd_t are equivalent. | ||||||
|  |  */ | ||||||
|  | #define pteval_t pte_basic_t | ||||||
|  | #define pmdval_t pmd_t | ||||||
|  | static inline pteval_t ptenuma_flags(pte_t pte) | ||||||
| { | { | ||||||
| 	return pte_pmd(pte_mknonnuma(pmd_pte(pmd))); | 	return pte_val(pte) & _PAGE_NUMA_MASK; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| #define pmd_mknuma pmd_mknuma | static inline pmdval_t pmdnuma_flags(pmd_t pmd) | ||||||
| static inline pmd_t pmd_mknuma(pmd_t pmd) |  | ||||||
| { | { | ||||||
| 	return pte_pmd(pte_mknuma(pmd_pte(pmd))); | 	return pmd_val(pmd) & _PAGE_NUMA_MASK; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| # else | # else | ||||||
|  |  | ||||||
|  | @ -98,6 +98,11 @@ extern unsigned long bad_call_to_PMD_PAGE_SIZE(void); | ||||||
| 			 _PAGE_USER | _PAGE_ACCESSED | \ | 			 _PAGE_USER | _PAGE_ACCESSED | \ | ||||||
| 			 _PAGE_RW | _PAGE_HWWRITE | _PAGE_DIRTY | _PAGE_EXEC) | 			 _PAGE_RW | _PAGE_HWWRITE | _PAGE_DIRTY | _PAGE_EXEC) | ||||||
| 
 | 
 | ||||||
|  | #ifdef CONFIG_NUMA_BALANCING | ||||||
|  | /* Mask of bits that distinguish present and numa ptes */ | ||||||
|  | #define _PAGE_NUMA_MASK (_PAGE_NUMA|_PAGE_PRESENT) | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
| /*
 | /*
 | ||||||
|  * We define 2 sets of base prot bits, one for basic pages (ie, |  * We define 2 sets of base prot bits, one for basic pages (ie, | ||||||
|  * cacheable kernel and user pages) and one for non cacheable |  * cacheable kernel and user pages) and one for non cacheable | ||||||
|  |  | ||||||
|  | @ -9,9 +9,7 @@ | ||||||
| 
 | 
 | ||||||
| #include <linux/mm.h> | #include <linux/mm.h> | ||||||
| #include <asm/page.h> | #include <asm/page.h> | ||||||
| 
 | #include <asm/sections.h> | ||||||
| /* References to section boundaries */ |  | ||||||
| extern const void __nosave_begin, __nosave_end; |  | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  *	pfn_is_nosave - check if given pfn is in the 'nosave' section |  *	pfn_is_nosave - check if given pfn is in the 'nosave' section | ||||||
|  |  | ||||||
|  | @ -13,13 +13,9 @@ | ||||||
| #include <asm/ipl.h> | #include <asm/ipl.h> | ||||||
| #include <asm/cio.h> | #include <asm/cio.h> | ||||||
| #include <asm/pci.h> | #include <asm/pci.h> | ||||||
|  | #include <asm/sections.h> | ||||||
| #include "entry.h" | #include "entry.h" | ||||||
| 
 | 
 | ||||||
| /*
 |  | ||||||
|  * References to section boundaries |  | ||||||
|  */ |  | ||||||
| extern const void __nosave_begin, __nosave_end; |  | ||||||
| 
 |  | ||||||
| /*
 | /*
 | ||||||
|  * The restore of the saved pages in an hibernation image will set |  * The restore of the saved pages in an hibernation image will set | ||||||
|  * the change and referenced bits in the storage key for each page. |  * the change and referenced bits in the storage key for each page. | ||||||
|  |  | ||||||
|  | @ -10,6 +10,7 @@ generic-y += irq_work.h | ||||||
| generic-y += mcs_spinlock.h | generic-y += mcs_spinlock.h | ||||||
| generic-y += preempt.h | generic-y += preempt.h | ||||||
| generic-y += scatterlist.h | generic-y += scatterlist.h | ||||||
|  | generic-y += sections.h | ||||||
| generic-y += trace_clock.h | generic-y += trace_clock.h | ||||||
| generic-y += xor.h | generic-y += xor.h | ||||||
| generic-y += serial.h | generic-y += serial.h | ||||||
|  |  | ||||||
|  | @ -1,6 +0,0 @@ | ||||||
| #ifndef _ASM_SCORE_SECTIONS_H |  | ||||||
| #define _ASM_SCORE_SECTIONS_H |  | ||||||
| 
 |  | ||||||
| #include <asm-generic/sections.h> |  | ||||||
| 
 |  | ||||||
| #endif /* _ASM_SCORE_SECTIONS_H */ |  | ||||||
|  | @ -3,7 +3,6 @@ | ||||||
| 
 | 
 | ||||||
| #include <asm-generic/sections.h> | #include <asm-generic/sections.h> | ||||||
| 
 | 
 | ||||||
| extern long __nosave_begin, __nosave_end; |  | ||||||
| extern long __machvec_start, __machvec_end; | extern long __machvec_start, __machvec_end; | ||||||
| extern char __uncached_start, __uncached_end; | extern char __uncached_start, __uncached_end; | ||||||
| extern char __start_eh_frame[], __stop_eh_frame[]; | extern char __start_eh_frame[], __stop_eh_frame[]; | ||||||
|  |  | ||||||
|  | @ -9,11 +9,9 @@ | ||||||
| #include <asm/hibernate.h> | #include <asm/hibernate.h> | ||||||
| #include <asm/visasm.h> | #include <asm/visasm.h> | ||||||
| #include <asm/page.h> | #include <asm/page.h> | ||||||
|  | #include <asm/sections.h> | ||||||
| #include <asm/tlb.h> | #include <asm/tlb.h> | ||||||
| 
 | 
 | ||||||
| /* References to section boundaries */ |  | ||||||
| extern const void __nosave_begin, __nosave_end; |  | ||||||
| 
 |  | ||||||
| struct saved_context saved_context; | struct saved_context saved_context; | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  |  | ||||||
|  | @ -36,8 +36,5 @@ extern int puv3_pm_enter(suspend_state_t state); | ||||||
| /* Defined in hibernate_asm.S */ | /* Defined in hibernate_asm.S */ | ||||||
| extern int restore_image(pgd_t *resume_pg_dir, struct pbe *restore_pblist); | extern int restore_image(pgd_t *resume_pg_dir, struct pbe *restore_pblist); | ||||||
| 
 | 
 | ||||||
| /* References to section boundaries */ |  | ||||||
| extern const void __nosave_begin, __nosave_end; |  | ||||||
| 
 |  | ||||||
| extern struct pbe *restore_pblist; | extern struct pbe *restore_pblist; | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|  | @ -18,6 +18,7 @@ | ||||||
| #include <asm/page.h> | #include <asm/page.h> | ||||||
| #include <asm/pgtable.h> | #include <asm/pgtable.h> | ||||||
| #include <asm/pgalloc.h> | #include <asm/pgalloc.h> | ||||||
|  | #include <asm/sections.h> | ||||||
| #include <asm/suspend.h> | #include <asm/suspend.h> | ||||||
| 
 | 
 | ||||||
| #include "mach/pm.h" | #include "mach/pm.h" | ||||||
|  |  | ||||||
|  | @ -30,7 +30,6 @@ config X86 | ||||||
| 	select HAVE_UNSTABLE_SCHED_CLOCK | 	select HAVE_UNSTABLE_SCHED_CLOCK | ||||||
| 	select ARCH_SUPPORTS_NUMA_BALANCING if X86_64 | 	select ARCH_SUPPORTS_NUMA_BALANCING if X86_64 | ||||||
| 	select ARCH_SUPPORTS_INT128 if X86_64 | 	select ARCH_SUPPORTS_INT128 if X86_64 | ||||||
| 	select ARCH_WANTS_PROT_NUMA_PROT_NONE |  | ||||||
| 	select HAVE_IDE | 	select HAVE_IDE | ||||||
| 	select HAVE_OPROFILE | 	select HAVE_OPROFILE | ||||||
| 	select HAVE_PCSPKR_PLATFORM | 	select HAVE_PCSPKR_PLATFORM | ||||||
|  |  | ||||||
|  | @ -325,6 +325,20 @@ static inline pteval_t pte_flags(pte_t pte) | ||||||
| 	return native_pte_val(pte) & PTE_FLAGS_MASK; | 	return native_pte_val(pte) & PTE_FLAGS_MASK; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | #ifdef CONFIG_NUMA_BALANCING | ||||||
|  | /* Set of bits that distinguishes present, prot_none and numa ptes */ | ||||||
|  | #define _PAGE_NUMA_MASK (_PAGE_NUMA|_PAGE_PROTNONE|_PAGE_PRESENT) | ||||||
|  | static inline pteval_t ptenuma_flags(pte_t pte) | ||||||
|  | { | ||||||
|  | 	return pte_flags(pte) & _PAGE_NUMA_MASK; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static inline pmdval_t pmdnuma_flags(pmd_t pmd) | ||||||
|  | { | ||||||
|  | 	return pmd_flags(pmd) & _PAGE_NUMA_MASK; | ||||||
|  | } | ||||||
|  | #endif /* CONFIG_NUMA_BALANCING */ | ||||||
|  | 
 | ||||||
| #define pgprot_val(x)	((x).pgprot) | #define pgprot_val(x)	((x).pgprot) | ||||||
| #define __pgprot(x)	((pgprot_t) { (x) } ) | #define __pgprot(x)	((pgprot_t) { (x) } ) | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -13,13 +13,11 @@ | ||||||
| #include <asm/page.h> | #include <asm/page.h> | ||||||
| #include <asm/pgtable.h> | #include <asm/pgtable.h> | ||||||
| #include <asm/mmzone.h> | #include <asm/mmzone.h> | ||||||
|  | #include <asm/sections.h> | ||||||
| 
 | 
 | ||||||
| /* Defined in hibernate_asm_32.S */ | /* Defined in hibernate_asm_32.S */ | ||||||
| extern int restore_image(void); | extern int restore_image(void); | ||||||
| 
 | 
 | ||||||
| /* References to section boundaries */ |  | ||||||
| extern const void __nosave_begin, __nosave_end; |  | ||||||
| 
 |  | ||||||
| /* Pointer to the temporary resume page tables */ | /* Pointer to the temporary resume page tables */ | ||||||
| pgd_t *resume_pg_dir; | pgd_t *resume_pg_dir; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -17,11 +17,9 @@ | ||||||
| #include <asm/page.h> | #include <asm/page.h> | ||||||
| #include <asm/pgtable.h> | #include <asm/pgtable.h> | ||||||
| #include <asm/mtrr.h> | #include <asm/mtrr.h> | ||||||
|  | #include <asm/sections.h> | ||||||
| #include <asm/suspend.h> | #include <asm/suspend.h> | ||||||
| 
 | 
 | ||||||
| /* References to section boundaries */ |  | ||||||
| extern __visible const void __nosave_begin, __nosave_end; |  | ||||||
| 
 |  | ||||||
| /* Defined in hibernate_asm_64.S */ | /* Defined in hibernate_asm_64.S */ | ||||||
| extern asmlinkage __visible int restore_image(void); | extern asmlinkage __visible int restore_image(void); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -252,6 +252,9 @@ config DMA_CMA | ||||||
| 	  to allocate big physically-contiguous blocks of memory for use with | 	  to allocate big physically-contiguous blocks of memory for use with | ||||||
| 	  hardware components that support neither I/O mapping nor scatter-gather. | 	  hardware components that support neither I/O mapping nor scatter-gather. | ||||||
| 
 | 
 | ||||||
|  | 	  You can disable CMA by specifying "cma=0" on the kernel's command | ||||||
|  | 	  line. | ||||||
|  | 
 | ||||||
| 	  For more information see <include/linux/dma-contiguous.h>. | 	  For more information see <include/linux/dma-contiguous.h>. | ||||||
| 	  If unsure, say "n". | 	  If unsure, say "n". | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -10,6 +10,8 @@ | ||||||
| #include <linux/dma-mapping.h> | #include <linux/dma-mapping.h> | ||||||
| #include <linux/export.h> | #include <linux/export.h> | ||||||
| #include <linux/gfp.h> | #include <linux/gfp.h> | ||||||
|  | #include <linux/slab.h> | ||||||
|  | #include <linux/vmalloc.h> | ||||||
| #include <asm-generic/dma-coherent.h> | #include <asm-generic/dma-coherent.h> | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  | @ -267,3 +269,73 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, | ||||||
| 	return ret; | 	return ret; | ||||||
| } | } | ||||||
| EXPORT_SYMBOL(dma_common_mmap); | EXPORT_SYMBOL(dma_common_mmap); | ||||||
|  | 
 | ||||||
|  | #ifdef CONFIG_MMU | ||||||
|  | /*
 | ||||||
|  |  * remaps an array of PAGE_SIZE pages into another vm_area | ||||||
|  |  * Cannot be used in non-sleeping contexts | ||||||
|  |  */ | ||||||
|  | void *dma_common_pages_remap(struct page **pages, size_t size, | ||||||
|  | 			unsigned long vm_flags, pgprot_t prot, | ||||||
|  | 			const void *caller) | ||||||
|  | { | ||||||
|  | 	struct vm_struct *area; | ||||||
|  | 
 | ||||||
|  | 	area = get_vm_area_caller(size, vm_flags, caller); | ||||||
|  | 	if (!area) | ||||||
|  | 		return NULL; | ||||||
|  | 
 | ||||||
|  | 	area->pages = pages; | ||||||
|  | 
 | ||||||
|  | 	if (map_vm_area(area, prot, pages)) { | ||||||
|  | 		vunmap(area->addr); | ||||||
|  | 		return NULL; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return area->addr; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * remaps an allocated contiguous region into another vm_area. | ||||||
|  |  * Cannot be used in non-sleeping contexts | ||||||
|  |  */ | ||||||
|  | 
 | ||||||
|  | void *dma_common_contiguous_remap(struct page *page, size_t size, | ||||||
|  | 			unsigned long vm_flags, | ||||||
|  | 			pgprot_t prot, const void *caller) | ||||||
|  | { | ||||||
|  | 	int i; | ||||||
|  | 	struct page **pages; | ||||||
|  | 	void *ptr; | ||||||
|  | 	unsigned long pfn; | ||||||
|  | 
 | ||||||
|  | 	pages = kmalloc(sizeof(struct page *) << get_order(size), GFP_KERNEL); | ||||||
|  | 	if (!pages) | ||||||
|  | 		return NULL; | ||||||
|  | 
 | ||||||
|  | 	for (i = 0, pfn = page_to_pfn(page); i < (size >> PAGE_SHIFT); i++) | ||||||
|  | 		pages[i] = pfn_to_page(pfn + i); | ||||||
|  | 
 | ||||||
|  | 	ptr = dma_common_pages_remap(pages, size, vm_flags, prot, caller); | ||||||
|  | 
 | ||||||
|  | 	kfree(pages); | ||||||
|  | 
 | ||||||
|  | 	return ptr; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * unmaps a range previously mapped by dma_common_*_remap | ||||||
|  |  */ | ||||||
|  | void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags) | ||||||
|  | { | ||||||
|  | 	struct vm_struct *area = find_vm_area(cpu_addr); | ||||||
|  | 
 | ||||||
|  | 	if (!area || (area->flags & vm_flags) != vm_flags) { | ||||||
|  | 		WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); | ||||||
|  | 		return; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	unmap_kernel_range((unsigned long)cpu_addr, size); | ||||||
|  | 	vunmap(cpu_addr); | ||||||
|  | } | ||||||
|  | #endif | ||||||
|  |  | ||||||
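Editor's note: the three helpers added above are intended to be used as a pair: remap a region, use it, then free the mapping. The following is only a hedged sketch of a hypothetical caller; the helper signatures are the ones added in this hunk, while pgprot_writecombine(PAGE_KERNEL) and the wrapper names are assumptions chosen for illustration.

/* Hypothetical wrappers around the new helpers (illustration only). */
static void *example_remap(struct page *page, size_t size)
{
	return dma_common_contiguous_remap(page, size, VM_USERMAP,
					   pgprot_writecombine(PAGE_KERNEL),
					   __builtin_return_address(0));
}

static void example_unmap(void *cpu_addr, size_t size)
{
	dma_common_free_remap(cpu_addr, size, VM_USERMAP);
}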
|  | @ -373,6 +373,45 @@ static ssize_t show_phys_device(struct device *dev, | ||||||
| 	return sprintf(buf, "%d\n", mem->phys_device); | 	return sprintf(buf, "%d\n", mem->phys_device); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | #ifdef CONFIG_MEMORY_HOTREMOVE | ||||||
|  | static ssize_t show_valid_zones(struct device *dev, | ||||||
|  | 				struct device_attribute *attr, char *buf) | ||||||
|  | { | ||||||
|  | 	struct memory_block *mem = to_memory_block(dev); | ||||||
|  | 	unsigned long start_pfn, end_pfn; | ||||||
|  | 	unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; | ||||||
|  | 	struct page *first_page; | ||||||
|  | 	struct zone *zone; | ||||||
|  | 
 | ||||||
|  | 	start_pfn = section_nr_to_pfn(mem->start_section_nr); | ||||||
|  | 	end_pfn = start_pfn + nr_pages; | ||||||
|  | 	first_page = pfn_to_page(start_pfn); | ||||||
|  | 
 | ||||||
|  | 	/* A block that contains more than one zone cannot be offlined. */ | ||||||
|  | 	if (!test_pages_in_a_zone(start_pfn, end_pfn)) | ||||||
|  | 		return sprintf(buf, "none\n"); | ||||||
|  | 
 | ||||||
|  | 	zone = page_zone(first_page); | ||||||
|  | 
 | ||||||
|  | 	if (zone_idx(zone) == ZONE_MOVABLE - 1) { | ||||||
|  | 		/* The mem block is the last memory block of this zone. */ | ||||||
|  | 		if (end_pfn == zone_end_pfn(zone)) | ||||||
|  | 			return sprintf(buf, "%s %s\n", | ||||||
|  | 					zone->name, (zone + 1)->name); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if (zone_idx(zone) == ZONE_MOVABLE) { | ||||||
|  | 		/* The mem block is the first memory block of ZONE_MOVABLE. */ | ||||||
|  | 		if (start_pfn == zone->zone_start_pfn) | ||||||
|  | 			return sprintf(buf, "%s %s\n", | ||||||
|  | 					zone->name, (zone - 1)->name); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return sprintf(buf, "%s\n", zone->name); | ||||||
|  | } | ||||||
|  | static DEVICE_ATTR(valid_zones, 0444, show_valid_zones, NULL); | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
| static DEVICE_ATTR(phys_index, 0444, show_mem_start_phys_index, NULL); | static DEVICE_ATTR(phys_index, 0444, show_mem_start_phys_index, NULL); | ||||||
| static DEVICE_ATTR(state, 0644, show_mem_state, store_mem_state); | static DEVICE_ATTR(state, 0644, show_mem_state, store_mem_state); | ||||||
| static DEVICE_ATTR(phys_device, 0444, show_phys_device, NULL); | static DEVICE_ATTR(phys_device, 0444, show_phys_device, NULL); | ||||||
|  | @ -523,6 +562,9 @@ static struct attribute *memory_memblk_attrs[] = { | ||||||
| 	&dev_attr_state.attr, | 	&dev_attr_state.attr, | ||||||
| 	&dev_attr_phys_device.attr, | 	&dev_attr_phys_device.attr, | ||||||
| 	&dev_attr_removable.attr, | 	&dev_attr_removable.attr, | ||||||
|  | #ifdef CONFIG_MEMORY_HOTREMOVE | ||||||
|  | 	&dev_attr_valid_zones.attr, | ||||||
|  | #endif | ||||||
| 	NULL | 	NULL | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -289,8 +289,6 @@ static int register_node(struct node *node, int num, struct node *parent) | ||||||
| 		device_create_file(&node->dev, &dev_attr_distance); | 		device_create_file(&node->dev, &dev_attr_distance); | ||||||
| 		device_create_file(&node->dev, &dev_attr_vmstat); | 		device_create_file(&node->dev, &dev_attr_vmstat); | ||||||
| 
 | 
 | ||||||
| 		scan_unevictable_register_node(node); |  | ||||||
| 
 |  | ||||||
| 		hugetlb_register_node(node); | 		hugetlb_register_node(node); | ||||||
| 
 | 
 | ||||||
| 		compaction_register_node(node); | 		compaction_register_node(node); | ||||||
|  | @ -314,7 +312,6 @@ void unregister_node(struct node *node) | ||||||
| 	device_remove_file(&node->dev, &dev_attr_distance); | 	device_remove_file(&node->dev, &dev_attr_distance); | ||||||
| 	device_remove_file(&node->dev, &dev_attr_vmstat); | 	device_remove_file(&node->dev, &dev_attr_vmstat); | ||||||
| 
 | 
 | ||||||
| 	scan_unevictable_unregister_node(node); |  | ||||||
| 	hugetlb_unregister_node(node);		/* no-op, if memoryless node */ | 	hugetlb_unregister_node(node);		/* no-op, if memoryless node */ | ||||||
| 
 | 
 | ||||||
| 	device_unregister(&node->dev); | 	device_unregister(&node->dev); | ||||||
|  |  | ||||||
|  | @ -103,10 +103,10 @@ static ssize_t mem_used_total_show(struct device *dev, | ||||||
| 
 | 
 | ||||||
| 	down_read(&zram->init_lock); | 	down_read(&zram->init_lock); | ||||||
| 	if (init_done(zram)) | 	if (init_done(zram)) | ||||||
| 		val = zs_get_total_size_bytes(meta->mem_pool); | 		val = zs_get_total_pages(meta->mem_pool); | ||||||
| 	up_read(&zram->init_lock); | 	up_read(&zram->init_lock); | ||||||
| 
 | 
 | ||||||
| 	return scnprintf(buf, PAGE_SIZE, "%llu\n", val); | 	return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static ssize_t max_comp_streams_show(struct device *dev, | static ssize_t max_comp_streams_show(struct device *dev, | ||||||
|  | @ -122,6 +122,72 @@ static ssize_t max_comp_streams_show(struct device *dev, | ||||||
| 	return scnprintf(buf, PAGE_SIZE, "%d\n", val); | 	return scnprintf(buf, PAGE_SIZE, "%d\n", val); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static ssize_t mem_limit_show(struct device *dev, | ||||||
|  | 		struct device_attribute *attr, char *buf) | ||||||
|  | { | ||||||
|  | 	u64 val; | ||||||
|  | 	struct zram *zram = dev_to_zram(dev); | ||||||
|  | 
 | ||||||
|  | 	down_read(&zram->init_lock); | ||||||
|  | 	val = zram->limit_pages; | ||||||
|  | 	up_read(&zram->init_lock); | ||||||
|  | 
 | ||||||
|  | 	return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static ssize_t mem_limit_store(struct device *dev, | ||||||
|  | 		struct device_attribute *attr, const char *buf, size_t len) | ||||||
|  | { | ||||||
|  | 	u64 limit; | ||||||
|  | 	char *tmp; | ||||||
|  | 	struct zram *zram = dev_to_zram(dev); | ||||||
|  | 
 | ||||||
|  | 	limit = memparse(buf, &tmp); | ||||||
|  | 	if (buf == tmp) /* no chars parsed, invalid input */ | ||||||
|  | 		return -EINVAL; | ||||||
|  | 
 | ||||||
|  | 	down_write(&zram->init_lock); | ||||||
|  | 	zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT; | ||||||
|  | 	up_write(&zram->init_lock); | ||||||
|  | 
 | ||||||
|  | 	return len; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static ssize_t mem_used_max_show(struct device *dev, | ||||||
|  | 		struct device_attribute *attr, char *buf) | ||||||
|  | { | ||||||
|  | 	u64 val = 0; | ||||||
|  | 	struct zram *zram = dev_to_zram(dev); | ||||||
|  | 
 | ||||||
|  | 	down_read(&zram->init_lock); | ||||||
|  | 	if (init_done(zram)) | ||||||
|  | 		val = atomic_long_read(&zram->stats.max_used_pages); | ||||||
|  | 	up_read(&zram->init_lock); | ||||||
|  | 
 | ||||||
|  | 	return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static ssize_t mem_used_max_store(struct device *dev, | ||||||
|  | 		struct device_attribute *attr, const char *buf, size_t len) | ||||||
|  | { | ||||||
|  | 	int err; | ||||||
|  | 	unsigned long val; | ||||||
|  | 	struct zram *zram = dev_to_zram(dev); | ||||||
|  | 	struct zram_meta *meta = zram->meta; | ||||||
|  | 
 | ||||||
|  | 	err = kstrtoul(buf, 10, &val); | ||||||
|  | 	if (err || val != 0) | ||||||
|  | 		return -EINVAL; | ||||||
|  | 
 | ||||||
|  | 	down_read(&zram->init_lock); | ||||||
|  | 	if (init_done(zram)) | ||||||
|  | 		atomic_long_set(&zram->stats.max_used_pages, | ||||||
|  | 				zs_get_total_pages(meta->mem_pool)); | ||||||
|  | 	up_read(&zram->init_lock); | ||||||
|  | 
 | ||||||
|  | 	return len; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| static ssize_t max_comp_streams_store(struct device *dev, | static ssize_t max_comp_streams_store(struct device *dev, | ||||||
| 		struct device_attribute *attr, const char *buf, size_t len) | 		struct device_attribute *attr, const char *buf, size_t len) | ||||||
| { | { | ||||||
|  | @ -434,6 +500,21 @@ out_cleanup: | ||||||
| 	return ret; | 	return ret; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static inline void update_used_max(struct zram *zram, | ||||||
|  | 					const unsigned long pages) | ||||||
|  | { | ||||||
|  | 	int old_max, cur_max; | ||||||
|  | 
 | ||||||
|  | 	old_max = atomic_long_read(&zram->stats.max_used_pages); | ||||||
|  | 
 | ||||||
|  | 	do { | ||||||
|  | 		cur_max = old_max; | ||||||
|  | 		if (pages > cur_max) | ||||||
|  | 			old_max = atomic_long_cmpxchg( | ||||||
|  | 				&zram->stats.max_used_pages, cur_max, pages); | ||||||
|  | 	} while (old_max != cur_max); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, | static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, | ||||||
| 			   int offset) | 			   int offset) | ||||||
| { | { | ||||||
|  | @ -445,6 +526,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, | ||||||
| 	struct zram_meta *meta = zram->meta; | 	struct zram_meta *meta = zram->meta; | ||||||
| 	struct zcomp_strm *zstrm; | 	struct zcomp_strm *zstrm; | ||||||
| 	bool locked = false; | 	bool locked = false; | ||||||
|  | 	unsigned long alloced_pages; | ||||||
| 
 | 
 | ||||||
| 	page = bvec->bv_page; | 	page = bvec->bv_page; | ||||||
| 	if (is_partial_io(bvec)) { | 	if (is_partial_io(bvec)) { | ||||||
|  | @ -513,6 +595,16 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, | ||||||
| 		ret = -ENOMEM; | 		ret = -ENOMEM; | ||||||
| 		goto out; | 		goto out; | ||||||
| 	} | 	} | ||||||
|  | 
 | ||||||
|  | 	alloced_pages = zs_get_total_pages(meta->mem_pool); | ||||||
|  | 	if (zram->limit_pages && alloced_pages > zram->limit_pages) { | ||||||
|  | 		zs_free(meta->mem_pool, handle); | ||||||
|  | 		ret = -ENOMEM; | ||||||
|  | 		goto out; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	update_used_max(zram, alloced_pages); | ||||||
|  | 
 | ||||||
| 	cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO); | 	cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO); | ||||||
| 
 | 
 | ||||||
| 	if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) { | 	if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) { | ||||||
|  | @ -606,6 +698,7 @@ static void zram_bio_discard(struct zram *zram, u32 index, | ||||||
| 		bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); | 		bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); | ||||||
| 		zram_free_page(zram, index); | 		zram_free_page(zram, index); | ||||||
| 		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | 		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | ||||||
|  | 		atomic64_inc(&zram->stats.notify_free); | ||||||
| 		index++; | 		index++; | ||||||
| 		n -= PAGE_SIZE; | 		n -= PAGE_SIZE; | ||||||
| 	} | 	} | ||||||
|  | @ -617,6 +710,9 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity) | ||||||
| 	struct zram_meta *meta; | 	struct zram_meta *meta; | ||||||
| 
 | 
 | ||||||
| 	down_write(&zram->init_lock); | 	down_write(&zram->init_lock); | ||||||
|  | 
 | ||||||
|  | 	zram->limit_pages = 0; | ||||||
|  | 
 | ||||||
| 	if (!init_done(zram)) { | 	if (!init_done(zram)) { | ||||||
| 		up_write(&zram->init_lock); | 		up_write(&zram->init_lock); | ||||||
| 		return; | 		return; | ||||||
|  | @ -857,6 +953,10 @@ static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL); | ||||||
| static DEVICE_ATTR(reset, S_IWUSR, NULL, reset_store); | static DEVICE_ATTR(reset, S_IWUSR, NULL, reset_store); | ||||||
| static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL); | static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL); | ||||||
| static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL); | static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL); | ||||||
|  | static DEVICE_ATTR(mem_limit, S_IRUGO | S_IWUSR, mem_limit_show, | ||||||
|  | 		mem_limit_store); | ||||||
|  | static DEVICE_ATTR(mem_used_max, S_IRUGO | S_IWUSR, mem_used_max_show, | ||||||
|  | 		mem_used_max_store); | ||||||
| static DEVICE_ATTR(max_comp_streams, S_IRUGO | S_IWUSR, | static DEVICE_ATTR(max_comp_streams, S_IRUGO | S_IWUSR, | ||||||
| 		max_comp_streams_show, max_comp_streams_store); | 		max_comp_streams_show, max_comp_streams_store); | ||||||
| static DEVICE_ATTR(comp_algorithm, S_IRUGO | S_IWUSR, | static DEVICE_ATTR(comp_algorithm, S_IRUGO | S_IWUSR, | ||||||
|  | @ -885,6 +985,8 @@ static struct attribute *zram_disk_attrs[] = { | ||||||
| 	&dev_attr_orig_data_size.attr, | 	&dev_attr_orig_data_size.attr, | ||||||
| 	&dev_attr_compr_data_size.attr, | 	&dev_attr_compr_data_size.attr, | ||||||
| 	&dev_attr_mem_used_total.attr, | 	&dev_attr_mem_used_total.attr, | ||||||
|  | 	&dev_attr_mem_limit.attr, | ||||||
|  | 	&dev_attr_mem_used_max.attr, | ||||||
| 	&dev_attr_max_comp_streams.attr, | 	&dev_attr_max_comp_streams.attr, | ||||||
| 	&dev_attr_comp_algorithm.attr, | 	&dev_attr_comp_algorithm.attr, | ||||||
| 	NULL, | 	NULL, | ||||||
|  |  | ||||||
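Editor's note: update_used_max() above raises a high-water mark without taking a lock. It reads the current maximum and retries a compare-and-swap until the stored value is at least the value it observed. The same pattern in portable C11 atomics, as an illustrative userspace analogue rather than kernel code:

#include <stdatomic.h>

static atomic_long max_used_pages;

/* Raise the recorded maximum to 'pages' if it is larger; lock-free. */
static void update_used_max(long pages)
{
	long cur = atomic_load(&max_used_pages);

	while (pages > cur) {
		/* On failure, 'cur' is refreshed with the current value,
		 * so the loop re-checks whether an update is still needed. */
		if (atomic_compare_exchange_weak(&max_used_pages, &cur, pages))
			break;
	}
}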
|  | @ -90,6 +90,7 @@ struct zram_stats { | ||||||
| 	atomic64_t notify_free;	/* no. of swap slot free notifications */ | 	atomic64_t notify_free;	/* no. of swap slot free notifications */ | ||||||
| 	atomic64_t zero_pages;		/* no. of zero filled pages */ | 	atomic64_t zero_pages;		/* no. of zero filled pages */ | ||||||
| 	atomic64_t pages_stored;	/* no. of pages currently stored */ | 	atomic64_t pages_stored;	/* no. of pages currently stored */ | ||||||
|  | 	atomic_long_t max_used_pages;	/* no. of maximum pages stored */ | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| struct zram_meta { | struct zram_meta { | ||||||
|  | @ -112,6 +113,11 @@ struct zram { | ||||||
| 	u64 disksize;	/* bytes */ | 	u64 disksize;	/* bytes */ | ||||||
| 	int max_comp_streams; | 	int max_comp_streams; | ||||||
| 	struct zram_stats stats; | 	struct zram_stats stats; | ||||||
|  | 	/*
 | ||||||
|  | 	 * the number of pages zram can consume for storing compressed data | ||||||
|  | 	 */ | ||||||
|  | 	unsigned long limit_pages; | ||||||
|  | 
 | ||||||
| 	char compressor[10]; | 	char compressor[10]; | ||||||
| }; | }; | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|  | @ -184,6 +184,9 @@ static int add_sysfs_fw_map_entry(struct firmware_map_entry *entry) | ||||||
| 	static int map_entries_nr; | 	static int map_entries_nr; | ||||||
| 	static struct kset *mmap_kset; | 	static struct kset *mmap_kset; | ||||||
| 
 | 
 | ||||||
|  | 	if (entry->kobj.state_in_sysfs) | ||||||
|  | 		return -EEXIST; | ||||||
|  | 
 | ||||||
| 	if (!mmap_kset) { | 	if (!mmap_kset) { | ||||||
| 		mmap_kset = kset_create_and_add("memmap", NULL, firmware_kobj); | 		mmap_kset = kset_create_and_add("memmap", NULL, firmware_kobj); | ||||||
| 		if (!mmap_kset) | 		if (!mmap_kset) | ||||||
|  |  | ||||||
|  | @ -25,6 +25,7 @@ config VIRTIO_PCI | ||||||
| config VIRTIO_BALLOON | config VIRTIO_BALLOON | ||||||
| 	tristate "Virtio balloon driver" | 	tristate "Virtio balloon driver" | ||||||
| 	depends on VIRTIO | 	depends on VIRTIO | ||||||
|  | 	select MEMORY_BALLOON | ||||||
| 	---help--- | 	---help--- | ||||||
| 	 This driver supports increasing and decreasing the amount | 	 This driver supports increasing and decreasing the amount | ||||||
| 	 of memory within a KVM guest. | 	 of memory within a KVM guest. | ||||||
|  |  | ||||||
|  | @ -59,7 +59,7 @@ struct virtio_balloon | ||||||
| 	 * Each page on this list adds VIRTIO_BALLOON_PAGES_PER_PAGE | 	 * Each page on this list adds VIRTIO_BALLOON_PAGES_PER_PAGE | ||||||
| 	 * to num_pages above. | 	 * to num_pages above. | ||||||
| 	 */ | 	 */ | ||||||
| 	struct balloon_dev_info *vb_dev_info; | 	struct balloon_dev_info vb_dev_info; | ||||||
| 
 | 
 | ||||||
| 	/* Synchronize access/update to this struct virtio_balloon elements */ | 	/* Synchronize access/update to this struct virtio_balloon elements */ | ||||||
| 	struct mutex balloon_lock; | 	struct mutex balloon_lock; | ||||||
|  | @ -127,7 +127,7 @@ static void set_page_pfns(u32 pfns[], struct page *page) | ||||||
| 
 | 
 | ||||||
| static void fill_balloon(struct virtio_balloon *vb, size_t num) | static void fill_balloon(struct virtio_balloon *vb, size_t num) | ||||||
| { | { | ||||||
| 	struct balloon_dev_info *vb_dev_info = vb->vb_dev_info; | 	struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info; | ||||||
| 
 | 
 | ||||||
| 	/* We can only do one array worth at a time. */ | 	/* We can only do one array worth at a time. */ | ||||||
| 	num = min(num, ARRAY_SIZE(vb->pfns)); | 	num = min(num, ARRAY_SIZE(vb->pfns)); | ||||||
|  | @ -163,15 +163,15 @@ static void release_pages_by_pfn(const u32 pfns[], unsigned int num) | ||||||
| 	/* Find pfns pointing at start of each page, get pages and free them. */ | 	/* Find pfns pointing at start of each page, get pages and free them. */ | ||||||
| 	for (i = 0; i < num; i += VIRTIO_BALLOON_PAGES_PER_PAGE) { | 	for (i = 0; i < num; i += VIRTIO_BALLOON_PAGES_PER_PAGE) { | ||||||
| 		struct page *page = balloon_pfn_to_page(pfns[i]); | 		struct page *page = balloon_pfn_to_page(pfns[i]); | ||||||
| 		balloon_page_free(page); |  | ||||||
| 		adjust_managed_page_count(page, 1); | 		adjust_managed_page_count(page, 1); | ||||||
|  | 		put_page(page); /* balloon reference */ | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void leak_balloon(struct virtio_balloon *vb, size_t num) | static void leak_balloon(struct virtio_balloon *vb, size_t num) | ||||||
| { | { | ||||||
| 	struct page *page; | 	struct page *page; | ||||||
| 	struct balloon_dev_info *vb_dev_info = vb->vb_dev_info; | 	struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info; | ||||||
| 
 | 
 | ||||||
| 	/* We can only do one array worth at a time. */ | 	/* We can only do one array worth at a time. */ | ||||||
| 	num = min(num, ARRAY_SIZE(vb->pfns)); | 	num = min(num, ARRAY_SIZE(vb->pfns)); | ||||||
|  | @ -353,12 +353,11 @@ static int init_vqs(struct virtio_balloon *vb) | ||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static const struct address_space_operations virtio_balloon_aops; |  | ||||||
| #ifdef CONFIG_BALLOON_COMPACTION | #ifdef CONFIG_BALLOON_COMPACTION | ||||||
| /*
 | /*
 | ||||||
|  * virtballoon_migratepage - perform the balloon page migration on behalf of |  * virtballoon_migratepage - perform the balloon page migration on behalf of | ||||||
|  *			     a compaction thread.     (called under page lock) |  *			     a compaction thread.     (called under page lock) | ||||||
|  * @mapping: the page->mapping which will be assigned to the new migrated page. |  * @vb_dev_info: the balloon device | ||||||
|  * @newpage: page that will replace the isolated page after migration finishes. |  * @newpage: page that will replace the isolated page after migration finishes. | ||||||
|  * @page   : the isolated (old) page that is about to be migrated to newpage. |  * @page   : the isolated (old) page that is about to be migrated to newpage. | ||||||
|  * @mode   : compaction mode -- not used for balloon page migration. |  * @mode   : compaction mode -- not used for balloon page migration. | ||||||
|  | @ -373,17 +372,13 @@ static const struct address_space_operations virtio_balloon_aops; | ||||||
|  * This function performs the balloon page migration task. |  * This function performs the balloon page migration task. | ||||||
|  * Called through balloon_mapping->a_ops->migratepage |  * Called through balloon_mapping->a_ops->migratepage | ||||||
|  */ |  */ | ||||||
| static int virtballoon_migratepage(struct address_space *mapping, | static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info, | ||||||
| 		struct page *newpage, struct page *page, enum migrate_mode mode) | 		struct page *newpage, struct page *page, enum migrate_mode mode) | ||||||
| { | { | ||||||
| 	struct balloon_dev_info *vb_dev_info = balloon_page_device(page); | 	struct virtio_balloon *vb = container_of(vb_dev_info, | ||||||
| 	struct virtio_balloon *vb; | 			struct virtio_balloon, vb_dev_info); | ||||||
| 	unsigned long flags; | 	unsigned long flags; | ||||||
| 
 | 
 | ||||||
| 	BUG_ON(!vb_dev_info); |  | ||||||
| 
 |  | ||||||
| 	vb = vb_dev_info->balloon_device; |  | ||||||
| 
 |  | ||||||
| 	/*
 | 	/*
 | ||||||
| 	 * In order to avoid lock contention while migrating pages concurrently | 	 * In order to avoid lock contention while migrating pages concurrently | ||||||
| 	 * to leak_balloon() or fill_balloon() we just give up the balloon_lock | 	 * to leak_balloon() or fill_balloon() we just give up the balloon_lock | ||||||
|  | @ -395,21 +390,19 @@ static int virtballoon_migratepage(struct address_space *mapping, | ||||||
| 	if (!mutex_trylock(&vb->balloon_lock)) | 	if (!mutex_trylock(&vb->balloon_lock)) | ||||||
| 		return -EAGAIN; | 		return -EAGAIN; | ||||||
| 
 | 
 | ||||||
|  | 	get_page(newpage); /* balloon reference */ | ||||||
|  | 
 | ||||||
| 	/* balloon's page migration 1st step  -- inflate "newpage" */ | 	/* balloon's page migration 1st step  -- inflate "newpage" */ | ||||||
| 	spin_lock_irqsave(&vb_dev_info->pages_lock, flags); | 	spin_lock_irqsave(&vb_dev_info->pages_lock, flags); | ||||||
| 	balloon_page_insert(newpage, mapping, &vb_dev_info->pages); | 	balloon_page_insert(vb_dev_info, newpage); | ||||||
| 	vb_dev_info->isolated_pages--; | 	vb_dev_info->isolated_pages--; | ||||||
|  | 	__count_vm_event(BALLOON_MIGRATE); | ||||||
| 	spin_unlock_irqrestore(&vb_dev_info->pages_lock, flags); | 	spin_unlock_irqrestore(&vb_dev_info->pages_lock, flags); | ||||||
| 	vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE; | 	vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE; | ||||||
| 	set_page_pfns(vb->pfns, newpage); | 	set_page_pfns(vb->pfns, newpage); | ||||||
| 	tell_host(vb, vb->inflate_vq); | 	tell_host(vb, vb->inflate_vq); | ||||||
| 
 | 
 | ||||||
| 	/*
 | 	/* balloon's page migration 2nd step -- deflate "page" */ | ||||||
| 	 * balloon's page migration 2nd step -- deflate "page" |  | ||||||
| 	 * |  | ||||||
| 	 * It's safe to delete page->lru here because this page is at |  | ||||||
| 	 * an isolated migration list, and this step is expected to happen here |  | ||||||
| 	 */ |  | ||||||
| 	balloon_page_delete(page); | 	balloon_page_delete(page); | ||||||
| 	vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE; | 	vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE; | ||||||
| 	set_page_pfns(vb->pfns, page); | 	set_page_pfns(vb->pfns, page); | ||||||
|  | @ -417,20 +410,15 @@ static int virtballoon_migratepage(struct address_space *mapping, | ||||||
| 
 | 
 | ||||||
| 	mutex_unlock(&vb->balloon_lock); | 	mutex_unlock(&vb->balloon_lock); | ||||||
| 
 | 
 | ||||||
| 	return MIGRATEPAGE_BALLOON_SUCCESS; | 	put_page(page); /* balloon reference */ | ||||||
| } |  | ||||||
| 
 | 
 | ||||||
| /* define the balloon_mapping->a_ops callback to allow balloon page migration */ | 	return MIGRATEPAGE_SUCCESS; | ||||||
| static const struct address_space_operations virtio_balloon_aops = { | } | ||||||
| 			.migratepage = virtballoon_migratepage, |  | ||||||
| }; |  | ||||||
| #endif /* CONFIG_BALLOON_COMPACTION */ | #endif /* CONFIG_BALLOON_COMPACTION */ | ||||||
| 
 | 
 | ||||||
| static int virtballoon_probe(struct virtio_device *vdev) | static int virtballoon_probe(struct virtio_device *vdev) | ||||||
| { | { | ||||||
| 	struct virtio_balloon *vb; | 	struct virtio_balloon *vb; | ||||||
| 	struct address_space *vb_mapping; |  | ||||||
| 	struct balloon_dev_info *vb_devinfo; |  | ||||||
| 	int err; | 	int err; | ||||||
| 
 | 
 | ||||||
| 	vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL); | 	vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL); | ||||||
|  | @ -446,30 +434,14 @@ static int virtballoon_probe(struct virtio_device *vdev) | ||||||
| 	vb->vdev = vdev; | 	vb->vdev = vdev; | ||||||
| 	vb->need_stats_update = 0; | 	vb->need_stats_update = 0; | ||||||
| 
 | 
 | ||||||
| 	vb_devinfo = balloon_devinfo_alloc(vb); | 	balloon_devinfo_init(&vb->vb_dev_info); | ||||||
| 	if (IS_ERR(vb_devinfo)) { | #ifdef CONFIG_BALLOON_COMPACTION | ||||||
| 		err = PTR_ERR(vb_devinfo); | 	vb->vb_dev_info.migratepage = virtballoon_migratepage; | ||||||
| 		goto out_free_vb; | #endif | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	vb_mapping = balloon_mapping_alloc(vb_devinfo, |  | ||||||
| 					   (balloon_compaction_check()) ? |  | ||||||
| 					   &virtio_balloon_aops : NULL); |  | ||||||
| 	if (IS_ERR(vb_mapping)) { |  | ||||||
| 		/*
 |  | ||||||
| 		 * IS_ERR(vb_mapping) && PTR_ERR(vb_mapping) == -EOPNOTSUPP |  | ||||||
| 		 * This means !CONFIG_BALLOON_COMPACTION, otherwise we get off. |  | ||||||
| 		 */ |  | ||||||
| 		err = PTR_ERR(vb_mapping); |  | ||||||
| 		if (err != -EOPNOTSUPP) |  | ||||||
| 			goto out_free_vb_devinfo; |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	vb->vb_dev_info = vb_devinfo; |  | ||||||
| 
 | 
 | ||||||
| 	err = init_vqs(vb); | 	err = init_vqs(vb); | ||||||
| 	if (err) | 	if (err) | ||||||
| 		goto out_free_vb_mapping; | 		goto out_free_vb; | ||||||
| 
 | 
 | ||||||
| 	vb->thread = kthread_run(balloon, vb, "vballoon"); | 	vb->thread = kthread_run(balloon, vb, "vballoon"); | ||||||
| 	if (IS_ERR(vb->thread)) { | 	if (IS_ERR(vb->thread)) { | ||||||
|  | @ -481,10 +453,6 @@ static int virtballoon_probe(struct virtio_device *vdev) | ||||||
| 
 | 
 | ||||||
| out_del_vqs: | out_del_vqs: | ||||||
| 	vdev->config->del_vqs(vdev); | 	vdev->config->del_vqs(vdev); | ||||||
| out_free_vb_mapping: |  | ||||||
| 	balloon_mapping_free(vb_mapping); |  | ||||||
| out_free_vb_devinfo: |  | ||||||
| 	balloon_devinfo_free(vb_devinfo); |  | ||||||
| out_free_vb: | out_free_vb: | ||||||
| 	kfree(vb); | 	kfree(vb); | ||||||
| out: | out: | ||||||
|  | @ -510,8 +478,6 @@ static void virtballoon_remove(struct virtio_device *vdev) | ||||||
| 
 | 
 | ||||||
| 	kthread_stop(vb->thread); | 	kthread_stop(vb->thread); | ||||||
| 	remove_common(vb); | 	remove_common(vb); | ||||||
| 	balloon_mapping_free(vb->vb_dev_info->mapping); |  | ||||||
| 	balloon_devinfo_free(vb->vb_dev_info); |  | ||||||
| 	kfree(vb); | 	kfree(vb); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -304,6 +304,12 @@ static int blkdev_readpage(struct file * file, struct page * page) | ||||||
| 	return block_read_full_page(page, blkdev_get_block); | 	return block_read_full_page(page, blkdev_get_block); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static int blkdev_readpages(struct file *file, struct address_space *mapping, | ||||||
|  | 			struct list_head *pages, unsigned nr_pages) | ||||||
|  | { | ||||||
|  | 	return mpage_readpages(mapping, pages, nr_pages, blkdev_get_block); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| static int blkdev_write_begin(struct file *file, struct address_space *mapping, | static int blkdev_write_begin(struct file *file, struct address_space *mapping, | ||||||
| 			loff_t pos, unsigned len, unsigned flags, | 			loff_t pos, unsigned len, unsigned flags, | ||||||
| 			struct page **pagep, void **fsdata) | 			struct page **pagep, void **fsdata) | ||||||
|  | @ -1622,6 +1628,7 @@ static int blkdev_releasepage(struct page *page, gfp_t wait) | ||||||
| 
 | 
 | ||||||
| static const struct address_space_operations def_blk_aops = { | static const struct address_space_operations def_blk_aops = { | ||||||
| 	.readpage	= blkdev_readpage, | 	.readpage	= blkdev_readpage, | ||||||
|  | 	.readpages	= blkdev_readpages, | ||||||
| 	.writepage	= blkdev_writepage, | 	.writepage	= blkdev_writepage, | ||||||
| 	.write_begin	= blkdev_write_begin, | 	.write_begin	= blkdev_write_begin, | ||||||
| 	.write_end	= blkdev_write_end, | 	.write_end	= blkdev_write_end, | ||||||
|  |  | ||||||
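The .readpages hook added above lets readahead hand the block device a whole batch of pages in one call, and mpage_readpages() turns that batch into large multi-page bios using the same get_block callback that .readpage uses. A minimal sketch of the general pattern; the example_* names and the trivial 1:1 block mapping are illustrative, not part of this patch:

#include <linux/fs.h>
#include <linux/mpage.h>
#include <linux/buffer_head.h>

/* Illustrative get_block_t: maps file block i straight to disk block i. */
static int example_get_block(struct inode *inode, sector_t iblock,
			     struct buffer_head *bh_result, int create)
{
	map_bh(bh_result, inode->i_sb, iblock);
	return 0;
}

static int example_readpage(struct file *file, struct page *page)
{
	return mpage_readpage(page, example_get_block);
}

static int example_readpages(struct file *file, struct address_space *mapping,
			     struct list_head *pages, unsigned nr_pages)
{
	/* One call can submit a single large bio covering many pages. */
	return mpage_readpages(mapping, pages, nr_pages, example_get_block);
}

static const struct address_space_operations example_aops = {
	.readpage	= example_readpage,
	.readpages	= example_readpages,
};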
							
								
								
									
28  fs/buffer.c
							|  | @ -1253,7 +1253,7 @@ static struct buffer_head *__bread_slow(struct buffer_head *bh) | ||||||
|  * a local interrupt disable for that. |  * a local interrupt disable for that. | ||||||
|  */ |  */ | ||||||
| 
 | 
 | ||||||
| #define BH_LRU_SIZE	8 | #define BH_LRU_SIZE	16 | ||||||
| 
 | 
 | ||||||
| struct bh_lru { | struct bh_lru { | ||||||
| 	struct buffer_head *bhs[BH_LRU_SIZE]; | 	struct buffer_head *bhs[BH_LRU_SIZE]; | ||||||
|  | @ -2956,7 +2956,7 @@ static void end_bio_bh_io_sync(struct bio *bio, int err) | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * This allows us to do IO even on the odd last sectors |  * This allows us to do IO even on the odd last sectors | ||||||
|  * of a device, even if the bh block size is some multiple |  * of a device, even if the block size is some multiple | ||||||
|  * of the physical sector size. |  * of the physical sector size. | ||||||
|  * |  * | ||||||
|  * We'll just truncate the bio to the size of the device, |  * We'll just truncate the bio to the size of the device, | ||||||
|  | @ -2966,10 +2966,11 @@ static void end_bio_bh_io_sync(struct bio *bio, int err) | ||||||
|  * errors, this only handles the "we need to be able to |  * errors, this only handles the "we need to be able to | ||||||
|  * do IO at the final sector" case. |  * do IO at the final sector" case. | ||||||
|  */ |  */ | ||||||
| static void guard_bh_eod(int rw, struct bio *bio, struct buffer_head *bh) | void guard_bio_eod(int rw, struct bio *bio) | ||||||
| { | { | ||||||
| 	sector_t maxsector; | 	sector_t maxsector; | ||||||
| 	unsigned bytes; | 	struct bio_vec *bvec = &bio->bi_io_vec[bio->bi_vcnt - 1]; | ||||||
|  | 	unsigned truncated_bytes; | ||||||
| 
 | 
 | ||||||
| 	maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9; | 	maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9; | ||||||
| 	if (!maxsector) | 	if (!maxsector) | ||||||
|  | @ -2984,23 +2985,20 @@ static void guard_bh_eod(int rw, struct bio *bio, struct buffer_head *bh) | ||||||
| 		return; | 		return; | ||||||
| 
 | 
 | ||||||
| 	maxsector -= bio->bi_iter.bi_sector; | 	maxsector -= bio->bi_iter.bi_sector; | ||||||
| 	bytes = bio->bi_iter.bi_size; | 	if (likely((bio->bi_iter.bi_size >> 9) <= maxsector)) | ||||||
| 	if (likely((bytes >> 9) <= maxsector)) |  | ||||||
| 		return; | 		return; | ||||||
| 
 | 
 | ||||||
| 	/* Uhhuh. We've got a bh that straddles the device size! */ | 	/* Uhhuh. We've got a bio that straddles the device size! */ | ||||||
| 	bytes = maxsector << 9; | 	truncated_bytes = bio->bi_iter.bi_size - (maxsector << 9); | ||||||
| 
 | 
 | ||||||
| 	/* Truncate the bio.. */ | 	/* Truncate the bio.. */ | ||||||
| 	bio->bi_iter.bi_size = bytes; | 	bio->bi_iter.bi_size -= truncated_bytes; | ||||||
| 	bio->bi_io_vec[0].bv_len = bytes; | 	bvec->bv_len -= truncated_bytes; | ||||||
| 
 | 
 | ||||||
| 	/* ..and clear the end of the buffer for reads */ | 	/* ..and clear the end of the buffer for reads */ | ||||||
| 	if ((rw & RW_MASK) == READ) { | 	if ((rw & RW_MASK) == READ) { | ||||||
| 		void *kaddr = kmap_atomic(bh->b_page); | 		zero_user(bvec->bv_page, bvec->bv_offset + bvec->bv_len, | ||||||
| 		memset(kaddr + bh_offset(bh) + bytes, 0, bh->b_size - bytes); | 				truncated_bytes); | ||||||
| 		kunmap_atomic(kaddr); |  | ||||||
| 		flush_dcache_page(bh->b_page); |  | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -3041,7 +3039,7 @@ int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags) | ||||||
| 	bio->bi_flags |= bio_flags; | 	bio->bi_flags |= bio_flags; | ||||||
| 
 | 
 | ||||||
| 	/* Take care of bh's that straddle the end of the device */ | 	/* Take care of bh's that straddle the end of the device */ | ||||||
| 	guard_bh_eod(rw, bio, bh); | 	guard_bio_eod(rw, bio); | ||||||
| 
 | 
 | ||||||
| 	if (buffer_meta(bh)) | 	if (buffer_meta(bh)) | ||||||
| 		rw |= REQ_META; | 		rw |= REQ_META; | ||||||
|  |  | ||||||
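The guard_bio_eod() rewrite above replaces the open-coded kmap_atomic/memset/flush_dcache_page sequence with zero_user(), which performs the same map, clear and flush internally. A small sketch of the equivalence; the helper names and parameters here are illustrative, not from the patch:

#include <linux/highmem.h>
#include <linux/string.h>
#include <linux/mm.h>

/* Old style: map the page, clear the byte range by hand, flush. */
static void example_clear_tail_open_coded(struct page *page,
					  unsigned offset, unsigned len)
{
	void *kaddr = kmap_atomic(page);

	memset(kaddr + offset, 0, len);
	kunmap_atomic(kaddr);
	flush_dcache_page(page);
}

/* New style: one helper does the same mapping, memset and flush. */
static void example_clear_tail(struct page *page, unsigned offset, unsigned len)
{
	zero_user(page, offset, len);
}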
|  | @ -34,6 +34,11 @@ static inline int __sync_blockdev(struct block_device *bdev, int wait) | ||||||
| } | } | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
|  | /*
 | ||||||
|  |  * buffer.c | ||||||
|  |  */ | ||||||
|  | extern void guard_bio_eod(int rw, struct bio *bio); | ||||||
|  | 
 | ||||||
| /*
 | /*
 | ||||||
|  * char_dev.c |  * char_dev.c | ||||||
|  */ |  */ | ||||||
|  |  | ||||||
|  | @ -28,6 +28,7 @@ | ||||||
| #include <linux/backing-dev.h> | #include <linux/backing-dev.h> | ||||||
| #include <linux/pagevec.h> | #include <linux/pagevec.h> | ||||||
| #include <linux/cleancache.h> | #include <linux/cleancache.h> | ||||||
|  | #include "internal.h" | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * I/O completion handler for multipage BIOs. |  * I/O completion handler for multipage BIOs. | ||||||
|  | @ -57,6 +58,7 @@ static void mpage_end_io(struct bio *bio, int err) | ||||||
| static struct bio *mpage_bio_submit(int rw, struct bio *bio) | static struct bio *mpage_bio_submit(int rw, struct bio *bio) | ||||||
| { | { | ||||||
| 	bio->bi_end_io = mpage_end_io; | 	bio->bi_end_io = mpage_end_io; | ||||||
|  | 	guard_bio_eod(rw, bio); | ||||||
| 	submit_bio(rw, bio); | 	submit_bio(rw, bio); | ||||||
| 	return NULL; | 	return NULL; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -78,7 +78,7 @@ static int create_fd(struct fsnotify_group *group, | ||||||
| 
 | 
 | ||||||
| 	pr_debug("%s: group=%p event=%p\n", __func__, group, event); | 	pr_debug("%s: group=%p event=%p\n", __func__, group, event); | ||||||
| 
 | 
 | ||||||
| 	client_fd = get_unused_fd(); | 	client_fd = get_unused_fd_flags(group->fanotify_data.f_flags); | ||||||
| 	if (client_fd < 0) | 	if (client_fd < 0) | ||||||
| 		return client_fd; | 		return client_fd; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
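Switching create_fd() to get_unused_fd_flags() lets the event descriptor inherit the flags the listener passed at fanotify_init() time (for example O_CLOEXEC), where the old get_unused_fd() always handed back a plain descriptor. A sketch of the usual reserve-then-install sequence; the example_* names are illustrative:

#include <linux/file.h>
#include <linux/err.h>

static int example_install_fd(unsigned int o_flags, struct file *filp)
{
	int fd = get_unused_fd_flags(o_flags);	/* honors O_CLOEXEC and friends */

	if (fd < 0)
		return fd;
	if (IS_ERR(filp)) {
		put_unused_fd(fd);		/* nothing was installed yet */
		return PTR_ERR(filp);
	}
	fd_install(fd, filp);			/* fd becomes visible to userspace */
	return fd;
}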
|  | @ -23,9 +23,6 @@ extern int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, | ||||||
| 				      struct fsnotify_group *group, struct vfsmount *mnt, | 				      struct fsnotify_group *group, struct vfsmount *mnt, | ||||||
| 				      int allow_dups); | 				      int allow_dups); | ||||||
| 
 | 
 | ||||||
| /* final kfree of a group */ |  | ||||||
| extern void fsnotify_final_destroy_group(struct fsnotify_group *group); |  | ||||||
| 
 |  | ||||||
| /* vfsmount specific destruction of a mark */ | /* vfsmount specific destruction of a mark */ | ||||||
| extern void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark); | extern void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark); | ||||||
| /* inode specific destruction of a mark */ | /* inode specific destruction of a mark */ | ||||||
|  |  | ||||||
|  | @ -31,7 +31,7 @@ | ||||||
| /*
 | /*
 | ||||||
|  * Final freeing of a group |  * Final freeing of a group | ||||||
|  */ |  */ | ||||||
| void fsnotify_final_destroy_group(struct fsnotify_group *group) | static void fsnotify_final_destroy_group(struct fsnotify_group *group) | ||||||
| { | { | ||||||
| 	if (group->ops->free_group_priv) | 	if (group->ops->free_group_priv) | ||||||
| 		group->ops->free_group_priv(group); | 		group->ops->free_group_priv(group); | ||||||
|  |  | ||||||
|  | @ -165,9 +165,11 @@ static void inotify_free_group_priv(struct fsnotify_group *group) | ||||||
| 	/* ideally the idr is empty and we won't hit the BUG in the callback */ | 	/* ideally the idr is empty and we won't hit the BUG in the callback */ | ||||||
| 	idr_for_each(&group->inotify_data.idr, idr_callback, group); | 	idr_for_each(&group->inotify_data.idr, idr_callback, group); | ||||||
| 	idr_destroy(&group->inotify_data.idr); | 	idr_destroy(&group->inotify_data.idr); | ||||||
|  | 	if (group->inotify_data.user) { | ||||||
| 		atomic_dec(&group->inotify_data.user->inotify_devs); | 		atomic_dec(&group->inotify_data.user->inotify_devs); | ||||||
| 		free_uid(group->inotify_data.user); | 		free_uid(group->inotify_data.user); | ||||||
| 	} | 	} | ||||||
|  | } | ||||||
| 
 | 
 | ||||||
| static void inotify_free_event(struct fsnotify_event *fsn_event) | static void inotify_free_event(struct fsnotify_event *fsn_event) | ||||||
| { | { | ||||||
|  |  | ||||||
|  | @ -1,7 +1,7 @@ | ||||||
| /*
 | /*
 | ||||||
|  * file.c - NTFS kernel file operations.  Part of the Linux-NTFS project. |  * file.c - NTFS kernel file operations.  Part of the Linux-NTFS project. | ||||||
|  * |  * | ||||||
|  * Copyright (c) 2001-2011 Anton Altaparmakov and Tuxera Inc. |  * Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc. | ||||||
|  * |  * | ||||||
|  * This program/include file is free software; you can redistribute it and/or |  * This program/include file is free software; you can redistribute it and/or | ||||||
|  * modify it under the terms of the GNU General Public License as published |  * modify it under the terms of the GNU General Public License as published | ||||||
|  | @ -410,7 +410,8 @@ static inline int __ntfs_grab_cache_pages(struct address_space *mapping, | ||||||
| 	BUG_ON(!nr_pages); | 	BUG_ON(!nr_pages); | ||||||
| 	err = nr = 0; | 	err = nr = 0; | ||||||
| 	do { | 	do { | ||||||
| 		pages[nr] = find_lock_page(mapping, index); | 		pages[nr] = find_get_page_flags(mapping, index, FGP_LOCK | | ||||||
|  | 				FGP_ACCESSED); | ||||||
| 		if (!pages[nr]) { | 		if (!pages[nr]) { | ||||||
| 			if (!*cached_page) { | 			if (!*cached_page) { | ||||||
| 				*cached_page = page_cache_alloc(mapping); | 				*cached_page = page_cache_alloc(mapping); | ||||||
|  |  | ||||||
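The __ntfs_grab_cache_pages() change relies on the FGP lookup flags: FGP_LOCK returns the page locked, as find_lock_page() did, and FGP_ACCESSED marks it accessed during the lookup itself, so the write path no longer needs a separate mark_page_accessed() call. A before/after sketch under those assumptions, with example_* names being illustrative:

#include <linux/pagemap.h>
#include <linux/swap.h>		/* mark_page_accessed() */

/* Before: lock the page, then mark it accessed as a second step. */
static struct page *example_grab_old(struct address_space *mapping, pgoff_t index)
{
	struct page *page = find_lock_page(mapping, index);

	if (page)
		mark_page_accessed(page);
	return page;
}

/* After: a single lookup that locks the page and feeds LRU accounting. */
static struct page *example_grab_new(struct address_space *mapping, pgoff_t index)
{
	return find_get_page_flags(mapping, index, FGP_LOCK | FGP_ACCESSED);
}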
|  | @ -3208,7 +3208,7 @@ static void __exit exit_ntfs_fs(void) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| MODULE_AUTHOR("Anton Altaparmakov <anton@tuxera.com>"); | MODULE_AUTHOR("Anton Altaparmakov <anton@tuxera.com>"); | ||||||
| MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2011 Anton Altaparmakov and Tuxera Inc."); | MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc."); | ||||||
| MODULE_VERSION(NTFS_VERSION); | MODULE_VERSION(NTFS_VERSION); | ||||||
| MODULE_LICENSE("GPL"); | MODULE_LICENSE("GPL"); | ||||||
| #ifdef DEBUG | #ifdef DEBUG | ||||||
|  |  | ||||||
|  | @ -1481,8 +1481,16 @@ static int ocfs2_write_begin_inline(struct address_space *mapping, | ||||||
| 	handle_t *handle; | 	handle_t *handle; | ||||||
| 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)wc->w_di_bh->b_data; | 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)wc->w_di_bh->b_data; | ||||||
| 
 | 
 | ||||||
|  | 	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | ||||||
|  | 	if (IS_ERR(handle)) { | ||||||
|  | 		ret = PTR_ERR(handle); | ||||||
|  | 		mlog_errno(ret); | ||||||
|  | 		goto out; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
| 	page = find_or_create_page(mapping, 0, GFP_NOFS); | 	page = find_or_create_page(mapping, 0, GFP_NOFS); | ||||||
| 	if (!page) { | 	if (!page) { | ||||||
|  | 		ocfs2_commit_trans(osb, handle); | ||||||
| 		ret = -ENOMEM; | 		ret = -ENOMEM; | ||||||
| 		mlog_errno(ret); | 		mlog_errno(ret); | ||||||
| 		goto out; | 		goto out; | ||||||
|  | @ -1494,13 +1502,6 @@ static int ocfs2_write_begin_inline(struct address_space *mapping, | ||||||
| 	wc->w_pages[0] = wc->w_target_page = page; | 	wc->w_pages[0] = wc->w_target_page = page; | ||||||
| 	wc->w_num_pages = 1; | 	wc->w_num_pages = 1; | ||||||
| 
 | 
 | ||||||
| 	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); |  | ||||||
| 	if (IS_ERR(handle)) { |  | ||||||
| 		ret = PTR_ERR(handle); |  | ||||||
| 		mlog_errno(ret); |  | ||||||
| 		goto out; |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh, | 	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh, | ||||||
| 				      OCFS2_JOURNAL_ACCESS_WRITE); | 				      OCFS2_JOURNAL_ACCESS_WRITE); | ||||||
| 	if (ret) { | 	if (ret) { | ||||||
|  |  | ||||||
|  | @ -2572,6 +2572,25 @@ int o2hb_check_node_heartbeating(u8 node_num) | ||||||
| } | } | ||||||
| EXPORT_SYMBOL_GPL(o2hb_check_node_heartbeating); | EXPORT_SYMBOL_GPL(o2hb_check_node_heartbeating); | ||||||
| 
 | 
 | ||||||
|  | int o2hb_check_node_heartbeating_no_sem(u8 node_num) | ||||||
|  | { | ||||||
|  | 	unsigned long testing_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||||||
|  | 	unsigned long flags; | ||||||
|  | 
 | ||||||
|  | 	spin_lock_irqsave(&o2hb_live_lock, flags); | ||||||
|  | 	o2hb_fill_node_map_from_callback(testing_map, sizeof(testing_map)); | ||||||
|  | 	spin_unlock_irqrestore(&o2hb_live_lock, flags); | ||||||
|  | 	if (!test_bit(node_num, testing_map)) { | ||||||
|  | 		mlog(ML_HEARTBEAT, | ||||||
|  | 		     "node (%u) does not have heartbeating enabled.\n", | ||||||
|  | 		     node_num); | ||||||
|  | 		return 0; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return 1; | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL_GPL(o2hb_check_node_heartbeating_no_sem); | ||||||
|  | 
 | ||||||
| int o2hb_check_node_heartbeating_from_callback(u8 node_num) | int o2hb_check_node_heartbeating_from_callback(u8 node_num) | ||||||
| { | { | ||||||
| 	unsigned long testing_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | 	unsigned long testing_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||||||
|  |  | ||||||
|  | @ -80,6 +80,7 @@ void o2hb_fill_node_map(unsigned long *map, | ||||||
| void o2hb_exit(void); | void o2hb_exit(void); | ||||||
| int o2hb_init(void); | int o2hb_init(void); | ||||||
| int o2hb_check_node_heartbeating(u8 node_num); | int o2hb_check_node_heartbeating(u8 node_num); | ||||||
|  | int o2hb_check_node_heartbeating_no_sem(u8 node_num); | ||||||
| int o2hb_check_node_heartbeating_from_callback(u8 node_num); | int o2hb_check_node_heartbeating_from_callback(u8 node_num); | ||||||
| int o2hb_check_local_node_heartbeating(void); | int o2hb_check_local_node_heartbeating(void); | ||||||
| void o2hb_stop_all_regions(void); | void o2hb_stop_all_regions(void); | ||||||
|  |  | ||||||
|  | @ -185,29 +185,13 @@ static const struct seq_operations nst_seq_ops = { | ||||||
| static int nst_fop_open(struct inode *inode, struct file *file) | static int nst_fop_open(struct inode *inode, struct file *file) | ||||||
| { | { | ||||||
| 	struct o2net_send_tracking *dummy_nst; | 	struct o2net_send_tracking *dummy_nst; | ||||||
| 	struct seq_file *seq; |  | ||||||
| 	int ret; |  | ||||||
| 
 | 
 | ||||||
| 	dummy_nst = kmalloc(sizeof(struct o2net_send_tracking), GFP_KERNEL); | 	dummy_nst = __seq_open_private(file, &nst_seq_ops, sizeof(*dummy_nst)); | ||||||
| 	if (dummy_nst == NULL) { | 	if (!dummy_nst) | ||||||
| 		ret = -ENOMEM; | 		return -ENOMEM; | ||||||
| 		goto out; |  | ||||||
| 	} |  | ||||||
| 	dummy_nst->st_task = NULL; |  | ||||||
| 
 |  | ||||||
| 	ret = seq_open(file, &nst_seq_ops); |  | ||||||
| 	if (ret) |  | ||||||
| 		goto out; |  | ||||||
| 
 |  | ||||||
| 	seq = file->private_data; |  | ||||||
| 	seq->private = dummy_nst; |  | ||||||
| 	o2net_debug_add_nst(dummy_nst); | 	o2net_debug_add_nst(dummy_nst); | ||||||
| 
 | 
 | ||||||
| 	dummy_nst = NULL; | 	return 0; | ||||||
| 
 |  | ||||||
| out: |  | ||||||
| 	kfree(dummy_nst); |  | ||||||
| 	return ret; |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static int nst_fop_release(struct inode *inode, struct file *file) | static int nst_fop_release(struct inode *inode, struct file *file) | ||||||
|  | @ -412,33 +396,27 @@ static const struct seq_operations sc_seq_ops = { | ||||||
| 	.show = sc_seq_show, | 	.show = sc_seq_show, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| static int sc_common_open(struct file *file, struct o2net_sock_debug *sd) | static int sc_common_open(struct file *file, int ctxt) | ||||||
| { | { | ||||||
|  | 	struct o2net_sock_debug *sd; | ||||||
| 	struct o2net_sock_container *dummy_sc; | 	struct o2net_sock_container *dummy_sc; | ||||||
| 	struct seq_file *seq; |  | ||||||
| 	int ret; |  | ||||||
| 
 | 
 | ||||||
| 	dummy_sc = kmalloc(sizeof(struct o2net_sock_container), GFP_KERNEL); | 	dummy_sc = kzalloc(sizeof(*dummy_sc), GFP_KERNEL); | ||||||
| 	if (dummy_sc == NULL) { | 	if (!dummy_sc) | ||||||
| 		ret = -ENOMEM; | 		return -ENOMEM; | ||||||
| 		goto out; | 
 | ||||||
|  | 	sd = __seq_open_private(file, &sc_seq_ops, sizeof(*sd)); | ||||||
|  | 	if (!sd) { | ||||||
|  | 		kfree(dummy_sc); | ||||||
|  | 		return -ENOMEM; | ||||||
| 	} | 	} | ||||||
| 	dummy_sc->sc_page = NULL; |  | ||||||
| 
 | 
 | ||||||
| 	ret = seq_open(file, &sc_seq_ops); | 	sd->dbg_ctxt = ctxt; | ||||||
| 	if (ret) |  | ||||||
| 		goto out; |  | ||||||
| 
 |  | ||||||
| 	seq = file->private_data; |  | ||||||
| 	seq->private = sd; |  | ||||||
| 	sd->dbg_sock = dummy_sc; | 	sd->dbg_sock = dummy_sc; | ||||||
|  | 
 | ||||||
| 	o2net_debug_add_sc(dummy_sc); | 	o2net_debug_add_sc(dummy_sc); | ||||||
| 
 | 
 | ||||||
| 	dummy_sc = NULL; | 	return 0; | ||||||
| 
 |  | ||||||
| out: |  | ||||||
| 	kfree(dummy_sc); |  | ||||||
| 	return ret; |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static int sc_fop_release(struct inode *inode, struct file *file) | static int sc_fop_release(struct inode *inode, struct file *file) | ||||||
|  | @ -453,16 +431,7 @@ static int sc_fop_release(struct inode *inode, struct file *file) | ||||||
| 
 | 
 | ||||||
| static int stats_fop_open(struct inode *inode, struct file *file) | static int stats_fop_open(struct inode *inode, struct file *file) | ||||||
| { | { | ||||||
| 	struct o2net_sock_debug *sd; | 	return sc_common_open(file, SHOW_SOCK_STATS); | ||||||
| 
 |  | ||||||
| 	sd = kmalloc(sizeof(struct o2net_sock_debug), GFP_KERNEL); |  | ||||||
| 	if (sd == NULL) |  | ||||||
| 		return -ENOMEM; |  | ||||||
| 
 |  | ||||||
| 	sd->dbg_ctxt = SHOW_SOCK_STATS; |  | ||||||
| 	sd->dbg_sock = NULL; |  | ||||||
| 
 |  | ||||||
| 	return sc_common_open(file, sd); |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static const struct file_operations stats_seq_fops = { | static const struct file_operations stats_seq_fops = { | ||||||
|  | @ -474,16 +443,7 @@ static const struct file_operations stats_seq_fops = { | ||||||
| 
 | 
 | ||||||
| static int sc_fop_open(struct inode *inode, struct file *file) | static int sc_fop_open(struct inode *inode, struct file *file) | ||||||
| { | { | ||||||
| 	struct o2net_sock_debug *sd; | 	return sc_common_open(file, SHOW_SOCK_CONTAINERS); | ||||||
| 
 |  | ||||||
| 	sd = kmalloc(sizeof(struct o2net_sock_debug), GFP_KERNEL); |  | ||||||
| 	if (sd == NULL) |  | ||||||
| 		return -ENOMEM; |  | ||||||
| 
 |  | ||||||
| 	sd->dbg_ctxt = SHOW_SOCK_CONTAINERS; |  | ||||||
| 	sd->dbg_sock = NULL; |  | ||||||
| 
 |  | ||||||
| 	return sc_common_open(file, sd); |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static const struct file_operations sc_seq_fops = { | static const struct file_operations sc_seq_fops = { | ||||||
|  |  | ||||||
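The open() conversions above use __seq_open_private(), which allocates a zeroed private structure of the requested size, opens the seq_file, and wires the allocation up as seq->private in one step; seq_release_private() frees it again on close. A compact sketch of that pattern, with every example_* name being illustrative:

#include <linux/seq_file.h>
#include <linux/fs.h>

struct example_iter {
	int pos;				/* per-open cursor */
};

static void *example_start(struct seq_file *m, loff_t *pos)
{
	return *pos == 0 ? m->private : NULL;	/* expose a single record */
}

static void *example_next(struct seq_file *m, void *v, loff_t *pos)
{
	(*pos)++;
	return NULL;
}

static void example_stop(struct seq_file *m, void *v)
{
}

static int example_show(struct seq_file *m, void *v)
{
	struct example_iter *it = v;

	seq_printf(m, "pos=%d\n", it->pos);
	return 0;
}

static const struct seq_operations example_seq_ops = {
	.start	= example_start,
	.next	= example_next,
	.stop	= example_stop,
	.show	= example_show,
};

static int example_open(struct inode *inode, struct file *file)
{
	struct example_iter *it;

	/* Allocates and zeroes the private struct, opens the seq_file and
	 * hooks the allocation up as seq->private in one call. */
	it = __seq_open_private(file, &example_seq_ops, sizeof(*it));
	if (!it)
		return -ENOMEM;
	it->pos = 0;				/* already zeroed; shown for clarity */
	return 0;
}

static const struct file_operations example_fops = {
	.open		= example_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_private,	/* frees seq->private */
};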
|  | @ -536,7 +536,7 @@ static void o2net_set_nn_state(struct o2net_node *nn, | ||||||
| 	if (nn->nn_persistent_error || nn->nn_sc_valid) | 	if (nn->nn_persistent_error || nn->nn_sc_valid) | ||||||
| 		wake_up(&nn->nn_sc_wq); | 		wake_up(&nn->nn_sc_wq); | ||||||
| 
 | 
 | ||||||
| 	if (!was_err && nn->nn_persistent_error) { | 	if (was_valid && !was_err && nn->nn_persistent_error) { | ||||||
| 		o2quo_conn_err(o2net_num_from_nn(nn)); | 		o2quo_conn_err(o2net_num_from_nn(nn)); | ||||||
| 		queue_delayed_work(o2net_wq, &nn->nn_still_up, | 		queue_delayed_work(o2net_wq, &nn->nn_still_up, | ||||||
| 				   msecs_to_jiffies(O2NET_QUORUM_DELAY_MS)); | 				   msecs_to_jiffies(O2NET_QUORUM_DELAY_MS)); | ||||||
|  | @ -1601,7 +1601,15 @@ static void o2net_start_connect(struct work_struct *work) | ||||||
| 	struct sockaddr_in myaddr = {0, }, remoteaddr = {0, }; | 	struct sockaddr_in myaddr = {0, }, remoteaddr = {0, }; | ||||||
| 	int ret = 0, stop; | 	int ret = 0, stop; | ||||||
| 	unsigned int timeout; | 	unsigned int timeout; | ||||||
|  | 	unsigned int noio_flag; | ||||||
| 
 | 
 | ||||||
|  | 	/*
 | ||||||
|  | 	 * sock_create allocates the sock with GFP_KERNEL. We must set | ||||||
|  | 	 * per-process flag PF_MEMALLOC_NOIO so that all allocations done | ||||||
|  | 	 * by this process are done as if GFP_NOIO was specified. So we | ||||||
|  | 	 * are not reentering the filesystem while doing memory reclaim. | ||||||
|  | 	 */ | ||||||
|  | 	noio_flag = memalloc_noio_save(); | ||||||
| 	/* if we're greater we initiate tx, otherwise we accept */ | 	/* if we're greater we initiate tx, otherwise we accept */ | ||||||
| 	if (o2nm_this_node() <= o2net_num_from_nn(nn)) | 	if (o2nm_this_node() <= o2net_num_from_nn(nn)) | ||||||
| 		goto out; | 		goto out; | ||||||
|  | @ -1710,6 +1718,7 @@ out: | ||||||
| 	if (mynode) | 	if (mynode) | ||||||
| 		o2nm_node_put(mynode); | 		o2nm_node_put(mynode); | ||||||
| 
 | 
 | ||||||
|  | 	memalloc_noio_restore(noio_flag); | ||||||
| 	return; | 	return; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -1721,7 +1730,8 @@ static void o2net_connect_expired(struct work_struct *work) | ||||||
| 	spin_lock(&nn->nn_lock); | 	spin_lock(&nn->nn_lock); | ||||||
| 	if (!nn->nn_sc_valid) { | 	if (!nn->nn_sc_valid) { | ||||||
| 		printk(KERN_NOTICE "o2net: No connection established with " | 		printk(KERN_NOTICE "o2net: No connection established with " | ||||||
| 		       "node %u after %u.%u seconds, giving up.\n", | 		       "node %u after %u.%u seconds, check network and" | ||||||
|  | 		       " cluster configuration.\n", | ||||||
| 		     o2net_num_from_nn(nn), | 		     o2net_num_from_nn(nn), | ||||||
| 		     o2net_idle_timeout() / 1000, | 		     o2net_idle_timeout() / 1000, | ||||||
| 		     o2net_idle_timeout() % 1000); | 		     o2net_idle_timeout() % 1000); | ||||||
|  | @ -1835,6 +1845,15 @@ static int o2net_accept_one(struct socket *sock, int *more) | ||||||
| 	struct o2nm_node *local_node = NULL; | 	struct o2nm_node *local_node = NULL; | ||||||
| 	struct o2net_sock_container *sc = NULL; | 	struct o2net_sock_container *sc = NULL; | ||||||
| 	struct o2net_node *nn; | 	struct o2net_node *nn; | ||||||
|  | 	unsigned int noio_flag; | ||||||
|  | 
 | ||||||
|  | 	/*
 | ||||||
|  | 	 * sock_create_lite allocates the sock with GFP_KERNEL. We must set | ||||||
|  | 	 * per-process flag PF_MEMALLOC_NOIO so that all allocations done | ||||||
|  | 	 * by this process are done as if GFP_NOIO was specified. So we | ||||||
|  | 	 * are not reentering the filesystem while doing memory reclaim. | ||||||
|  | 	 */ | ||||||
|  | 	noio_flag = memalloc_noio_save(); | ||||||
| 
 | 
 | ||||||
| 	BUG_ON(sock == NULL); | 	BUG_ON(sock == NULL); | ||||||
| 	*more = 0; | 	*more = 0; | ||||||
|  | @ -1951,6 +1970,8 @@ out: | ||||||
| 		o2nm_node_put(local_node); | 		o2nm_node_put(local_node); | ||||||
| 	if (sc) | 	if (sc) | ||||||
| 		sc_put(sc); | 		sc_put(sc); | ||||||
|  | 
 | ||||||
|  | 	memalloc_noio_restore(noio_flag); | ||||||
| 	return ret; | 	return ret; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -2146,17 +2167,13 @@ int o2net_init(void) | ||||||
| 	o2quo_init(); | 	o2quo_init(); | ||||||
| 
 | 
 | ||||||
| 	if (o2net_debugfs_init()) | 	if (o2net_debugfs_init()) | ||||||
| 		return -ENOMEM; | 		goto out; | ||||||
| 
 | 
 | ||||||
| 	o2net_hand = kzalloc(sizeof(struct o2net_handshake), GFP_KERNEL); | 	o2net_hand = kzalloc(sizeof(struct o2net_handshake), GFP_KERNEL); | ||||||
| 	o2net_keep_req = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL); | 	o2net_keep_req = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL); | ||||||
| 	o2net_keep_resp = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL); | 	o2net_keep_resp = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL); | ||||||
| 	if (!o2net_hand || !o2net_keep_req || !o2net_keep_resp) { | 	if (!o2net_hand || !o2net_keep_req || !o2net_keep_resp) | ||||||
| 		kfree(o2net_hand); | 		goto out; | ||||||
| 		kfree(o2net_keep_req); |  | ||||||
| 		kfree(o2net_keep_resp); |  | ||||||
| 		return -ENOMEM; |  | ||||||
| 	} |  | ||||||
| 
 | 
 | ||||||
| 	o2net_hand->protocol_version = cpu_to_be64(O2NET_PROTOCOL_VERSION); | 	o2net_hand->protocol_version = cpu_to_be64(O2NET_PROTOCOL_VERSION); | ||||||
| 	o2net_hand->connector_id = cpu_to_be64(1); | 	o2net_hand->connector_id = cpu_to_be64(1); | ||||||
|  | @ -2181,6 +2198,14 @@ int o2net_init(void) | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	return 0; | 	return 0; | ||||||
|  | 
 | ||||||
|  | out: | ||||||
|  | 	kfree(o2net_hand); | ||||||
|  | 	kfree(o2net_keep_req); | ||||||
|  | 	kfree(o2net_keep_resp); | ||||||
|  | 
 | ||||||
|  | 	o2quo_exit(); | ||||||
|  | 	return -ENOMEM; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void o2net_exit(void) | void o2net_exit(void) | ||||||
|  |  | ||||||
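Both the connect and accept paths above bracket their socket setup with memalloc_noio_save()/memalloc_noio_restore(): while the flag is set, every allocation made by the task behaves as if GFP_NOIO had been requested, so reclaim triggered inside sock_create() cannot recurse into the filesystem. A small sketch of the bracket; the example_* names and the kzalloc() stand-in are illustrative:

#include <linux/sched.h>
#include <linux/slab.h>

/* Stand-in for a callee that allocates with GFP_KERNEL internally,
 * as sock_create() does in the code above. */
static void *example_gfp_kernel_callee(void)
{
	return kzalloc(128, GFP_KERNEL);
}

static void *example_alloc_without_io(void)
{
	unsigned int noio_flag;
	void *p;

	noio_flag = memalloc_noio_save();	/* sets PF_MEMALLOC_NOIO */
	p = example_gfp_kernel_callee();	/* allocations act like GFP_NOIO */
	memalloc_noio_restore(noio_flag);	/* put the old state back */

	return p;
}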
|  | @ -647,41 +647,30 @@ static const struct seq_operations debug_lockres_ops = { | ||||||
| static int debug_lockres_open(struct inode *inode, struct file *file) | static int debug_lockres_open(struct inode *inode, struct file *file) | ||||||
| { | { | ||||||
| 	struct dlm_ctxt *dlm = inode->i_private; | 	struct dlm_ctxt *dlm = inode->i_private; | ||||||
| 	int ret = -ENOMEM; | 	struct debug_lockres *dl; | ||||||
| 	struct seq_file *seq; | 	void *buf; | ||||||
| 	struct debug_lockres *dl = NULL; |  | ||||||
| 
 | 
 | ||||||
| 	dl = kzalloc(sizeof(struct debug_lockres), GFP_KERNEL); | 	buf = kmalloc(PAGE_SIZE, GFP_KERNEL); | ||||||
| 	if (!dl) { | 	if (!buf) | ||||||
| 		mlog_errno(ret); |  | ||||||
| 		goto bail; | 		goto bail; | ||||||
| 	} | 
 | ||||||
|  | 	dl = __seq_open_private(file, &debug_lockres_ops, sizeof(*dl)); | ||||||
|  | 	if (!dl) | ||||||
|  | 		goto bailfree; | ||||||
| 
 | 
 | ||||||
| 	dl->dl_len = PAGE_SIZE; | 	dl->dl_len = PAGE_SIZE; | ||||||
| 	dl->dl_buf = kmalloc(dl->dl_len, GFP_KERNEL); | 	dl->dl_buf = buf; | ||||||
| 	if (!dl->dl_buf) { |  | ||||||
| 		mlog_errno(ret); |  | ||||||
| 		goto bail; |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	ret = seq_open(file, &debug_lockres_ops); |  | ||||||
| 	if (ret) { |  | ||||||
| 		mlog_errno(ret); |  | ||||||
| 		goto bail; |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	seq = file->private_data; |  | ||||||
| 	seq->private = dl; |  | ||||||
| 
 | 
 | ||||||
| 	dlm_grab(dlm); | 	dlm_grab(dlm); | ||||||
| 	dl->dl_ctxt = dlm; | 	dl->dl_ctxt = dlm; | ||||||
| 
 | 
 | ||||||
| 	return 0; | 	return 0; | ||||||
|  | 
 | ||||||
|  | bailfree: | ||||||
|  | 	kfree(buf); | ||||||
| bail: | bail: | ||||||
| 	if (dl) | 	mlog_errno(-ENOMEM); | ||||||
| 		kfree(dl->dl_buf); | 	return -ENOMEM; | ||||||
| 	kfree(dl); |  | ||||||
| 	return ret; |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static int debug_lockres_release(struct inode *inode, struct file *file) | static int debug_lockres_release(struct inode *inode, struct file *file) | ||||||
|  |  | ||||||
|  | @ -839,7 +839,7 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data, | ||||||
| 	 * to back off and try again.  This gives heartbeat a chance | 	 * to back off and try again.  This gives heartbeat a chance | ||||||
| 	 * to catch up. | 	 * to catch up. | ||||||
| 	 */ | 	 */ | ||||||
| 	if (!o2hb_check_node_heartbeating(query->node_idx)) { | 	if (!o2hb_check_node_heartbeating_no_sem(query->node_idx)) { | ||||||
| 		mlog(0, "node %u is not in our live map yet\n", | 		mlog(0, "node %u is not in our live map yet\n", | ||||||
| 		     query->node_idx); | 		     query->node_idx); | ||||||
| 
 | 
 | ||||||
|  | @ -1975,24 +1975,22 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, | ||||||
| 
 | 
 | ||||||
| 	dlm = kzalloc(sizeof(*dlm), GFP_KERNEL); | 	dlm = kzalloc(sizeof(*dlm), GFP_KERNEL); | ||||||
| 	if (!dlm) { | 	if (!dlm) { | ||||||
| 		mlog_errno(-ENOMEM); | 		ret = -ENOMEM; | ||||||
|  | 		mlog_errno(ret); | ||||||
| 		goto leave; | 		goto leave; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	dlm->name = kstrdup(domain, GFP_KERNEL); | 	dlm->name = kstrdup(domain, GFP_KERNEL); | ||||||
| 	if (dlm->name == NULL) { | 	if (dlm->name == NULL) { | ||||||
| 		mlog_errno(-ENOMEM); | 		ret = -ENOMEM; | ||||||
| 		kfree(dlm); | 		mlog_errno(ret); | ||||||
| 		dlm = NULL; |  | ||||||
| 		goto leave; | 		goto leave; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	dlm->lockres_hash = (struct hlist_head **)dlm_alloc_pagevec(DLM_HASH_PAGES); | 	dlm->lockres_hash = (struct hlist_head **)dlm_alloc_pagevec(DLM_HASH_PAGES); | ||||||
| 	if (!dlm->lockres_hash) { | 	if (!dlm->lockres_hash) { | ||||||
| 		mlog_errno(-ENOMEM); | 		ret = -ENOMEM; | ||||||
| 		kfree(dlm->name); | 		mlog_errno(ret); | ||||||
| 		kfree(dlm); |  | ||||||
| 		dlm = NULL; |  | ||||||
| 		goto leave; | 		goto leave; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  | @ -2002,11 +2000,8 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, | ||||||
| 	dlm->master_hash = (struct hlist_head **) | 	dlm->master_hash = (struct hlist_head **) | ||||||
| 				dlm_alloc_pagevec(DLM_HASH_PAGES); | 				dlm_alloc_pagevec(DLM_HASH_PAGES); | ||||||
| 	if (!dlm->master_hash) { | 	if (!dlm->master_hash) { | ||||||
| 		mlog_errno(-ENOMEM); | 		ret = -ENOMEM; | ||||||
| 		dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES); | 		mlog_errno(ret); | ||||||
| 		kfree(dlm->name); |  | ||||||
| 		kfree(dlm); |  | ||||||
| 		dlm = NULL; |  | ||||||
| 		goto leave; | 		goto leave; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  | @ -2017,14 +2012,8 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, | ||||||
| 	dlm->node_num = o2nm_this_node(); | 	dlm->node_num = o2nm_this_node(); | ||||||
| 
 | 
 | ||||||
| 	ret = dlm_create_debugfs_subroot(dlm); | 	ret = dlm_create_debugfs_subroot(dlm); | ||||||
| 	if (ret < 0) { | 	if (ret < 0) | ||||||
| 		dlm_free_pagevec((void **)dlm->master_hash, DLM_HASH_PAGES); |  | ||||||
| 		dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES); |  | ||||||
| 		kfree(dlm->name); |  | ||||||
| 		kfree(dlm); |  | ||||||
| 		dlm = NULL; |  | ||||||
| 		goto leave; | 		goto leave; | ||||||
| 	} |  | ||||||
| 
 | 
 | ||||||
| 	spin_lock_init(&dlm->spinlock); | 	spin_lock_init(&dlm->spinlock); | ||||||
| 	spin_lock_init(&dlm->master_lock); | 	spin_lock_init(&dlm->master_lock); | ||||||
|  | @ -2085,6 +2074,19 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, | ||||||
| 		  atomic_read(&dlm->dlm_refs.refcount)); | 		  atomic_read(&dlm->dlm_refs.refcount)); | ||||||
| 
 | 
 | ||||||
| leave: | leave: | ||||||
|  | 	if (ret < 0 && dlm) { | ||||||
|  | 		if (dlm->master_hash) | ||||||
|  | 			dlm_free_pagevec((void **)dlm->master_hash, | ||||||
|  | 					DLM_HASH_PAGES); | ||||||
|  | 
 | ||||||
|  | 		if (dlm->lockres_hash) | ||||||
|  | 			dlm_free_pagevec((void **)dlm->lockres_hash, | ||||||
|  | 					DLM_HASH_PAGES); | ||||||
|  | 
 | ||||||
|  | 		kfree(dlm->name); | ||||||
|  | 		kfree(dlm); | ||||||
|  | 		dlm = NULL; | ||||||
|  | 	} | ||||||
| 	return dlm; | 	return dlm; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -625,9 +625,6 @@ struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm, | ||||||
| 	return res; | 	return res; | ||||||
| 
 | 
 | ||||||
| error: | error: | ||||||
| 	if (res && res->lockname.name) |  | ||||||
| 		kmem_cache_free(dlm_lockname_cache, (void *)res->lockname.name); |  | ||||||
| 
 |  | ||||||
| 	if (res) | 	if (res) | ||||||
| 		kmem_cache_free(dlm_lockres_cache, res); | 		kmem_cache_free(dlm_lockres_cache, res); | ||||||
| 	return NULL; | 	return NULL; | ||||||
|  |  | ||||||
|  | @ -1710,9 +1710,12 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data, | ||||||
| 				BUG(); | 				BUG(); | ||||||
| 			} else | 			} else | ||||||
| 				__dlm_lockres_grab_inflight_worker(dlm, res); | 				__dlm_lockres_grab_inflight_worker(dlm, res); | ||||||
| 		} else /* put.. incase we are not the master */ |  | ||||||
| 			dlm_lockres_put(res); |  | ||||||
| 			spin_unlock(&res->spinlock); | 			spin_unlock(&res->spinlock); | ||||||
|  | 		} else { | ||||||
|  | 			/* put.. in case we are not the master */ | ||||||
|  | 			spin_unlock(&res->spinlock); | ||||||
|  | 			dlm_lockres_put(res); | ||||||
|  | 		} | ||||||
| 	} | 	} | ||||||
| 	spin_unlock(&dlm->spinlock); | 	spin_unlock(&dlm->spinlock); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -2892,37 +2892,24 @@ static int ocfs2_dlm_debug_release(struct inode *inode, struct file *file) | ||||||
| 
 | 
 | ||||||
| static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file) | static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file) | ||||||
| { | { | ||||||
| 	int ret; |  | ||||||
| 	struct ocfs2_dlm_seq_priv *priv; | 	struct ocfs2_dlm_seq_priv *priv; | ||||||
| 	struct seq_file *seq; |  | ||||||
| 	struct ocfs2_super *osb; | 	struct ocfs2_super *osb; | ||||||
| 
 | 
 | ||||||
| 	priv = kzalloc(sizeof(struct ocfs2_dlm_seq_priv), GFP_KERNEL); | 	priv = __seq_open_private(file, &ocfs2_dlm_seq_ops, sizeof(*priv)); | ||||||
| 	if (!priv) { | 	if (!priv) { | ||||||
| 		ret = -ENOMEM; | 		mlog_errno(-ENOMEM); | ||||||
| 		mlog_errno(ret); | 		return -ENOMEM; | ||||||
| 		goto out; |  | ||||||
| 	} | 	} | ||||||
|  | 
 | ||||||
| 	osb = inode->i_private; | 	osb = inode->i_private; | ||||||
| 	ocfs2_get_dlm_debug(osb->osb_dlm_debug); | 	ocfs2_get_dlm_debug(osb->osb_dlm_debug); | ||||||
| 	priv->p_dlm_debug = osb->osb_dlm_debug; | 	priv->p_dlm_debug = osb->osb_dlm_debug; | ||||||
| 	INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list); | 	INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list); | ||||||
| 
 | 
 | ||||||
| 	ret = seq_open(file, &ocfs2_dlm_seq_ops); |  | ||||||
| 	if (ret) { |  | ||||||
| 		kfree(priv); |  | ||||||
| 		mlog_errno(ret); |  | ||||||
| 		goto out; |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	seq = file->private_data; |  | ||||||
| 	seq->private = priv; |  | ||||||
| 
 |  | ||||||
| 	ocfs2_add_lockres_tracking(&priv->p_iter_res, | 	ocfs2_add_lockres_tracking(&priv->p_iter_res, | ||||||
| 				   priv->p_dlm_debug); | 				   priv->p_dlm_debug); | ||||||
| 
 | 
 | ||||||
| out: | 	return 0; | ||||||
| 	return ret; |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static const struct file_operations ocfs2_dlm_debug_fops = { | static const struct file_operations ocfs2_dlm_debug_fops = { | ||||||
|  |  | ||||||
|  | @ -760,7 +760,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, | ||||||
| 	struct address_space *mapping = inode->i_mapping; | 	struct address_space *mapping = inode->i_mapping; | ||||||
| 	struct page *page; | 	struct page *page; | ||||||
| 	unsigned long index = abs_from >> PAGE_CACHE_SHIFT; | 	unsigned long index = abs_from >> PAGE_CACHE_SHIFT; | ||||||
| 	handle_t *handle = NULL; | 	handle_t *handle; | ||||||
| 	int ret = 0; | 	int ret = 0; | ||||||
| 	unsigned zero_from, zero_to, block_start, block_end; | 	unsigned zero_from, zero_to, block_start, block_end; | ||||||
| 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||||||
|  | @ -769,11 +769,17 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, | ||||||
| 	BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT)); | 	BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT)); | ||||||
| 	BUG_ON(abs_from & (inode->i_blkbits - 1)); | 	BUG_ON(abs_from & (inode->i_blkbits - 1)); | ||||||
| 
 | 
 | ||||||
|  | 	handle = ocfs2_zero_start_ordered_transaction(inode, di_bh); | ||||||
|  | 	if (IS_ERR(handle)) { | ||||||
|  | 		ret = PTR_ERR(handle); | ||||||
|  | 		goto out; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
| 	page = find_or_create_page(mapping, index, GFP_NOFS); | 	page = find_or_create_page(mapping, index, GFP_NOFS); | ||||||
| 	if (!page) { | 	if (!page) { | ||||||
| 		ret = -ENOMEM; | 		ret = -ENOMEM; | ||||||
| 		mlog_errno(ret); | 		mlog_errno(ret); | ||||||
| 		goto out; | 		goto out_commit_trans; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	/* Get the offsets within the page that we want to zero */ | 	/* Get the offsets within the page that we want to zero */ | ||||||
|  | @ -805,15 +811,6 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, | ||||||
| 			goto out_unlock; | 			goto out_unlock; | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
| 		if (!handle) { |  | ||||||
| 			handle = ocfs2_zero_start_ordered_transaction(inode, |  | ||||||
| 								      di_bh); |  | ||||||
| 			if (IS_ERR(handle)) { |  | ||||||
| 				ret = PTR_ERR(handle); |  | ||||||
| 				handle = NULL; |  | ||||||
| 				break; |  | ||||||
| 			} |  | ||||||
| 		} |  | ||||||
| 
 | 
 | ||||||
| 		/* must not update i_size! */ | 		/* must not update i_size! */ | ||||||
| 		ret = block_commit_write(page, block_start + 1, | 		ret = block_commit_write(page, block_start + 1, | ||||||
|  | @ -824,7 +821,6 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, | ||||||
| 			ret = 0; | 			ret = 0; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	if (handle) { |  | ||||||
| 	/*
 | 	/*
 | ||||||
| 	 * fs-writeback will release the dirty pages without page lock | 	 * fs-writeback will release the dirty pages without page lock | ||||||
| 	 * whose offset are over inode size, the release happens at | 	 * whose offset are over inode size, the release happens at | ||||||
|  | @ -837,14 +833,17 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, | ||||||
| 	di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec); | 	di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec); | ||||||
| 	di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); | 	di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); | ||||||
| 	di->i_mtime_nsec = di->i_ctime_nsec; | 	di->i_mtime_nsec = di->i_ctime_nsec; | ||||||
|  | 	if (handle) { | ||||||
| 		ocfs2_journal_dirty(handle, di_bh); | 		ocfs2_journal_dirty(handle, di_bh); | ||||||
| 		ocfs2_update_inode_fsync_trans(handle, inode, 1); | 		ocfs2_update_inode_fsync_trans(handle, inode, 1); | ||||||
| 		ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); |  | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| out_unlock: | out_unlock: | ||||||
| 	unlock_page(page); | 	unlock_page(page); | ||||||
| 	page_cache_release(page); | 	page_cache_release(page); | ||||||
|  | out_commit_trans: | ||||||
|  | 	if (handle) | ||||||
|  | 		ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); | ||||||
| out: | out: | ||||||
| 	return ret; | 	return ret; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -162,7 +162,7 @@ static inline blkcnt_t ocfs2_inode_sector_count(struct inode *inode) | ||||||
| { | { | ||||||
| 	int c_to_s_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits - 9; | 	int c_to_s_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits - 9; | ||||||
| 
 | 
 | ||||||
| 	return (blkcnt_t)(OCFS2_I(inode)->ip_clusters << c_to_s_bits); | 	return (blkcnt_t)OCFS2_I(inode)->ip_clusters << c_to_s_bits; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /* Validate that a bh contains a valid inode */ | /* Validate that a bh contains a valid inode */ | ||||||
|  |  | ||||||
|  | @ -404,7 +404,7 @@ static int ocfs2_find_victim_alloc_group(struct inode *inode, | ||||||
| 	 * 'vict_blkno' was out of the valid range. | 	 * 'vict_blkno' was out of the valid range. | ||||||
| 	 */ | 	 */ | ||||||
| 	if ((vict_blkno < le64_to_cpu(rec->c_blkno)) || | 	if ((vict_blkno < le64_to_cpu(rec->c_blkno)) || | ||||||
| 	    (vict_blkno >= (le32_to_cpu(ac_dinode->id1.bitmap1.i_total) << | 	    (vict_blkno >= ((u64)le32_to_cpu(ac_dinode->id1.bitmap1.i_total) << | ||||||
| 				bits_per_unit))) { | 				bits_per_unit))) { | ||||||
| 		ret = -EINVAL; | 		ret = -EINVAL; | ||||||
| 		goto out; | 		goto out; | ||||||
|  |  | ||||||
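The two one-line fixes above, ip_clusters in ocfs2_inode_sector_count() and i_total in ocfs2_find_victim_alloc_group(), move the widening cast in front of the shift: when a 32-bit value is shifted first and cast afterwards, the high bits are already lost before the result is widened. A standalone illustration with made-up numbers:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t clusters = 0x00400000;		/* illustrative value */
	unsigned int shift = 11;

	/* Shift done in 32 bits, cast applied too late: high bits are gone. */
	uint64_t truncated = (uint64_t)(clusters << shift);

	/* Cast first, so the shift is carried out in 64 bits. */
	uint64_t correct = (uint64_t)clusters << shift;

	printf("truncated=%#llx correct=%#llx\n",
	       (unsigned long long)truncated, (unsigned long long)correct);
	return 0;
}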
|  | @ -591,7 +591,7 @@ static int ocfs2_control_release(struct inode *inode, struct file *file) | ||||||
| 		 */ | 		 */ | ||||||
| 		ocfs2_control_this_node = -1; | 		ocfs2_control_this_node = -1; | ||||||
| 		running_proto.pv_major = 0; | 		running_proto.pv_major = 0; | ||||||
| 		running_proto.pv_major = 0; | 		running_proto.pv_minor = 0; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| out: | out: | ||||||
|  |  | ||||||
|  | @ -632,29 +632,35 @@ static const struct file_operations proc_single_file_operations = { | ||||||
| 	.release	= single_release, | 	.release	= single_release, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| static int __mem_open(struct inode *inode, struct file *file, unsigned int mode) | 
 | ||||||
|  | struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode) | ||||||
| { | { | ||||||
| 	struct task_struct *task = get_proc_task(file_inode(file)); | 	struct task_struct *task = get_proc_task(inode); | ||||||
| 	struct mm_struct *mm; | 	struct mm_struct *mm = ERR_PTR(-ESRCH); | ||||||
| 
 |  | ||||||
| 	if (!task) |  | ||||||
| 		return -ESRCH; |  | ||||||
| 
 | 
 | ||||||
|  | 	if (task) { | ||||||
| 		mm = mm_access(task, mode); | 		mm = mm_access(task, mode); | ||||||
| 		put_task_struct(task); | 		put_task_struct(task); | ||||||
| 
 | 
 | ||||||
| 	if (IS_ERR(mm)) | 		if (!IS_ERR_OR_NULL(mm)) { | ||||||
| 		return PTR_ERR(mm); |  | ||||||
| 
 |  | ||||||
| 	if (mm) { |  | ||||||
| 			/* ensure this mm_struct can't be freed */ | 			/* ensure this mm_struct can't be freed */ | ||||||
| 			atomic_inc(&mm->mm_count); | 			atomic_inc(&mm->mm_count); | ||||||
| 			/* but do not pin its memory */ | 			/* but do not pin its memory */ | ||||||
| 			mmput(mm); | 			mmput(mm); | ||||||
| 		} | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return mm; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static int __mem_open(struct inode *inode, struct file *file, unsigned int mode) | ||||||
|  | { | ||||||
|  | 	struct mm_struct *mm = proc_mem_open(inode, mode); | ||||||
|  | 
 | ||||||
|  | 	if (IS_ERR(mm)) | ||||||
|  | 		return PTR_ERR(mm); | ||||||
| 
 | 
 | ||||||
| 	file->private_data = mm; | 	file->private_data = mm; | ||||||
| 
 |  | ||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
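proc_mem_open() centralizes the task lookup plus the mm_access() check and returns an mm whose mm_count is pinned but whose mm_users is not: the structure cannot be freed, yet the address space may already have been torn down. Callers therefore release it with mmdrop() and take a temporary mm_users reference before walking it. A rough sketch of those lifetime rules; the example_* names are illustrative:

#include <linux/ptrace.h>
#include <linux/sched.h>
#include <linux/err.h>
#include "internal.h"			/* proc_mem_open() */

static int example_open(struct inode *inode, struct file *file)
{
	struct mm_struct *mm = proc_mem_open(inode, PTRACE_MODE_READ);

	if (IS_ERR(mm))
		return PTR_ERR(mm);
	file->private_data = mm;	/* may be NULL for mm-less tasks */
	return 0;
}

static int example_release(struct inode *inode, struct file *file)
{
	struct mm_struct *mm = file->private_data;

	if (mm)
		mmdrop(mm);		/* pairs with the mm_count pin */
	return 0;
}

static void example_walk(struct mm_struct *mm)
{
	/* The struct itself is safe to dereference, but grab a users
	 * reference before touching the address space. */
	if (!mm || !atomic_inc_not_zero(&mm->mm_users))
		return;
	down_read(&mm->mmap_sem);
	/* ... iterate mm->mmap ... */
	up_read(&mm->mmap_sem);
	mmput(mm);			/* pairs with the mm_users reference */
}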
|  | @ -268,8 +268,9 @@ extern int proc_remount(struct super_block *, int *, char *); | ||||||
|  * task_[no]mmu.c |  * task_[no]mmu.c | ||||||
|  */ |  */ | ||||||
| struct proc_maps_private { | struct proc_maps_private { | ||||||
| 	struct pid *pid; | 	struct inode *inode; | ||||||
| 	struct task_struct *task; | 	struct task_struct *task; | ||||||
|  | 	struct mm_struct *mm; | ||||||
| #ifdef CONFIG_MMU | #ifdef CONFIG_MMU | ||||||
| 	struct vm_area_struct *tail_vma; | 	struct vm_area_struct *tail_vma; | ||||||
| #endif | #endif | ||||||
|  | @ -278,6 +279,8 @@ struct proc_maps_private { | ||||||
| #endif | #endif | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode); | ||||||
|  | 
 | ||||||
| extern const struct file_operations proc_pid_maps_operations; | extern const struct file_operations proc_pid_maps_operations; | ||||||
| extern const struct file_operations proc_tid_maps_operations; | extern const struct file_operations proc_tid_maps_operations; | ||||||
| extern const struct file_operations proc_pid_numa_maps_operations; | extern const struct file_operations proc_pid_numa_maps_operations; | ||||||
|  |  | ||||||
|  | @ -610,9 +610,11 @@ static void __init proc_kcore_text_init(void) | ||||||
| struct kcore_list kcore_modules; | struct kcore_list kcore_modules; | ||||||
| static void __init add_modules_range(void) | static void __init add_modules_range(void) | ||||||
| { | { | ||||||
|  | 	if (MODULES_VADDR != VMALLOC_START && MODULES_END != VMALLOC_END) { | ||||||
| 		kclist_add(&kcore_modules, (void *)MODULES_VADDR, | 		kclist_add(&kcore_modules, (void *)MODULES_VADDR, | ||||||
| 			MODULES_END - MODULES_VADDR, KCORE_VMALLOC); | 			MODULES_END - MODULES_VADDR, KCORE_VMALLOC); | ||||||
| 	} | 	} | ||||||
|  | } | ||||||
| #else | #else | ||||||
| static void __init add_modules_range(void) | static void __init add_modules_range(void) | ||||||
| { | { | ||||||
|  |  | ||||||
|  | @ -133,6 +133,9 @@ u64 stable_page_flags(struct page *page) | ||||||
| 	if (PageBuddy(page)) | 	if (PageBuddy(page)) | ||||||
| 		u |= 1 << KPF_BUDDY; | 		u |= 1 << KPF_BUDDY; | ||||||
| 
 | 
 | ||||||
|  | 	if (PageBalloon(page)) | ||||||
|  | 		u |= 1 << KPF_BALLOON; | ||||||
|  | 
 | ||||||
| 	u |= kpf_copy_bit(k, KPF_LOCKED,	PG_locked); | 	u |= kpf_copy_bit(k, KPF_LOCKED,	PG_locked); | ||||||
| 
 | 
 | ||||||
| 	u |= kpf_copy_bit(k, KPF_SLAB,		PG_slab); | 	u |= kpf_copy_bit(k, KPF_SLAB,		PG_slab); | ||||||
|  |  | ||||||
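With PageBalloon() now reported through stable_page_flags(), balloon-inflated pages show up in /proc/kpageflags. A userspace sketch that checks one pfn, assuming the KPF_BALLOON bit exported by <linux/kernel-page-flags.h> (added alongside this change) and root access to the file:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <linux/kernel-page-flags.h>	/* KPF_BALLOON */

int main(int argc, char **argv)
{
	uint64_t pfn, flags = 0;
	int fd;

	if (argc < 2)
		return 1;
	pfn = strtoull(argv[1], NULL, 0);

	fd = open("/proc/kpageflags", O_RDONLY);	/* one u64 of flags per pfn */
	if (fd < 0)
		return 1;
	if (pread(fd, &flags, sizeof(flags), pfn * sizeof(flags)) != sizeof(flags))
		flags = 0;
	printf("pfn %llu: balloon=%d\n", (unsigned long long)pfn,
	       !!(flags & (1ULL << KPF_BALLOON)));
	close(fd);
	return 0;
}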
|  | @ -87,32 +87,14 @@ unsigned long task_statm(struct mm_struct *mm, | ||||||
| 
 | 
 | ||||||
| #ifdef CONFIG_NUMA | #ifdef CONFIG_NUMA | ||||||
| /*
 | /*
 | ||||||
|  * These functions are for numa_maps but called in generic **maps seq_file |  * Save get_task_policy() for show_numa_map(). | ||||||
|  * ->start(), ->stop() ops. |  | ||||||
|  * |  | ||||||
|  * numa_maps scans all vmas under mmap_sem and checks their mempolicy. |  | ||||||
|  * Each mempolicy object is controlled by reference counting. The problem here |  | ||||||
|  * is how to avoid accessing dead mempolicy object. |  | ||||||
|  * |  | ||||||
|  * Because we're holding mmap_sem while reading seq_file, it's safe to access |  | ||||||
|  * each vma's mempolicy, no vma objects will never drop refs to mempolicy. |  | ||||||
|  * |  | ||||||
|  * A task's mempolicy (task->mempolicy) has different behavior. task->mempolicy |  | ||||||
|  * is set and replaced under mmap_sem but unrefed and cleared under task_lock(). |  | ||||||
|  * So, without task_lock(), we cannot trust get_vma_policy() because we cannot |  | ||||||
|  * gurantee the task never exits under us. But taking task_lock() around |  | ||||||
|  * get_vma_plicy() causes lock order problem. |  | ||||||
|  * |  | ||||||
|  * To access task->mempolicy without lock, we hold a reference count of an |  | ||||||
|  * object pointed by task->mempolicy and remember it. This will guarantee |  | ||||||
|  * that task->mempolicy points to an alive object or NULL in numa_maps accesses. |  | ||||||
|  */ |  */ | ||||||
| static void hold_task_mempolicy(struct proc_maps_private *priv) | static void hold_task_mempolicy(struct proc_maps_private *priv) | ||||||
| { | { | ||||||
| 	struct task_struct *task = priv->task; | 	struct task_struct *task = priv->task; | ||||||
| 
 | 
 | ||||||
| 	task_lock(task); | 	task_lock(task); | ||||||
| 	priv->task_mempolicy = task->mempolicy; | 	priv->task_mempolicy = get_task_policy(task); | ||||||
| 	mpol_get(priv->task_mempolicy); | 	mpol_get(priv->task_mempolicy); | ||||||
| 	task_unlock(task); | 	task_unlock(task); | ||||||
| } | } | ||||||
|  | @ -129,124 +111,154 @@ static void release_task_mempolicy(struct proc_maps_private *priv) | ||||||
| } | } | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
| static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma) | static void vma_stop(struct proc_maps_private *priv) | ||||||
| { | { | ||||||
| 	if (vma && vma != priv->tail_vma) { | 	struct mm_struct *mm = priv->mm; | ||||||
| 		struct mm_struct *mm = vma->vm_mm; | 
 | ||||||
| 	release_task_mempolicy(priv); | 	release_task_mempolicy(priv); | ||||||
| 	up_read(&mm->mmap_sem); | 	up_read(&mm->mmap_sem); | ||||||
| 	mmput(mm); | 	mmput(mm); | ||||||
| } | } | ||||||
|  | 
 | ||||||
|  | static struct vm_area_struct * | ||||||
|  | m_next_vma(struct proc_maps_private *priv, struct vm_area_struct *vma) | ||||||
|  | { | ||||||
|  | 	if (vma == priv->tail_vma) | ||||||
|  | 		return NULL; | ||||||
|  | 	return vma->vm_next ?: priv->tail_vma; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void *m_start(struct seq_file *m, loff_t *pos) | static void m_cache_vma(struct seq_file *m, struct vm_area_struct *vma) | ||||||
|  | { | ||||||
|  | 	if (m->count < m->size)	/* vma is copied successfully */ | ||||||
|  | 		m->version = m_next_vma(m->private, vma) ? vma->vm_start : -1UL; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void *m_start(struct seq_file *m, loff_t *ppos) | ||||||
| { | { | ||||||
| 	struct proc_maps_private *priv = m->private; | 	struct proc_maps_private *priv = m->private; | ||||||
| 	unsigned long last_addr = m->version; | 	unsigned long last_addr = m->version; | ||||||
| 	struct mm_struct *mm; | 	struct mm_struct *mm; | ||||||
| 	struct vm_area_struct *vma, *tail_vma = NULL; | 	struct vm_area_struct *vma; | ||||||
| 	loff_t l = *pos; | 	unsigned int pos = *ppos; | ||||||
| 
 |  | ||||||
| 	/* Clear the per syscall fields in priv */ |  | ||||||
| 	priv->task = NULL; |  | ||||||
| 	priv->tail_vma = NULL; |  | ||||||
| 
 |  | ||||||
| 	/*
 |  | ||||||
| 	 * We remember last_addr rather than next_addr to hit with |  | ||||||
| 	 * vmacache most of the time. We have zero last_addr at |  | ||||||
| 	 * the beginning and also after lseek. We will have -1 last_addr |  | ||||||
| 	 * after the end of the vmas. |  | ||||||
| 	 */ |  | ||||||
| 
 | 
 | ||||||
|  | 	/* See m_cache_vma(). Zero at the start or after lseek. */ | ||||||
| 	if (last_addr == -1UL) | 	if (last_addr == -1UL) | ||||||
| 		return NULL; | 		return NULL; | ||||||
| 
 | 
 | ||||||
| 	priv->task = get_pid_task(priv->pid, PIDTYPE_PID); | 	priv->task = get_proc_task(priv->inode); | ||||||
| 	if (!priv->task) | 	if (!priv->task) | ||||||
| 		return ERR_PTR(-ESRCH); | 		return ERR_PTR(-ESRCH); | ||||||
| 
 | 
 | ||||||
| 	mm = mm_access(priv->task, PTRACE_MODE_READ); | 	mm = priv->mm; | ||||||
| 	if (!mm || IS_ERR(mm)) | 	if (!mm || !atomic_inc_not_zero(&mm->mm_users)) | ||||||
| 		return mm; | 		return NULL; | ||||||
|  | 
 | ||||||
| 	down_read(&mm->mmap_sem); | 	down_read(&mm->mmap_sem); | ||||||
| 
 |  | ||||||
| 	tail_vma = get_gate_vma(priv->task->mm); |  | ||||||
| 	priv->tail_vma = tail_vma; |  | ||||||
| 	hold_task_mempolicy(priv); | 	hold_task_mempolicy(priv); | ||||||
| 	/* Start with last addr hint */ | 	priv->tail_vma = get_gate_vma(mm); | ||||||
|  | 
 | ||||||
|  | 	if (last_addr) { | ||||||
| 		vma = find_vma(mm, last_addr); | 		vma = find_vma(mm, last_addr); | ||||||
| 	if (last_addr && vma) { | 		if (vma && (vma = m_next_vma(priv, vma))) | ||||||
| 		vma = vma->vm_next; |  | ||||||
| 		goto out; |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	/*
 |  | ||||||
| 	 * Check the vma index is within the range and do |  | ||||||
| 	 * sequential scan until m_index. |  | ||||||
| 	 */ |  | ||||||
| 	vma = NULL; |  | ||||||
| 	if ((unsigned long)l < mm->map_count) { |  | ||||||
| 		vma = mm->mmap; |  | ||||||
| 		while (l-- && vma) |  | ||||||
| 			vma = vma->vm_next; |  | ||||||
| 		goto out; |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	if (l != mm->map_count) |  | ||||||
| 		tail_vma = NULL; /* After gate vma */ |  | ||||||
| 
 |  | ||||||
| out: |  | ||||||
| 	if (vma) |  | ||||||
| 			return vma; | 			return vma; | ||||||
|  | 	} | ||||||
| 
 | 
 | ||||||
| 	release_task_mempolicy(priv); | 	m->version = 0; | ||||||
| 	/* End of vmas has been reached */ | 	if (pos < mm->map_count) { | ||||||
| 	m->version = (tail_vma != NULL)? 0: -1UL; | 		for (vma = mm->mmap; pos; pos--) { | ||||||
| 	up_read(&mm->mmap_sem); | 			m->version = vma->vm_start; | ||||||
| 	mmput(mm); | 			vma = vma->vm_next; | ||||||
| 	return tail_vma; | 		} | ||||||
|  | 		return vma; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	/* we do not bother to update m->version in this case */ | ||||||
|  | 	if (pos == mm->map_count && priv->tail_vma) | ||||||
|  | 		return priv->tail_vma; | ||||||
|  | 
 | ||||||
|  | 	vma_stop(priv); | ||||||
|  | 	return NULL; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void *m_next(struct seq_file *m, void *v, loff_t *pos) | static void *m_next(struct seq_file *m, void *v, loff_t *pos) | ||||||
| { | { | ||||||
| 	struct proc_maps_private *priv = m->private; | 	struct proc_maps_private *priv = m->private; | ||||||
| 	struct vm_area_struct *vma = v; | 	struct vm_area_struct *next; | ||||||
| 	struct vm_area_struct *tail_vma = priv->tail_vma; |  | ||||||
| 
 | 
 | ||||||
| 	(*pos)++; | 	(*pos)++; | ||||||
| 	if (vma && (vma != tail_vma) && vma->vm_next) | 	next = m_next_vma(priv, v); | ||||||
| 		return vma->vm_next; | 	if (!next) | ||||||
| 	vma_stop(priv, vma); | 		vma_stop(priv); | ||||||
| 	return (vma != tail_vma)? tail_vma: NULL; | 	return next; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void m_stop(struct seq_file *m, void *v) | static void m_stop(struct seq_file *m, void *v) | ||||||
| { | { | ||||||
| 	struct proc_maps_private *priv = m->private; | 	struct proc_maps_private *priv = m->private; | ||||||
| 	struct vm_area_struct *vma = v; |  | ||||||
| 
 | 
 | ||||||
| 	if (!IS_ERR(vma)) | 	if (!IS_ERR_OR_NULL(v)) | ||||||
| 		vma_stop(priv, vma); | 		vma_stop(priv); | ||||||
| 	if (priv->task) | 	if (priv->task) { | ||||||
| 		put_task_struct(priv->task); | 		put_task_struct(priv->task); | ||||||
|  | 		priv->task = NULL; | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static int proc_maps_open(struct inode *inode, struct file *file, | ||||||
|  | 			const struct seq_operations *ops, int psize) | ||||||
|  | { | ||||||
|  | 	struct proc_maps_private *priv = __seq_open_private(file, ops, psize); | ||||||
|  | 
 | ||||||
|  | 	if (!priv) | ||||||
|  | 		return -ENOMEM; | ||||||
|  | 
 | ||||||
|  | 	priv->inode = inode; | ||||||
|  | 	priv->mm = proc_mem_open(inode, PTRACE_MODE_READ); | ||||||
|  | 	if (IS_ERR(priv->mm)) { | ||||||
|  | 		int err = PTR_ERR(priv->mm); | ||||||
|  | 
 | ||||||
|  | 		seq_release_private(inode, file); | ||||||
|  | 		return err; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static int proc_map_release(struct inode *inode, struct file *file) | ||||||
|  | { | ||||||
|  | 	struct seq_file *seq = file->private_data; | ||||||
|  | 	struct proc_maps_private *priv = seq->private; | ||||||
|  | 
 | ||||||
|  | 	if (priv->mm) | ||||||
|  | 		mmdrop(priv->mm); | ||||||
|  | 
 | ||||||
|  | 	return seq_release_private(inode, file); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static int do_maps_open(struct inode *inode, struct file *file, | static int do_maps_open(struct inode *inode, struct file *file, | ||||||
| 			const struct seq_operations *ops) | 			const struct seq_operations *ops) | ||||||
| { | { | ||||||
| 	struct proc_maps_private *priv; | 	return proc_maps_open(inode, file, ops, | ||||||
| 	int ret = -ENOMEM; | 				sizeof(struct proc_maps_private)); | ||||||
| 	priv = kzalloc(sizeof(*priv), GFP_KERNEL); |  | ||||||
| 	if (priv) { |  | ||||||
| 		priv->pid = proc_pid(inode); |  | ||||||
| 		ret = seq_open(file, ops); |  | ||||||
| 		if (!ret) { |  | ||||||
| 			struct seq_file *m = file->private_data; |  | ||||||
| 			m->private = priv; |  | ||||||
| 		} else { |  | ||||||
| 			kfree(priv); |  | ||||||
| } | } | ||||||
|  | 
 | ||||||
|  | static pid_t pid_of_stack(struct proc_maps_private *priv, | ||||||
|  | 				struct vm_area_struct *vma, bool is_pid) | ||||||
|  | { | ||||||
|  | 	struct inode *inode = priv->inode; | ||||||
|  | 	struct task_struct *task; | ||||||
|  | 	pid_t ret = 0; | ||||||
|  | 
 | ||||||
|  | 	rcu_read_lock(); | ||||||
|  | 	task = pid_task(proc_pid(inode), PIDTYPE_PID); | ||||||
|  | 	if (task) { | ||||||
|  | 		task = task_of_stack(task, vma, is_pid); | ||||||
|  | 		if (task) | ||||||
|  | 			ret = task_pid_nr_ns(task, inode->i_sb->s_fs_info); | ||||||
| 	} | 	} | ||||||
|  | 	rcu_read_unlock(); | ||||||
|  | 
 | ||||||
| 	return ret; | 	return ret; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -256,7 +268,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) | ||||||
| 	struct mm_struct *mm = vma->vm_mm; | 	struct mm_struct *mm = vma->vm_mm; | ||||||
| 	struct file *file = vma->vm_file; | 	struct file *file = vma->vm_file; | ||||||
| 	struct proc_maps_private *priv = m->private; | 	struct proc_maps_private *priv = m->private; | ||||||
| 	struct task_struct *task = priv->task; |  | ||||||
| 	vm_flags_t flags = vma->vm_flags; | 	vm_flags_t flags = vma->vm_flags; | ||||||
| 	unsigned long ino = 0; | 	unsigned long ino = 0; | ||||||
| 	unsigned long long pgoff = 0; | 	unsigned long long pgoff = 0; | ||||||
|  | @ -321,8 +332,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) | ||||||
| 			goto done; | 			goto done; | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
| 		tid = vm_is_stack(task, vma, is_pid); | 		tid = pid_of_stack(priv, vma, is_pid); | ||||||
| 
 |  | ||||||
| 		if (tid != 0) { | 		if (tid != 0) { | ||||||
| 			/*
 | 			/*
 | ||||||
| 			 * Thread stack in /proc/PID/task/TID/maps or | 			 * Thread stack in /proc/PID/task/TID/maps or | ||||||
|  | @ -349,15 +359,8 @@ done: | ||||||
| 
 | 
 | ||||||
| static int show_map(struct seq_file *m, void *v, int is_pid) | static int show_map(struct seq_file *m, void *v, int is_pid) | ||||||
| { | { | ||||||
| 	struct vm_area_struct *vma = v; | 	show_map_vma(m, v, is_pid); | ||||||
| 	struct proc_maps_private *priv = m->private; | 	m_cache_vma(m, v); | ||||||
| 	struct task_struct *task = priv->task; |  | ||||||
| 
 |  | ||||||
| 	show_map_vma(m, vma, is_pid); |  | ||||||
| 
 |  | ||||||
| 	if (m->count < m->size)  /* vma is copied successfully */ |  | ||||||
| 		m->version = (vma != get_gate_vma(task->mm)) |  | ||||||
| 			? vma->vm_start : 0; |  | ||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -399,14 +402,14 @@ const struct file_operations proc_pid_maps_operations = { | ||||||
| 	.open		= pid_maps_open, | 	.open		= pid_maps_open, | ||||||
| 	.read		= seq_read, | 	.read		= seq_read, | ||||||
| 	.llseek		= seq_lseek, | 	.llseek		= seq_lseek, | ||||||
| 	.release	= seq_release_private, | 	.release	= proc_map_release, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| const struct file_operations proc_tid_maps_operations = { | const struct file_operations proc_tid_maps_operations = { | ||||||
| 	.open		= tid_maps_open, | 	.open		= tid_maps_open, | ||||||
| 	.read		= seq_read, | 	.read		= seq_read, | ||||||
| 	.llseek		= seq_lseek, | 	.llseek		= seq_lseek, | ||||||
| 	.release	= seq_release_private, | 	.release	= proc_map_release, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
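For context on what these seq_file hooks serve: /proc/PID/maps and /proc/PID/smaps are consumed with plain read() calls, and every chunk re-enters the iterator, which is what the switch to a cached priv->mm plus m_cache_vma() keeps cheap. A userspace sketch, not part of the patch (the buffer size is deliberately tiny to force many partial reads):

```c
/* Read /proc/self/maps in small chunks; each read() resumes the seq_file
 * iterator on the kernel side. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[256];		/* tiny on purpose, to force repeated reads */
	int fd = open("/proc/self/maps", O_RDONLY);
	ssize_t n;

	if (fd < 0) {
		perror("open");
		return 1;
	}
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);
	close(fd);
	return 0;
}
```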
| /*
 | /*
 | ||||||
|  | @ -583,8 +586,6 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) | ||||||
| 
 | 
 | ||||||
| static int show_smap(struct seq_file *m, void *v, int is_pid) | static int show_smap(struct seq_file *m, void *v, int is_pid) | ||||||
| { | { | ||||||
| 	struct proc_maps_private *priv = m->private; |  | ||||||
| 	struct task_struct *task = priv->task; |  | ||||||
| 	struct vm_area_struct *vma = v; | 	struct vm_area_struct *vma = v; | ||||||
| 	struct mem_size_stats mss; | 	struct mem_size_stats mss; | ||||||
| 	struct mm_walk smaps_walk = { | 	struct mm_walk smaps_walk = { | ||||||
|  | @ -637,10 +638,7 @@ static int show_smap(struct seq_file *m, void *v, int is_pid) | ||||||
| 				mss.nonlinear >> 10); | 				mss.nonlinear >> 10); | ||||||
| 
 | 
 | ||||||
| 	show_smap_vma_flags(m, vma); | 	show_smap_vma_flags(m, vma); | ||||||
| 
 | 	m_cache_vma(m, vma); | ||||||
| 	if (m->count < m->size)  /* vma is copied successfully */ |  | ||||||
| 		m->version = (vma != get_gate_vma(task->mm)) |  | ||||||
| 			? vma->vm_start : 0; |  | ||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -682,14 +680,14 @@ const struct file_operations proc_pid_smaps_operations = { | ||||||
| 	.open		= pid_smaps_open, | 	.open		= pid_smaps_open, | ||||||
| 	.read		= seq_read, | 	.read		= seq_read, | ||||||
| 	.llseek		= seq_lseek, | 	.llseek		= seq_lseek, | ||||||
| 	.release	= seq_release_private, | 	.release	= proc_map_release, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| const struct file_operations proc_tid_smaps_operations = { | const struct file_operations proc_tid_smaps_operations = { | ||||||
| 	.open		= tid_smaps_open, | 	.open		= tid_smaps_open, | ||||||
| 	.read		= seq_read, | 	.read		= seq_read, | ||||||
| 	.llseek		= seq_lseek, | 	.llseek		= seq_lseek, | ||||||
| 	.release	= seq_release_private, | 	.release	= proc_map_release, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  | @ -1029,7 +1027,6 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | ||||||
| 	spinlock_t *ptl; | 	spinlock_t *ptl; | ||||||
| 	pte_t *pte; | 	pte_t *pte; | ||||||
| 	int err = 0; | 	int err = 0; | ||||||
| 	pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2)); |  | ||||||
| 
 | 
 | ||||||
| 	/* find the first VMA at or above 'addr' */ | 	/* find the first VMA at or above 'addr' */ | ||||||
| 	vma = find_vma(walk->mm, addr); | 	vma = find_vma(walk->mm, addr); | ||||||
|  | @ -1043,6 +1040,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | ||||||
| 
 | 
 | ||||||
| 		for (; addr != end; addr += PAGE_SIZE) { | 		for (; addr != end; addr += PAGE_SIZE) { | ||||||
| 			unsigned long offset; | 			unsigned long offset; | ||||||
|  | 			pagemap_entry_t pme; | ||||||
| 
 | 
 | ||||||
| 			offset = (addr & ~PAGEMAP_WALK_MASK) >> | 			offset = (addr & ~PAGEMAP_WALK_MASK) >> | ||||||
| 					PAGE_SHIFT; | 					PAGE_SHIFT; | ||||||
|  | @ -1057,34 +1055,53 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | ||||||
| 
 | 
 | ||||||
| 	if (pmd_trans_unstable(pmd)) | 	if (pmd_trans_unstable(pmd)) | ||||||
| 		return 0; | 		return 0; | ||||||
| 	for (; addr != end; addr += PAGE_SIZE) { |  | ||||||
| 		int flags2; |  | ||||||
| 
 | 
 | ||||||
| 		/* check to see if we've left 'vma' behind
 | 	while (1) { | ||||||
| 		 * and need a new, higher one */ | 		/* End of address space hole, which we mark as non-present. */ | ||||||
| 		if (vma && (addr >= vma->vm_end)) { | 		unsigned long hole_end; | ||||||
| 			vma = find_vma(walk->mm, addr); | 
 | ||||||
| 			if (vma && (vma->vm_flags & VM_SOFTDIRTY)) | 		if (vma) | ||||||
| 				flags2 = __PM_SOFT_DIRTY; | 			hole_end = min(end, vma->vm_start); | ||||||
| 		else | 		else | ||||||
| 				flags2 = 0; | 			hole_end = end; | ||||||
| 			pme = make_pme(PM_NOT_PRESENT(pm->v2) | PM_STATUS2(pm->v2, flags2)); | 
 | ||||||
| 		} | 		for (; addr < hole_end; addr += PAGE_SIZE) { | ||||||
|  | 			pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2)); | ||||||
| 
 | 
 | ||||||
| 		/* check that 'vma' actually covers this address,
 |  | ||||||
| 		 * and that it isn't a huge page vma */ |  | ||||||
| 		if (vma && (vma->vm_start <= addr) && |  | ||||||
| 		    !is_vm_hugetlb_page(vma)) { |  | ||||||
| 			pte = pte_offset_map(pmd, addr); |  | ||||||
| 			pte_to_pagemap_entry(&pme, pm, vma, addr, *pte); |  | ||||||
| 			/* unmap before userspace copy */ |  | ||||||
| 			pte_unmap(pte); |  | ||||||
| 		} |  | ||||||
| 			err = add_to_pagemap(addr, &pme, pm); | 			err = add_to_pagemap(addr, &pme, pm); | ||||||
| 			if (err) | 			if (err) | ||||||
| 				return err; | 				return err; | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
|  | 		if (!vma || vma->vm_start >= end) | ||||||
|  | 			break; | ||||||
|  | 		/*
 | ||||||
|  | 		 * We can't possibly be in a hugetlb VMA. In general, | ||||||
|  | 		 * for a mm_walk with a pmd_entry and a hugetlb_entry, | ||||||
|  | 		 * the pmd_entry can only be called on addresses in a | ||||||
|  | 		 * hugetlb if the walk starts in a non-hugetlb VMA and | ||||||
|  | 		 * spans a hugepage VMA. Since pagemap_read walks are | ||||||
|  | 		 * PMD-sized and PMD-aligned, this will never be true. | ||||||
|  | 		 */ | ||||||
|  | 		BUG_ON(is_vm_hugetlb_page(vma)); | ||||||
|  | 
 | ||||||
|  | 		/* Addresses in the VMA. */ | ||||||
|  | 		for (; addr < min(end, vma->vm_end); addr += PAGE_SIZE) { | ||||||
|  | 			pagemap_entry_t pme; | ||||||
|  | 			pte = pte_offset_map(pmd, addr); | ||||||
|  | 			pte_to_pagemap_entry(&pme, pm, vma, addr, *pte); | ||||||
|  | 			pte_unmap(pte); | ||||||
|  | 			err = add_to_pagemap(addr, &pme, pm); | ||||||
|  | 			if (err) | ||||||
|  | 				return err; | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		if (addr == end) | ||||||
|  | 			break; | ||||||
|  | 
 | ||||||
|  | 		vma = find_vma(walk->mm, addr); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
| 	cond_resched(); | 	cond_resched(); | ||||||
| 
 | 
 | ||||||
| 	return err; | 	return err; | ||||||
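The rewritten walk above emits PM_NOT_PRESENT entries for every page of an address-space hole instead of carrying a stale entry across iterations. On the consumer side, each /proc/PID/pagemap slot is a 64-bit word whose bit 63 means "present" and bit 55 "soft-dirty". A userspace sketch, not part of the patch (the probe variable is just a convenient mapped address):

```c
/* Look up the pagemap entry for one address of the current process. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
	long psize = sysconf(_SC_PAGESIZE);
	char probe = 0;			/* lives on a mapped stack page */
	uint64_t entry;
	int fd = open("/proc/self/pagemap", O_RDONLY);
	off_t off;

	if (fd < 0) {
		perror("open");
		return 1;
	}
	off = (off_t)((uintptr_t)&probe / psize) * sizeof(entry);
	if (pread(fd, &entry, sizeof(entry), off) != (ssize_t)sizeof(entry)) {
		perror("pread");
		return 1;
	}
	printf("present=%llu soft-dirty=%llu\n",
	       (unsigned long long)(entry >> 63) & 1,
	       (unsigned long long)(entry >> 55) & 1);
	close(fd);
	return 0;
}
```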
|  | @ -1415,7 +1432,6 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) | ||||||
| 	struct vm_area_struct *vma = v; | 	struct vm_area_struct *vma = v; | ||||||
| 	struct numa_maps *md = &numa_priv->md; | 	struct numa_maps *md = &numa_priv->md; | ||||||
| 	struct file *file = vma->vm_file; | 	struct file *file = vma->vm_file; | ||||||
| 	struct task_struct *task = proc_priv->task; |  | ||||||
| 	struct mm_struct *mm = vma->vm_mm; | 	struct mm_struct *mm = vma->vm_mm; | ||||||
| 	struct mm_walk walk = {}; | 	struct mm_walk walk = {}; | ||||||
| 	struct mempolicy *pol; | 	struct mempolicy *pol; | ||||||
|  | @ -1435,9 +1451,13 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) | ||||||
| 	walk.private = md; | 	walk.private = md; | ||||||
| 	walk.mm = mm; | 	walk.mm = mm; | ||||||
| 
 | 
 | ||||||
| 	pol = get_vma_policy(task, vma, vma->vm_start); | 	pol = __get_vma_policy(vma, vma->vm_start); | ||||||
|  | 	if (pol) { | ||||||
| 		mpol_to_str(buffer, sizeof(buffer), pol); | 		mpol_to_str(buffer, sizeof(buffer), pol); | ||||||
| 		mpol_cond_put(pol); | 		mpol_cond_put(pol); | ||||||
|  | 	} else { | ||||||
|  | 		mpol_to_str(buffer, sizeof(buffer), proc_priv->task_mempolicy); | ||||||
|  | 	} | ||||||
| 
 | 
 | ||||||
| 	seq_printf(m, "%08lx %s", vma->vm_start, buffer); | 	seq_printf(m, "%08lx %s", vma->vm_start, buffer); | ||||||
| 
 | 
 | ||||||
|  | @ -1447,7 +1467,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) | ||||||
| 	} else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { | 	} else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { | ||||||
| 		seq_puts(m, " heap"); | 		seq_puts(m, " heap"); | ||||||
| 	} else { | 	} else { | ||||||
| 		pid_t tid = vm_is_stack(task, vma, is_pid); | 		pid_t tid = pid_of_stack(proc_priv, vma, is_pid); | ||||||
| 		if (tid != 0) { | 		if (tid != 0) { | ||||||
| 			/*
 | 			/*
 | ||||||
| 			 * Thread stack in /proc/PID/task/TID/maps or | 			 * Thread stack in /proc/PID/task/TID/maps or | ||||||
|  | @ -1495,9 +1515,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) | ||||||
| 			seq_printf(m, " N%d=%lu", nid, md->node[nid]); | 			seq_printf(m, " N%d=%lu", nid, md->node[nid]); | ||||||
| out: | out: | ||||||
| 	seq_putc(m, '\n'); | 	seq_putc(m, '\n'); | ||||||
| 
 | 	m_cache_vma(m, vma); | ||||||
| 	if (m->count < m->size) |  | ||||||
| 		m->version = (vma != proc_priv->tail_vma) ? vma->vm_start : 0; |  | ||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -1528,20 +1546,8 @@ static const struct seq_operations proc_tid_numa_maps_op = { | ||||||
| static int numa_maps_open(struct inode *inode, struct file *file, | static int numa_maps_open(struct inode *inode, struct file *file, | ||||||
| 			  const struct seq_operations *ops) | 			  const struct seq_operations *ops) | ||||||
| { | { | ||||||
| 	struct numa_maps_private *priv; | 	return proc_maps_open(inode, file, ops, | ||||||
| 	int ret = -ENOMEM; | 				sizeof(struct numa_maps_private)); | ||||||
| 	priv = kzalloc(sizeof(*priv), GFP_KERNEL); |  | ||||||
| 	if (priv) { |  | ||||||
| 		priv->proc_maps.pid = proc_pid(inode); |  | ||||||
| 		ret = seq_open(file, ops); |  | ||||||
| 		if (!ret) { |  | ||||||
| 			struct seq_file *m = file->private_data; |  | ||||||
| 			m->private = priv; |  | ||||||
| 		} else { |  | ||||||
| 			kfree(priv); |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 	return ret; |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static int pid_numa_maps_open(struct inode *inode, struct file *file) | static int pid_numa_maps_open(struct inode *inode, struct file *file) | ||||||
|  | @ -1558,13 +1564,13 @@ const struct file_operations proc_pid_numa_maps_operations = { | ||||||
| 	.open		= pid_numa_maps_open, | 	.open		= pid_numa_maps_open, | ||||||
| 	.read		= seq_read, | 	.read		= seq_read, | ||||||
| 	.llseek		= seq_lseek, | 	.llseek		= seq_lseek, | ||||||
| 	.release	= seq_release_private, | 	.release	= proc_map_release, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| const struct file_operations proc_tid_numa_maps_operations = { | const struct file_operations proc_tid_numa_maps_operations = { | ||||||
| 	.open		= tid_numa_maps_open, | 	.open		= tid_numa_maps_open, | ||||||
| 	.read		= seq_read, | 	.read		= seq_read, | ||||||
| 	.llseek		= seq_lseek, | 	.llseek		= seq_lseek, | ||||||
| 	.release	= seq_release_private, | 	.release	= proc_map_release, | ||||||
| }; | }; | ||||||
| #endif /* CONFIG_NUMA */ | #endif /* CONFIG_NUMA */ | ||||||
|  |  | ||||||
|  | @ -123,6 +123,25 @@ unsigned long task_statm(struct mm_struct *mm, | ||||||
| 	return size; | 	return size; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static pid_t pid_of_stack(struct proc_maps_private *priv, | ||||||
|  | 				struct vm_area_struct *vma, bool is_pid) | ||||||
|  | { | ||||||
|  | 	struct inode *inode = priv->inode; | ||||||
|  | 	struct task_struct *task; | ||||||
|  | 	pid_t ret = 0; | ||||||
|  | 
 | ||||||
|  | 	rcu_read_lock(); | ||||||
|  | 	task = pid_task(proc_pid(inode), PIDTYPE_PID); | ||||||
|  | 	if (task) { | ||||||
|  | 		task = task_of_stack(task, vma, is_pid); | ||||||
|  | 		if (task) | ||||||
|  | 			ret = task_pid_nr_ns(task, inode->i_sb->s_fs_info); | ||||||
|  | 	} | ||||||
|  | 	rcu_read_unlock(); | ||||||
|  | 
 | ||||||
|  | 	return ret; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| /*
 | /*
 | ||||||
|  * display a single VMA to a sequenced file |  * display a single VMA to a sequenced file | ||||||
|  */ |  */ | ||||||
|  | @ -163,7 +182,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma, | ||||||
| 		seq_pad(m, ' '); | 		seq_pad(m, ' '); | ||||||
| 		seq_path(m, &file->f_path, ""); | 		seq_path(m, &file->f_path, ""); | ||||||
| 	} else if (mm) { | 	} else if (mm) { | ||||||
| 		pid_t tid = vm_is_stack(priv->task, vma, is_pid); | 		pid_t tid = pid_of_stack(priv, vma, is_pid); | ||||||
| 
 | 
 | ||||||
| 		if (tid != 0) { | 		if (tid != 0) { | ||||||
| 			seq_pad(m, ' '); | 			seq_pad(m, ' '); | ||||||
|  | @ -212,22 +231,22 @@ static void *m_start(struct seq_file *m, loff_t *pos) | ||||||
| 	loff_t n = *pos; | 	loff_t n = *pos; | ||||||
| 
 | 
 | ||||||
| 	/* pin the task and mm whilst we play with them */ | 	/* pin the task and mm whilst we play with them */ | ||||||
| 	priv->task = get_pid_task(priv->pid, PIDTYPE_PID); | 	priv->task = get_proc_task(priv->inode); | ||||||
| 	if (!priv->task) | 	if (!priv->task) | ||||||
| 		return ERR_PTR(-ESRCH); | 		return ERR_PTR(-ESRCH); | ||||||
| 
 | 
 | ||||||
| 	mm = mm_access(priv->task, PTRACE_MODE_READ); | 	mm = priv->mm; | ||||||
| 	if (!mm || IS_ERR(mm)) { | 	if (!mm || !atomic_inc_not_zero(&mm->mm_users)) | ||||||
| 		put_task_struct(priv->task); | 		return NULL; | ||||||
| 		priv->task = NULL; |  | ||||||
| 		return mm; |  | ||||||
| 	} |  | ||||||
| 	down_read(&mm->mmap_sem); |  | ||||||
| 
 | 
 | ||||||
|  | 	down_read(&mm->mmap_sem); | ||||||
| 	/* start from the Nth VMA */ | 	/* start from the Nth VMA */ | ||||||
| 	for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) | 	for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) | ||||||
| 		if (n-- == 0) | 		if (n-- == 0) | ||||||
| 			return p; | 			return p; | ||||||
|  | 
 | ||||||
|  | 	up_read(&mm->mmap_sem); | ||||||
|  | 	mmput(mm); | ||||||
| 	return NULL; | 	return NULL; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -235,11 +254,13 @@ static void m_stop(struct seq_file *m, void *_vml) | ||||||
| { | { | ||||||
| 	struct proc_maps_private *priv = m->private; | 	struct proc_maps_private *priv = m->private; | ||||||
| 
 | 
 | ||||||
|  | 	if (!IS_ERR_OR_NULL(_vml)) { | ||||||
|  | 		up_read(&priv->mm->mmap_sem); | ||||||
|  | 		mmput(priv->mm); | ||||||
|  | 	} | ||||||
| 	if (priv->task) { | 	if (priv->task) { | ||||||
| 		struct mm_struct *mm = priv->task->mm; |  | ||||||
| 		up_read(&mm->mmap_sem); |  | ||||||
| 		mmput(mm); |  | ||||||
| 		put_task_struct(priv->task); | 		put_task_struct(priv->task); | ||||||
|  | 		priv->task = NULL; | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -269,20 +290,33 @@ static int maps_open(struct inode *inode, struct file *file, | ||||||
| 		     const struct seq_operations *ops) | 		     const struct seq_operations *ops) | ||||||
| { | { | ||||||
| 	struct proc_maps_private *priv; | 	struct proc_maps_private *priv; | ||||||
| 	int ret = -ENOMEM; |  | ||||||
| 
 | 
 | ||||||
| 	priv = kzalloc(sizeof(*priv), GFP_KERNEL); | 	priv = __seq_open_private(file, ops, sizeof(*priv)); | ||||||
| 	if (priv) { | 	if (!priv) | ||||||
| 		priv->pid = proc_pid(inode); | 		return -ENOMEM; | ||||||
| 		ret = seq_open(file, ops); | 
 | ||||||
| 		if (!ret) { | 	priv->inode = inode; | ||||||
| 			struct seq_file *m = file->private_data; | 	priv->mm = proc_mem_open(inode, PTRACE_MODE_READ); | ||||||
| 			m->private = priv; | 	if (IS_ERR(priv->mm)) { | ||||||
| 		} else { | 		int err = PTR_ERR(priv->mm); | ||||||
| 			kfree(priv); | 
 | ||||||
|  | 		seq_release_private(inode, file); | ||||||
|  | 		return err; | ||||||
| 	} | 	} | ||||||
|  | 
 | ||||||
|  | 	return 0; | ||||||
| } | } | ||||||
| 	return ret; | 
 | ||||||
|  | 
 | ||||||
|  | static int map_release(struct inode *inode, struct file *file) | ||||||
|  | { | ||||||
|  | 	struct seq_file *seq = file->private_data; | ||||||
|  | 	struct proc_maps_private *priv = seq->private; | ||||||
|  | 
 | ||||||
|  | 	if (priv->mm) | ||||||
|  | 		mmdrop(priv->mm); | ||||||
|  | 
 | ||||||
|  | 	return seq_release_private(inode, file); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static int pid_maps_open(struct inode *inode, struct file *file) | static int pid_maps_open(struct inode *inode, struct file *file) | ||||||
|  | @ -299,13 +333,13 @@ const struct file_operations proc_pid_maps_operations = { | ||||||
| 	.open		= pid_maps_open, | 	.open		= pid_maps_open, | ||||||
| 	.read		= seq_read, | 	.read		= seq_read, | ||||||
| 	.llseek		= seq_lseek, | 	.llseek		= seq_lseek, | ||||||
| 	.release	= seq_release_private, | 	.release	= map_release, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| const struct file_operations proc_tid_maps_operations = { | const struct file_operations proc_tid_maps_operations = { | ||||||
| 	.open		= tid_maps_open, | 	.open		= tid_maps_open, | ||||||
| 	.read		= seq_read, | 	.read		= seq_read, | ||||||
| 	.llseek		= seq_lseek, | 	.llseek		= seq_lseek, | ||||||
| 	.release	= seq_release_private, | 	.release	= map_release, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -179,6 +179,15 @@ dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, | ||||||
| extern int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, | extern int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, | ||||||
| 			   void *cpu_addr, dma_addr_t dma_addr, size_t size); | 			   void *cpu_addr, dma_addr_t dma_addr, size_t size); | ||||||
| 
 | 
 | ||||||
|  | void *dma_common_contiguous_remap(struct page *page, size_t size, | ||||||
|  | 			unsigned long vm_flags, | ||||||
|  | 			pgprot_t prot, const void *caller); | ||||||
|  | 
 | ||||||
|  | void *dma_common_pages_remap(struct page **pages, size_t size, | ||||||
|  | 			unsigned long vm_flags, pgprot_t prot, | ||||||
|  | 			const void *caller); | ||||||
|  | void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags); | ||||||
|  | 
 | ||||||
| /**
 | /**
 | ||||||
|  * dma_mmap_attrs - map a coherent DMA allocation into user space |  * dma_mmap_attrs - map a coherent DMA allocation into user space | ||||||
|  * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices |  * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices | ||||||
|  |  | ||||||
|  | @ -664,11 +664,12 @@ static inline int pmd_trans_unstable(pmd_t *pmd) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| #ifdef CONFIG_NUMA_BALANCING | #ifdef CONFIG_NUMA_BALANCING | ||||||
| #ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE |  | ||||||
| /*
 | /*
 | ||||||
|  * _PAGE_NUMA works identical to _PAGE_PROTNONE (it's actually the |  * _PAGE_NUMA distinguishes between an unmapped page table entry, an entry that | ||||||
|  * same bit too). It's set only when _PAGE_PRESET is not set and it's |  * is protected for PROT_NONE and a NUMA hinting fault entry. If the | ||||||
|  * never set if _PAGE_PRESENT is set. |  * architecture defines __PAGE_PROTNONE then it should take that into account | ||||||
|  |  * but those that do not can rely on the fact that the NUMA hinting scanner | ||||||
|  |  * skips inaccessible VMAs. | ||||||
|  * |  * | ||||||
|  * pte/pmd_present() returns true if pte/pmd_numa returns true. Page |  * pte/pmd_present() returns true if pte/pmd_numa returns true. Page | ||||||
|  * fault triggers on those regions if pte/pmd_numa returns true |  * fault triggers on those regions if pte/pmd_numa returns true | ||||||
|  | @ -677,16 +678,14 @@ static inline int pmd_trans_unstable(pmd_t *pmd) | ||||||
| #ifndef pte_numa | #ifndef pte_numa | ||||||
| static inline int pte_numa(pte_t pte) | static inline int pte_numa(pte_t pte) | ||||||
| { | { | ||||||
| 	return (pte_flags(pte) & | 	return ptenuma_flags(pte) == _PAGE_NUMA; | ||||||
| 		(_PAGE_NUMA|_PAGE_PROTNONE|_PAGE_PRESENT)) == _PAGE_NUMA; |  | ||||||
| } | } | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
| #ifndef pmd_numa | #ifndef pmd_numa | ||||||
| static inline int pmd_numa(pmd_t pmd) | static inline int pmd_numa(pmd_t pmd) | ||||||
| { | { | ||||||
| 	return (pmd_flags(pmd) & | 	return pmdnuma_flags(pmd) == _PAGE_NUMA; | ||||||
| 		(_PAGE_NUMA|_PAGE_PROTNONE|_PAGE_PRESENT)) == _PAGE_NUMA; |  | ||||||
| } | } | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
|  | @ -726,6 +725,8 @@ static inline pte_t pte_mknuma(pte_t pte) | ||||||
| { | { | ||||||
| 	pteval_t val = pte_val(pte); | 	pteval_t val = pte_val(pte); | ||||||
| 
 | 
 | ||||||
|  | 	VM_BUG_ON(!(val & _PAGE_PRESENT)); | ||||||
|  | 
 | ||||||
| 	val &= ~_PAGE_PRESENT; | 	val &= ~_PAGE_PRESENT; | ||||||
| 	val |= _PAGE_NUMA; | 	val |= _PAGE_NUMA; | ||||||
| 
 | 
 | ||||||
|  | @ -769,16 +770,6 @@ static inline void pmdp_set_numa(struct mm_struct *mm, unsigned long addr, | ||||||
| } | } | ||||||
| #endif | #endif | ||||||
| #else | #else | ||||||
| extern int pte_numa(pte_t pte); |  | ||||||
| extern int pmd_numa(pmd_t pmd); |  | ||||||
| extern pte_t pte_mknonnuma(pte_t pte); |  | ||||||
| extern pmd_t pmd_mknonnuma(pmd_t pmd); |  | ||||||
| extern pte_t pte_mknuma(pte_t pte); |  | ||||||
| extern pmd_t pmd_mknuma(pmd_t pmd); |  | ||||||
| extern void ptep_set_numa(struct mm_struct *mm, unsigned long addr, pte_t *ptep); |  | ||||||
| extern void pmdp_set_numa(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp); |  | ||||||
| #endif /* CONFIG_ARCH_USES_NUMA_PROT_NONE */ |  | ||||||
| #else |  | ||||||
| static inline int pmd_numa(pmd_t pmd) | static inline int pmd_numa(pmd_t pmd) | ||||||
| { | { | ||||||
| 	return 0; | 	return 0; | ||||||
|  |  | ||||||
|  | @ -3,6 +3,8 @@ | ||||||
| 
 | 
 | ||||||
| /* References to section boundaries */ | /* References to section boundaries */ | ||||||
| 
 | 
 | ||||||
|  | #include <linux/compiler.h> | ||||||
|  | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Usage guidelines: |  * Usage guidelines: | ||||||
|  * _text, _data: architecture specific, don't use them in arch-independent code |  * _text, _data: architecture specific, don't use them in arch-independent code | ||||||
|  | @ -37,6 +39,8 @@ extern char __start_rodata[], __end_rodata[]; | ||||||
| /* Start and end of .ctors section - used for constructor calls. */ | /* Start and end of .ctors section - used for constructor calls. */ | ||||||
| extern char __ctors_start[], __ctors_end[]; | extern char __ctors_start[], __ctors_end[]; | ||||||
| 
 | 
 | ||||||
|  | extern __visible const void __nosave_begin, __nosave_end; | ||||||
|  | 
 | ||||||
| /* function descriptor handling (if any).  Override
 | /* function descriptor handling (if any).  Override
 | ||||||
|  * in asm/sections.h */ |  * in asm/sections.h */ | ||||||
| #ifndef dereference_function_descriptor | #ifndef dereference_function_descriptor | ||||||
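With __nosave_begin/__nosave_end now declared once in the generic header above, per-architecture hibernation code can drop its private extern declarations. A sketch of the typical consumer, modeled on existing arch pfn_is_nosave() helpers rather than taken from this patch (some architectures use __pa_symbol() instead of __pa()):

```c
#include <linux/pfn.h>
#include <asm/page.h>
#include <asm/sections.h>

/* Hibernation: a pfn is "nosave" if it falls inside the nosave region. */
int pfn_is_nosave(unsigned long pfn)
{
	unsigned long begin_pfn = PFN_DOWN(__pa(&__nosave_begin));
	unsigned long end_pfn   = PFN_UP(__pa(&__nosave_end));

	return pfn >= begin_pfn && pfn < end_pfn;
}
```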
|  |  | ||||||
|  | @ -27,10 +27,13 @@ | ||||||
|  *      counter raised only while it is under our special handling; |  *      counter raised only while it is under our special handling; | ||||||
|  * |  * | ||||||
|  * iii. after the lockless scan step have selected a potential balloon page for |  * iii. after the lockless scan step have selected a potential balloon page for | ||||||
|  *      isolation, re-test the page->mapping flags and the page ref counter |  *      isolation, re-test the PageBalloon mark and the PagePrivate flag | ||||||
|  *      under the proper page lock, to ensure isolating a valid balloon page |  *      under the proper page lock, to ensure isolating a valid balloon page | ||||||
|  *      (not yet isolated, nor under release procedure) |  *      (not yet isolated, nor under release procedure) | ||||||
|  * |  * | ||||||
|  |  *  iv. isolation or dequeueing procedure must clear PagePrivate flag under | ||||||
|  |  *      page lock together with removing page from balloon device page list. | ||||||
|  |  * | ||||||
|  * The functions provided by this interface are placed to help on coping with |  * The functions provided by this interface are placed to help on coping with | ||||||
|  * the aforementioned balloon page corner case, as well as to ensure the simple |  * the aforementioned balloon page corner case, as well as to ensure the simple | ||||||
|  * set of exposed rules are satisfied while we are dealing with balloon pages |  * set of exposed rules are satisfied while we are dealing with balloon pages | ||||||
|  | @ -54,43 +57,22 @@ | ||||||
|  * balloon driver as a page book-keeper for its registered balloon devices. |  * balloon driver as a page book-keeper for its registered balloon devices. | ||||||
|  */ |  */ | ||||||
| struct balloon_dev_info { | struct balloon_dev_info { | ||||||
| 	void *balloon_device;		/* balloon device descriptor */ |  | ||||||
| 	struct address_space *mapping;	/* balloon special page->mapping */ |  | ||||||
| 	unsigned long isolated_pages;	/* # of isolated pages for migration */ | 	unsigned long isolated_pages;	/* # of isolated pages for migration */ | ||||||
| 	spinlock_t pages_lock;		/* Protection to pages list */ | 	spinlock_t pages_lock;		/* Protection to pages list */ | ||||||
| 	struct list_head pages;		/* Pages enqueued & handled to Host */ | 	struct list_head pages;		/* Pages enqueued & handled to Host */ | ||||||
|  | 	int (*migratepage)(struct balloon_dev_info *, struct page *newpage, | ||||||
|  | 			struct page *page, enum migrate_mode mode); | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| extern struct page *balloon_page_enqueue(struct balloon_dev_info *b_dev_info); | extern struct page *balloon_page_enqueue(struct balloon_dev_info *b_dev_info); | ||||||
| extern struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info); | extern struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info); | ||||||
| extern struct balloon_dev_info *balloon_devinfo_alloc( |  | ||||||
| 						void *balloon_dev_descriptor); |  | ||||||
| 
 | 
 | ||||||
| static inline void balloon_devinfo_free(struct balloon_dev_info *b_dev_info) | static inline void balloon_devinfo_init(struct balloon_dev_info *balloon) | ||||||
| { | { | ||||||
| 	kfree(b_dev_info); | 	balloon->isolated_pages = 0; | ||||||
| } | 	spin_lock_init(&balloon->pages_lock); | ||||||
| 
 | 	INIT_LIST_HEAD(&balloon->pages); | ||||||
| /*
 | 	balloon->migratepage = NULL; | ||||||
|  * balloon_page_free - release a balloon page back to the page free lists |  | ||||||
|  * @page: ballooned page to be set free |  | ||||||
|  * |  | ||||||
|  * This function must be used to properly set free an isolated/dequeued balloon |  | ||||||
|  * page at the end of a sucessful page migration, or at the balloon driver's |  | ||||||
|  * page release procedure. |  | ||||||
|  */ |  | ||||||
| static inline void balloon_page_free(struct page *page) |  | ||||||
| { |  | ||||||
| 	/*
 |  | ||||||
| 	 * Balloon pages always get an extra refcount before being isolated |  | ||||||
| 	 * and before being dequeued to help on sorting out fortuite colisions |  | ||||||
| 	 * between a thread attempting to isolate and another thread attempting |  | ||||||
| 	 * to release the very same balloon page. |  | ||||||
| 	 * |  | ||||||
| 	 * Before we handle the page back to Buddy, lets drop its extra refcnt. |  | ||||||
| 	 */ |  | ||||||
| 	put_page(page); |  | ||||||
| 	__free_page(page); |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| #ifdef CONFIG_BALLOON_COMPACTION | #ifdef CONFIG_BALLOON_COMPACTION | ||||||
|  | @ -98,107 +80,58 @@ extern bool balloon_page_isolate(struct page *page); | ||||||
| extern void balloon_page_putback(struct page *page); | extern void balloon_page_putback(struct page *page); | ||||||
| extern int balloon_page_migrate(struct page *newpage, | extern int balloon_page_migrate(struct page *newpage, | ||||||
| 				struct page *page, enum migrate_mode mode); | 				struct page *page, enum migrate_mode mode); | ||||||
| extern struct address_space |  | ||||||
| *balloon_mapping_alloc(struct balloon_dev_info *b_dev_info, |  | ||||||
| 			const struct address_space_operations *a_ops); |  | ||||||
| 
 |  | ||||||
| static inline void balloon_mapping_free(struct address_space *balloon_mapping) |  | ||||||
| { |  | ||||||
| 	kfree(balloon_mapping); |  | ||||||
| } |  | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * page_flags_cleared - helper to perform balloon @page ->flags tests. |  * __is_movable_balloon_page - helper to perform @page PageBalloon tests | ||||||
|  * |  | ||||||
|  * As balloon pages are obtained from buddy and we do not play with page->flags |  | ||||||
|  * at driver level (exception made when we get the page lock for compaction), |  | ||||||
|  * we can safely identify a ballooned page by checking if the |  | ||||||
|  * PAGE_FLAGS_CHECK_AT_PREP page->flags are all cleared.  This approach also |  | ||||||
|  * helps us skip ballooned pages that are locked for compaction or release, thus |  | ||||||
|  * mitigating their racy check at balloon_page_movable() |  | ||||||
|  */ |  | ||||||
| static inline bool page_flags_cleared(struct page *page) |  | ||||||
| { |  | ||||||
| 	return !(page->flags & PAGE_FLAGS_CHECK_AT_PREP); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /*
 |  | ||||||
|  * __is_movable_balloon_page - helper to perform @page mapping->flags tests |  | ||||||
|  */ |  */ | ||||||
| static inline bool __is_movable_balloon_page(struct page *page) | static inline bool __is_movable_balloon_page(struct page *page) | ||||||
| { | { | ||||||
| 	struct address_space *mapping = page->mapping; | 	return PageBalloon(page); | ||||||
| 	return mapping_balloon(mapping); |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * balloon_page_movable - test page->mapping->flags to identify balloon pages |  * balloon_page_movable - test PageBalloon to identify balloon pages | ||||||
|  *			  that can be moved by compaction/migration. |  *			  and PagePrivate to check that the page is not | ||||||
|  * |  *			  isolated and can be moved by compaction/migration. | ||||||
|  * This function is used at core compaction's page isolation scheme, therefore |  | ||||||
|  * most pages exposed to it are not enlisted as balloon pages and so, to avoid |  | ||||||
|  * undesired side effects like racing against __free_pages(), we cannot afford |  | ||||||
|  * holding the page locked while testing page->mapping->flags here. |  | ||||||
|  * |  * | ||||||
|  * As we might return false positives in the case of a balloon page being just |  * As we might return false positives in the case of a balloon page being just | ||||||
|  * released under us, the page->mapping->flags need to be re-tested later, |  * released under us, this need to be re-tested later, under the page lock. | ||||||
|  * under the proper page lock, at the functions that will be coping with the |  | ||||||
|  * balloon page case. |  | ||||||
|  */ |  */ | ||||||
| static inline bool balloon_page_movable(struct page *page) | static inline bool balloon_page_movable(struct page *page) | ||||||
| { | { | ||||||
| 	/*
 | 	return PageBalloon(page) && PagePrivate(page); | ||||||
| 	 * Before dereferencing and testing mapping->flags, let's make sure |  | ||||||
| 	 * this is not a page that uses ->mapping in a different way |  | ||||||
| 	 */ |  | ||||||
| 	if (page_flags_cleared(page) && !page_mapped(page) && |  | ||||||
| 	    page_count(page) == 1) |  | ||||||
| 		return __is_movable_balloon_page(page); |  | ||||||
| 
 |  | ||||||
| 	return false; |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * isolated_balloon_page - identify an isolated balloon page on private |  * isolated_balloon_page - identify an isolated balloon page on private | ||||||
|  *			   compaction/migration page lists. |  *			   compaction/migration page lists. | ||||||
|  * |  | ||||||
|  * After a compaction thread isolates a balloon page for migration, it raises |  | ||||||
|  * the page refcount to prevent concurrent compaction threads from re-isolating |  | ||||||
|  * the same page. For that reason putback_movable_pages(), or other routines |  | ||||||
|  * that need to identify isolated balloon pages on private pagelists, cannot |  | ||||||
|  * rely on balloon_page_movable() to accomplish the task. |  | ||||||
|  */ |  */ | ||||||
| static inline bool isolated_balloon_page(struct page *page) | static inline bool isolated_balloon_page(struct page *page) | ||||||
| { | { | ||||||
| 	/* Already isolated balloon pages, by default, have a raised refcount */ | 	return PageBalloon(page); | ||||||
| 	if (page_flags_cleared(page) && !page_mapped(page) && |  | ||||||
| 	    page_count(page) >= 2) |  | ||||||
| 		return __is_movable_balloon_page(page); |  | ||||||
| 
 |  | ||||||
| 	return false; |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * balloon_page_insert - insert a page into the balloon's page list and make |  * balloon_page_insert - insert a page into the balloon's page list and make | ||||||
|  *		         the page->mapping assignment accordingly. |  *			 the page->private assignment accordingly. | ||||||
|  |  * @balloon : pointer to balloon device | ||||||
|  * @page    : page to be assigned as a 'balloon page' |  * @page    : page to be assigned as a 'balloon page' | ||||||
|  * @mapping : allocated special 'balloon_mapping' |  | ||||||
|  * @head    : balloon's device page list head |  | ||||||
|  * |  * | ||||||
|  * Caller must ensure the page is locked and the spin_lock protecting balloon |  * Caller must ensure the page is locked and the spin_lock protecting balloon | ||||||
|  * pages list is held before inserting a page into the balloon device. |  * pages list is held before inserting a page into the balloon device. | ||||||
|  */ |  */ | ||||||
| static inline void balloon_page_insert(struct page *page, | static inline void balloon_page_insert(struct balloon_dev_info *balloon, | ||||||
| 				       struct address_space *mapping, | 				       struct page *page) | ||||||
| 				       struct list_head *head) |  | ||||||
| { | { | ||||||
| 	page->mapping = mapping; | 	__SetPageBalloon(page); | ||||||
| 	list_add(&page->lru, head); | 	SetPagePrivate(page); | ||||||
|  | 	set_page_private(page, (unsigned long)balloon); | ||||||
|  | 	list_add(&page->lru, &balloon->pages); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * balloon_page_delete - delete a page from balloon's page list and clear |  * balloon_page_delete - delete a page from balloon's page list and clear | ||||||
|  *			 the page->mapping assignment accordingly. |  *			 the page->private assignment accordingly. | ||||||
|  * @page    : page to be released from balloon's page list |  * @page    : page to be released from balloon's page list | ||||||
|  * |  * | ||||||
|  * Caller must ensure the page is locked and the spin_lock protecting balloon |  * Caller must ensure the page is locked and the spin_lock protecting balloon | ||||||
|  | @ -206,9 +139,13 @@ static inline void balloon_page_insert(struct page *page, | ||||||
|  */ |  */ | ||||||
| static inline void balloon_page_delete(struct page *page) | static inline void balloon_page_delete(struct page *page) | ||||||
| { | { | ||||||
| 	page->mapping = NULL; | 	__ClearPageBalloon(page); | ||||||
|  | 	set_page_private(page, 0); | ||||||
|  | 	if (PagePrivate(page)) { | ||||||
|  | 		ClearPagePrivate(page); | ||||||
| 		list_del(&page->lru); | 		list_del(&page->lru); | ||||||
| 	} | 	} | ||||||
|  | } | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * balloon_page_device - get the b_dev_info descriptor for the balloon device |  * balloon_page_device - get the b_dev_info descriptor for the balloon device | ||||||
|  | @ -216,11 +153,7 @@ static inline void balloon_page_delete(struct page *page) | ||||||
|  */ |  */ | ||||||
| static inline struct balloon_dev_info *balloon_page_device(struct page *page) | static inline struct balloon_dev_info *balloon_page_device(struct page *page) | ||||||
| { | { | ||||||
| 	struct address_space *mapping = page->mapping; | 	return (struct balloon_dev_info *)page_private(page); | ||||||
| 	if (likely(mapping)) |  | ||||||
| 		return mapping->private_data; |  | ||||||
| 
 |  | ||||||
| 	return NULL; |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static inline gfp_t balloon_mapping_gfp_mask(void) | static inline gfp_t balloon_mapping_gfp_mask(void) | ||||||
|  | @ -228,36 +161,26 @@ static inline gfp_t balloon_mapping_gfp_mask(void) | ||||||
| 	return GFP_HIGHUSER_MOVABLE; | 	return GFP_HIGHUSER_MOVABLE; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static inline bool balloon_compaction_check(void) |  | ||||||
| { |  | ||||||
| 	return true; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| #else /* !CONFIG_BALLOON_COMPACTION */ | #else /* !CONFIG_BALLOON_COMPACTION */ | ||||||
| 
 | 
 | ||||||
| static inline void *balloon_mapping_alloc(void *balloon_device, | static inline void balloon_page_insert(struct balloon_dev_info *balloon, | ||||||
| 				const struct address_space_operations *a_ops) | 				       struct page *page) | ||||||
| { | { | ||||||
| 	return ERR_PTR(-EOPNOTSUPP); | 	__SetPageBalloon(page); | ||||||
| } | 	list_add(&page->lru, &balloon->pages); | ||||||
| 
 |  | ||||||
| static inline void balloon_mapping_free(struct address_space *balloon_mapping) |  | ||||||
| { |  | ||||||
| 	return; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static inline void balloon_page_insert(struct page *page, |  | ||||||
| 				       struct address_space *mapping, |  | ||||||
| 				       struct list_head *head) |  | ||||||
| { |  | ||||||
| 	list_add(&page->lru, head); |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static inline void balloon_page_delete(struct page *page) | static inline void balloon_page_delete(struct page *page) | ||||||
| { | { | ||||||
|  | 	__ClearPageBalloon(page); | ||||||
| 	list_del(&page->lru); | 	list_del(&page->lru); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static inline bool __is_movable_balloon_page(struct page *page) | ||||||
|  | { | ||||||
|  | 	return false; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| static inline bool balloon_page_movable(struct page *page) | static inline bool balloon_page_movable(struct page *page) | ||||||
| { | { | ||||||
| 	return false; | 	return false; | ||||||
|  | @ -289,9 +212,5 @@ static inline gfp_t balloon_mapping_gfp_mask(void) | ||||||
| 	return GFP_HIGHUSER; | 	return GFP_HIGHUSER; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static inline bool balloon_compaction_check(void) |  | ||||||
| { |  | ||||||
| 	return false; |  | ||||||
| } |  | ||||||
| #endif /* CONFIG_BALLOON_COMPACTION */ | #endif /* CONFIG_BALLOON_COMPACTION */ | ||||||
| #endif /* _LINUX_BALLOON_COMPACTION_H */ | #endif /* _LINUX_BALLOON_COMPACTION_H */ | ||||||
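The reworked header above identifies balloon pages with PageBalloon()/PagePrivate() and hangs the migration callback off balloon_dev_info itself, so drivers no longer allocate a special address_space. A hypothetical driver-side sketch using only the helpers declared above; the my_* names and the empty callback body are invented for illustration:

```c
#include <linux/balloon_compaction.h>
#include <linux/migrate.h>

static struct balloon_dev_info my_balloon;

/* Callback shape matches the new migratepage member; a real driver would
 * also notify its host and transfer per-page state here. */
static int my_migratepage(struct balloon_dev_info *info, struct page *newpage,
			  struct page *page, enum migrate_mode mode)
{
	return MIGRATEPAGE_SUCCESS;
}

static void my_balloon_setup(void)
{
	balloon_devinfo_init(&my_balloon);	/* replaces balloon_devinfo_alloc() */
	my_balloon.migratepage = my_migratepage;
}

static void my_inflate_one(void)
{
	/* enqueue allocates a page, marks it PageBalloon/PagePrivate and
	 * links it on my_balloon.pages under pages_lock */
	struct page *page = balloon_page_enqueue(&my_balloon);

	if (!page)
		return;		/* allocation failed; try again later */
}
```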
|  |  | ||||||
|  | @ -1564,7 +1564,7 @@ static inline int blk_rq_map_integrity_sg(struct request_queue *q, | ||||||
| } | } | ||||||
| static inline struct blk_integrity *bdev_get_integrity(struct block_device *b) | static inline struct blk_integrity *bdev_get_integrity(struct block_device *b) | ||||||
| { | { | ||||||
| 	return 0; | 	return NULL; | ||||||
| } | } | ||||||
| static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) | static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) | ||||||
| { | { | ||||||
|  |  | ||||||
|  | @ -2,14 +2,24 @@ | ||||||
| #define _LINUX_COMPACTION_H | #define _LINUX_COMPACTION_H | ||||||
| 
 | 
 | ||||||
| /* Return values for compact_zone() and try_to_compact_pages() */ | /* Return values for compact_zone() and try_to_compact_pages() */ | ||||||
|  | /* compaction didn't start as it was deferred due to past failures */ | ||||||
|  | #define COMPACT_DEFERRED	0 | ||||||
| /* compaction didn't start as it was not possible or direct reclaim was more suitable */ | /* compaction didn't start as it was not possible or direct reclaim was more suitable */ | ||||||
| #define COMPACT_SKIPPED		0 | #define COMPACT_SKIPPED		1 | ||||||
| /* compaction should continue to another pageblock */ | /* compaction should continue to another pageblock */ | ||||||
| #define COMPACT_CONTINUE	1 | #define COMPACT_CONTINUE	2 | ||||||
| /* direct compaction partially compacted a zone and there are suitable pages */ | /* direct compaction partially compacted a zone and there are suitable pages */ | ||||||
| #define COMPACT_PARTIAL		2 | #define COMPACT_PARTIAL		3 | ||||||
| /* The full zone was compacted */ | /* The full zone was compacted */ | ||||||
| #define COMPACT_COMPLETE	3 | #define COMPACT_COMPLETE	4 | ||||||
|  | 
 | ||||||
|  | /* Used to signal whether compaction detected need_sched() or lock contention */ | ||||||
|  | /* No contention detected */ | ||||||
|  | #define COMPACT_CONTENDED_NONE	0 | ||||||
|  | /* Either need_sched() was true or fatal signal pending */ | ||||||
|  | #define COMPACT_CONTENDED_SCHED	1 | ||||||
|  | /* Zone lock or lru_lock was contended in async compaction */ | ||||||
|  | #define COMPACT_CONTENDED_LOCK	2 | ||||||
| 
 | 
 | ||||||
| #ifdef CONFIG_COMPACTION | #ifdef CONFIG_COMPACTION | ||||||
| extern int sysctl_compact_memory; | extern int sysctl_compact_memory; | ||||||
|  | @ -22,7 +32,8 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write, | ||||||
| extern int fragmentation_index(struct zone *zone, unsigned int order); | extern int fragmentation_index(struct zone *zone, unsigned int order); | ||||||
| extern unsigned long try_to_compact_pages(struct zonelist *zonelist, | extern unsigned long try_to_compact_pages(struct zonelist *zonelist, | ||||||
| 			int order, gfp_t gfp_mask, nodemask_t *mask, | 			int order, gfp_t gfp_mask, nodemask_t *mask, | ||||||
| 			enum migrate_mode mode, bool *contended); | 			enum migrate_mode mode, int *contended, | ||||||
|  | 			struct zone **candidate_zone); | ||||||
| extern void compact_pgdat(pg_data_t *pgdat, int order); | extern void compact_pgdat(pg_data_t *pgdat, int order); | ||||||
| extern void reset_isolation_suitable(pg_data_t *pgdat); | extern void reset_isolation_suitable(pg_data_t *pgdat); | ||||||
| extern unsigned long compaction_suitable(struct zone *zone, int order); | extern unsigned long compaction_suitable(struct zone *zone, int order); | ||||||
|  | @ -91,7 +102,8 @@ static inline bool compaction_restarting(struct zone *zone, int order) | ||||||
| #else | #else | ||||||
| static inline unsigned long try_to_compact_pages(struct zonelist *zonelist, | static inline unsigned long try_to_compact_pages(struct zonelist *zonelist, | ||||||
| 			int order, gfp_t gfp_mask, nodemask_t *nodemask, | 			int order, gfp_t gfp_mask, nodemask_t *nodemask, | ||||||
| 			enum migrate_mode mode, bool *contended) | 			enum migrate_mode mode, int *contended, | ||||||
|  | 			struct zone **candidate_zone) | ||||||
| { | { | ||||||
| 	return COMPACT_CONTINUE; | 	return COMPACT_CONTINUE; | ||||||
| } | } | ||||||
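Because COMPACT_DEFERRED now occupies value 0 and contention is reported as an int code instead of a bool, callers can tell "never started" apart from "started and gave up". A hypothetical caller-side sketch built only from the declarations above; the wrapper function and its back-off policy are invented:

```c
#include <linux/compaction.h>
#include <linux/gfp.h>
#include <linux/migrate_mode.h>
#include <linux/mmzone.h>

static bool direct_compact_made_progress(struct zonelist *zonelist, int order,
					 gfp_t gfp_mask, nodemask_t *nodemask)
{
	struct zone *candidate_zone;	/* out-parameter added by this series */
	int contended = COMPACT_CONTENDED_NONE;
	unsigned long rc;

	rc = try_to_compact_pages(zonelist, order, gfp_mask, nodemask,
				  MIGRATE_ASYNC, &contended, &candidate_zone);

	if (rc == COMPACT_DEFERRED)
		return false;			/* compaction was not attempted */
	if (contended == COMPACT_CONTENDED_LOCK)
		return false;			/* async scan backed off on a lock */
	return rc == COMPACT_PARTIAL;		/* suitable pages should exist now */
}
```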
|  |  | ||||||
|  | @ -110,6 +110,10 @@ extern void gen_pool_set_algo(struct gen_pool *pool, genpool_algo_t algo, | ||||||
| extern unsigned long gen_pool_first_fit(unsigned long *map, unsigned long size, | extern unsigned long gen_pool_first_fit(unsigned long *map, unsigned long size, | ||||||
| 		unsigned long start, unsigned int nr, void *data); | 		unsigned long start, unsigned int nr, void *data); | ||||||
| 
 | 
 | ||||||
|  | extern unsigned long gen_pool_first_fit_order_align(unsigned long *map, | ||||||
|  | 		unsigned long size, unsigned long start, unsigned int nr, | ||||||
|  | 		void *data); | ||||||
|  | 
 | ||||||
| extern unsigned long gen_pool_best_fit(unsigned long *map, unsigned long size, | extern unsigned long gen_pool_best_fit(unsigned long *map, unsigned long size, | ||||||
| 		unsigned long start, unsigned int nr, void *data); | 		unsigned long start, unsigned int nr, void *data); | ||||||
| 
 | 
 | ||||||
|  | @ -117,6 +121,9 @@ extern struct gen_pool *devm_gen_pool_create(struct device *dev, | ||||||
| 		int min_alloc_order, int nid); | 		int min_alloc_order, int nid); | ||||||
| extern struct gen_pool *dev_get_gen_pool(struct device *dev); | extern struct gen_pool *dev_get_gen_pool(struct device *dev); | ||||||
| 
 | 
 | ||||||
|  | bool addr_in_gen_pool(struct gen_pool *pool, unsigned long start, | ||||||
|  | 			size_t size); | ||||||
|  | 
 | ||||||
| #ifdef CONFIG_OF | #ifdef CONFIG_OF | ||||||
| extern struct gen_pool *of_get_named_gen_pool(struct device_node *np, | extern struct gen_pool *of_get_named_gen_pool(struct device_node *np, | ||||||
| 	const char *propname, int index); | 	const char *propname, int index); | ||||||
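gen_pool_first_fit_order_align() is a new allocation algorithm (first fit, but aligned to the order of the request) and addr_in_gen_pool() a membership test for an address range. A hypothetical usage sketch derived only from the prototypes above; the pool and buffer arguments are placeholders:

```c
#include <linux/genalloc.h>
#include <linux/types.h>

static void use_order_aligned_first_fit(struct gen_pool *pool)
{
	/* subsequent allocations come back naturally aligned to their
	 * size rounded up to a power of two */
	gen_pool_set_algo(pool, gen_pool_first_fit_order_align, NULL);
}

static bool buffer_is_from_pool(struct gen_pool *pool, void *cpu_addr,
				size_t size)
{
	/* true if [cpu_addr, cpu_addr + size) lies within the pool */
	return addr_in_gen_pool(pool, (unsigned long)cpu_addr, size);
}
```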
|  |  | ||||||
|  | @ -156,7 +156,7 @@ struct vm_area_struct; | ||||||
| #define GFP_DMA32	__GFP_DMA32 | #define GFP_DMA32	__GFP_DMA32 | ||||||
| 
 | 
 | ||||||
| /* Convert GFP flags to their corresponding migrate type */ | /* Convert GFP flags to their corresponding migrate type */ | ||||||
| static inline int allocflags_to_migratetype(gfp_t gfp_flags) | static inline int gfpflags_to_migratetype(const gfp_t gfp_flags) | ||||||
| { | { | ||||||
| 	WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK); | 	WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -132,7 +132,7 @@ extern int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma, | ||||||
| static inline int pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma, | static inline int pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma, | ||||||
| 		spinlock_t **ptl) | 		spinlock_t **ptl) | ||||||
| { | { | ||||||
| 	VM_BUG_ON(!rwsem_is_locked(&vma->vm_mm->mmap_sem)); | 	VM_BUG_ON_VMA(!rwsem_is_locked(&vma->vm_mm->mmap_sem), vma); | ||||||
| 	if (pmd_trans_huge(*pmd)) | 	if (pmd_trans_huge(*pmd)) | ||||||
| 		return __pmd_trans_huge_lock(pmd, vma, ptl); | 		return __pmd_trans_huge_lock(pmd, vma, ptl); | ||||||
| 	else | 	else | ||||||
|  |  | ||||||
Some files were not shown because too many files have changed in this diff.