mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-09-18 22:14:16 +00:00 
			
		
		
		
	tcp: Use per-vma locking for receive zerocopy
Per-VMA locking allows us to lock a struct vm_area_struct without taking the process-wide mmap lock in read mode. Consider a process workload where the mmap lock is constantly taken in write mode. In this scenario, all zerocopy receives are blocked for as long as the write lock is held, even though, in principle, the memory ranges being used by TCP are not touched by the operations that need the mmap write lock. This results in performance degradation. Now consider another workload where the mmap lock is never taken in write mode, but there are many TCP connections using receive zerocopy that are concurrently receiving. These connections only take the mmap lock in read mode, but this still induces a lot of contention and atomic ops on this process-wide lock. This results in additional CPU overhead caused by contending on the cache line for this lock. With per-VMA locking, however, both of these problems can be avoided. As a test, I ran an RPC-style request/response workload with 4KB payloads and receive zerocopy enabled, with 100 simultaneous TCP connections. I measured perf cycles within the find_tcp_vma/mmap_read_lock/mmap_read_unlock codepath, with and without per-VMA locking enabled. When using process-wide mmap semaphore read locking, about 1% of measured perf cycles were within this path. With per-VMA locking, this value dropped to about 0.45%. Signed-off-by: Arjun Roy <arjunroy@google.com> Reviewed-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									b650d953cd
								
							
						
					
					
						commit
						7a7f094635
					
				
					 5 changed files with 60 additions and 11 deletions
				
			
		|  | @ -14743,6 +14743,7 @@ NETWORKING [TCP] | |||
| M:	Eric Dumazet <edumazet@google.com> | ||||
| L:	netdev@vger.kernel.org | ||||
| S:	Maintained | ||||
| F:	include/linux/net_mm.h | ||||
| F:	include/linux/tcp.h | ||||
| F:	include/net/tcp.h | ||||
| F:	include/trace/events/tcp.h | ||||
|  |  | |||
							
								
								
									
										17
									
								
								include/linux/net_mm.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										17
									
								
								include/linux/net_mm.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,17 @@ | |||
| /* SPDX-License-Identifier: GPL-2.0-or-later */ | ||||
| #ifdef CONFIG_MMU | ||||
| 
 | ||||
| #ifdef CONFIG_INET | ||||
| extern const struct vm_operations_struct tcp_vm_ops; | ||||
| static inline bool vma_is_tcp(const struct vm_area_struct *vma) | ||||
| { | ||||
| 	return vma->vm_ops == &tcp_vm_ops; | ||||
| } | ||||
| #else | ||||
| static inline bool vma_is_tcp(const struct vm_area_struct *vma) | ||||
| { | ||||
| 	return false; | ||||
| } | ||||
| #endif /* CONFIG_INET */ | ||||
| 
 | ||||
| #endif /* CONFIG_MMU */ | ||||
|  | @ -45,6 +45,7 @@ | |||
| #include <linux/memcontrol.h> | ||||
| #include <linux/bpf-cgroup.h> | ||||
| #include <linux/siphash.h> | ||||
| #include <linux/net_mm.h> | ||||
| 
 | ||||
| extern struct inet_hashinfo tcp_hashinfo; | ||||
| 
 | ||||
|  |  | |||
|  | @ -77,6 +77,7 @@ | |||
| #include <linux/ptrace.h> | ||||
| #include <linux/vmalloc.h> | ||||
| #include <linux/sched/sysctl.h> | ||||
| #include <linux/net_mm.h> | ||||
| 
 | ||||
| #include <trace/events/kmem.h> | ||||
| 
 | ||||
|  | @ -5280,12 +5281,12 @@ retry: | |||
| 	if (!vma) | ||||
| 		goto inval; | ||||
| 
 | ||||
| 	/* Only anonymous vmas are supported for now */ | ||||
| 	if (!vma_is_anonymous(vma)) | ||||
| 	/* Only anonymous and tcp vmas are supported for now */ | ||||
| 	if (!vma_is_anonymous(vma) && !vma_is_tcp(vma)) | ||||
| 		goto inval; | ||||
| 
 | ||||
| 	/* find_mergeable_anon_vma uses adjacent vmas which are not locked */ | ||||
| 	if (!vma->anon_vma) | ||||
| 	if (!vma->anon_vma && !vma_is_tcp(vma)) | ||||
| 		goto inval; | ||||
| 
 | ||||
| 	if (!vma_start_read(vma)) | ||||
|  |  | |||
|  | @ -1774,7 +1774,7 @@ void tcp_update_recv_tstamps(struct sk_buff *skb, | |||
| } | ||||
| 
 | ||||
| #ifdef CONFIG_MMU | ||||
| static const struct vm_operations_struct tcp_vm_ops = { | ||||
| const struct vm_operations_struct tcp_vm_ops = { | ||||
| }; | ||||
| 
 | ||||
| int tcp_mmap(struct file *file, struct socket *sock, | ||||
|  | @ -2073,6 +2073,34 @@ static void tcp_zc_finalize_rx_tstamp(struct sock *sk, | |||
| 	} | ||||
| } | ||||
| 
 | ||||
| static struct vm_area_struct *find_tcp_vma(struct mm_struct *mm, | ||||
| 					   unsigned long address, | ||||
| 					   bool *mmap_locked) | ||||
| { | ||||
| 	struct vm_area_struct *vma = NULL; | ||||
| 
 | ||||
| #ifdef CONFIG_PER_VMA_LOCK | ||||
| 	vma = lock_vma_under_rcu(mm, address); | ||||
| #endif | ||||
| 	if (vma) { | ||||
| 		if (!vma_is_tcp(vma)) { | ||||
| 			vma_end_read(vma); | ||||
| 			return NULL; | ||||
| 		} | ||||
| 		*mmap_locked = false; | ||||
| 		return vma; | ||||
| 	} | ||||
| 
 | ||||
| 	mmap_read_lock(mm); | ||||
| 	vma = vma_lookup(mm, address); | ||||
| 	if (!vma || !vma_is_tcp(vma)) { | ||||
| 		mmap_read_unlock(mm); | ||||
| 		return NULL; | ||||
| 	} | ||||
| 	*mmap_locked = true; | ||||
| 	return vma; | ||||
| } | ||||
| 
 | ||||
| #define TCP_ZEROCOPY_PAGE_BATCH_SIZE 32 | ||||
| static int tcp_zerocopy_receive(struct sock *sk, | ||||
| 				struct tcp_zerocopy_receive *zc, | ||||
|  | @ -2090,6 +2118,7 @@ static int tcp_zerocopy_receive(struct sock *sk, | |||
| 	u32 seq = tp->copied_seq; | ||||
| 	u32 total_bytes_to_map; | ||||
| 	int inq = tcp_inq(sk); | ||||
| 	bool mmap_locked; | ||||
| 	int ret; | ||||
| 
 | ||||
| 	zc->copybuf_len = 0; | ||||
|  | @ -2114,13 +2143,10 @@ static int tcp_zerocopy_receive(struct sock *sk, | |||
| 		return 0; | ||||
| 	} | ||||
| 
 | ||||
| 	mmap_read_lock(current->mm); | ||||
| 
 | ||||
| 	vma = vma_lookup(current->mm, address); | ||||
| 	if (!vma || vma->vm_ops != &tcp_vm_ops) { | ||||
| 		mmap_read_unlock(current->mm); | ||||
| 	vma = find_tcp_vma(current->mm, address, &mmap_locked); | ||||
| 	if (!vma) | ||||
| 		return -EINVAL; | ||||
| 	} | ||||
| 
 | ||||
| 	vma_len = min_t(unsigned long, zc->length, vma->vm_end - address); | ||||
| 	avail_len = min_t(u32, vma_len, inq); | ||||
| 	total_bytes_to_map = avail_len & ~(PAGE_SIZE - 1); | ||||
|  | @ -2194,7 +2220,10 @@ static int tcp_zerocopy_receive(struct sock *sk, | |||
| 						   zc, total_bytes_to_map); | ||||
| 	} | ||||
| out: | ||||
| 	if (mmap_locked) | ||||
| 		mmap_read_unlock(current->mm); | ||||
| 	else | ||||
| 		vma_end_read(vma); | ||||
| 	/* Try to copy straggler data. */ | ||||
| 	if (!ret) | ||||
| 		copylen = tcp_zc_handle_leftover(zc, sk, skb, &seq, copybuf_len, tss); | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Arjun Roy
						Arjun Roy