mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-10-31 08:44:41 +00:00 
			
		
		
		
	veth: Add XDP TX and REDIRECT
This allows further redirection of xdp_frames like

 NIC   -> veth--veth -> veth--veth
 (XDP)          (XDP)         (XDP)

The intermediate XDP, redirecting packets from NIC to the other veth, reuses xdp_mem_info from NIC so that page recycling of the NIC works on the destination veth's XDP.
In this way return_frame is not fully guarded by NAPI, since another NAPI handler on another cpu may use the same xdp_mem_info concurrently.
Thus disable napi_direct by xdp_set_return_frame_no_direct() during the NAPI context.

v8:
- Don't use xdp_frame pointer address for data_hard_start of xdp_buff.

v4:
- Use xdp_[set|clear]_return_frame_no_direct() instead of a flag in xdp_mem_info.

v3:
- Fix double free when veth_xdp_tx() returns a positive value.
- Convert xdp_xmit and xdp_redir variables into flags.

Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
This commit is contained in:
		
							parent
							
								
									2539650fad
								
							
						
					
					
						commit
						d1396004dd
					
				
					 1 changed files with 110 additions and 9 deletions
				
			
		|  | @ -32,6 +32,10 @@ | |||
| #define VETH_RING_SIZE		256 | ||||
| #define VETH_XDP_HEADROOM	(XDP_PACKET_HEADROOM + NET_IP_ALIGN) | ||||
| 
 | ||||
| /* Separating two types of XDP xmit */ | ||||
| #define VETH_XDP_TX		BIT(0) | ||||
| #define VETH_XDP_REDIR		BIT(1) | ||||
| 
 | ||||
| struct pcpu_vstats { | ||||
| 	u64			packets; | ||||
| 	u64			bytes; | ||||
|  | @ -45,6 +49,7 @@ struct veth_priv { | |||
| 	struct bpf_prog		*_xdp_prog; | ||||
| 	struct net_device __rcu	*peer; | ||||
| 	atomic64_t		dropped; | ||||
| 	struct xdp_mem_info	xdp_mem; | ||||
| 	unsigned		requested_headroom; | ||||
| 	bool			rx_notify_masked; | ||||
| 	struct ptr_ring		xdp_ring; | ||||
|  | @ -317,12 +322,44 @@ static int veth_xdp_xmit(struct net_device *dev, int n, | |||
| 	return n - drops; | ||||
| } | ||||
| 
 | ||||
| /*
 * Flush step for XDP_TX: veth_poll() calls this after a poll in which
 * VETH_XDP_TX was recorded. Looks up the peer device under the RCU read
 * lock and, if the peer currently has an XDP program attached, passes the
 * peer's private data to __veth_xdp_flush(). Does nothing when there is no
 * peer or the peer has no XDP program.
 */
static void veth_xdp_flush(struct net_device *dev) | ||||
| { | ||||
| 	struct veth_priv *rcv_priv, *priv = netdev_priv(dev); | ||||
| 	struct net_device *rcv; | ||||
| 
 | ||||
| 	rcu_read_lock(); | ||||
| 	rcv = rcu_dereference(priv->peer); | ||||
| 	if (unlikely(!rcv)) /* no peer device: nothing to flush */ | ||||
| 		goto out; | ||||
| 
 | ||||
| 	rcv_priv = netdev_priv(rcv); | ||||
| 	/* xdp_ring is initialized on receive side? The peer's xdp_prog pointer
	 * is used as the indicator; skip the flush when it is not set.
	 */ | ||||
| 	if (unlikely(!rcu_access_pointer(rcv_priv->xdp_prog))) | ||||
| 		goto out; | ||||
| 
 | ||||
| 	__veth_xdp_flush(rcv_priv); | ||||
| out: | ||||
| 	rcu_read_unlock(); | ||||
| } | ||||
| 
 | ||||
| /*
 * Transmit a single xdp_buff through veth_xdp_xmit().
 *
 * The buffer is first converted to an xdp_frame; if the conversion fails
 * -EOVERFLOW is returned. Otherwise the return value of veth_xdp_xmit()
 * for a one-frame batch with flags 0 is passed through unchanged. Callers
 * in this file treat a negative return as failure.
 */
static int veth_xdp_tx(struct net_device *dev, struct xdp_buff *xdp) | ||||
| { | ||||
| 	struct xdp_frame *frame = convert_to_xdp_frame(xdp); | ||||
| 
 | ||||
| 	if (unlikely(!frame)) /* conversion to xdp_frame failed */ | ||||
| 		return -EOVERFLOW; | ||||
| 
 | ||||
| 	return veth_xdp_xmit(dev, 1, &frame, 0); | ||||
| } | ||||
| 
 | ||||
| static struct sk_buff *veth_xdp_rcv_one(struct veth_priv *priv, | ||||
| 					struct xdp_frame *frame) | ||||
| 					struct xdp_frame *frame, | ||||
| 					unsigned int *xdp_xmit) | ||||
| { | ||||
| 	void *hard_start = frame->data - frame->headroom; | ||||
| 	void *head = hard_start - sizeof(struct xdp_frame); | ||||
| 	int len = frame->len, delta = 0; | ||||
| 	struct xdp_frame orig_frame; | ||||
| 	struct bpf_prog *xdp_prog; | ||||
| 	unsigned int headroom; | ||||
| 	struct sk_buff *skb; | ||||
|  | @ -346,6 +383,29 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_priv *priv, | |||
| 			delta = frame->data - xdp.data; | ||||
| 			len = xdp.data_end - xdp.data; | ||||
| 			break; | ||||
| 		case XDP_TX: | ||||
| 			orig_frame = *frame; | ||||
| 			xdp.data_hard_start = head; | ||||
| 			xdp.rxq->mem = frame->mem; | ||||
| 			if (unlikely(veth_xdp_tx(priv->dev, &xdp) < 0)) { | ||||
| 				trace_xdp_exception(priv->dev, xdp_prog, act); | ||||
| 				frame = &orig_frame; | ||||
| 				goto err_xdp; | ||||
| 			} | ||||
| 			*xdp_xmit |= VETH_XDP_TX; | ||||
| 			rcu_read_unlock(); | ||||
| 			goto xdp_xmit; | ||||
| 		case XDP_REDIRECT: | ||||
| 			orig_frame = *frame; | ||||
| 			xdp.data_hard_start = head; | ||||
| 			xdp.rxq->mem = frame->mem; | ||||
| 			if (xdp_do_redirect(priv->dev, &xdp, xdp_prog)) { | ||||
| 				frame = &orig_frame; | ||||
| 				goto err_xdp; | ||||
| 			} | ||||
| 			*xdp_xmit |= VETH_XDP_REDIR; | ||||
| 			rcu_read_unlock(); | ||||
| 			goto xdp_xmit; | ||||
| 		default: | ||||
| 			bpf_warn_invalid_xdp_action(act); | ||||
| 		case XDP_ABORTED: | ||||
|  | @ -370,12 +430,13 @@ err: | |||
| err_xdp: | ||||
| 	rcu_read_unlock(); | ||||
| 	xdp_return_frame(frame); | ||||
| 
 | ||||
| xdp_xmit: | ||||
| 	return NULL; | ||||
| } | ||||
| 
 | ||||
| static struct sk_buff *veth_xdp_rcv_skb(struct veth_priv *priv, | ||||
| 					struct sk_buff *skb) | ||||
| 					struct sk_buff *skb, | ||||
| 					unsigned int *xdp_xmit) | ||||
| { | ||||
| 	u32 pktlen, headroom, act, metalen; | ||||
| 	void *orig_data, *orig_data_end; | ||||
|  | @ -447,6 +508,26 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_priv *priv, | |||
| 	switch (act) { | ||||
| 	case XDP_PASS: | ||||
| 		break; | ||||
| 	case XDP_TX: | ||||
| 		get_page(virt_to_page(xdp.data)); | ||||
| 		consume_skb(skb); | ||||
| 		xdp.rxq->mem = priv->xdp_mem; | ||||
| 		if (unlikely(veth_xdp_tx(priv->dev, &xdp) < 0)) { | ||||
| 			trace_xdp_exception(priv->dev, xdp_prog, act); | ||||
| 			goto err_xdp; | ||||
| 		} | ||||
| 		*xdp_xmit |= VETH_XDP_TX; | ||||
| 		rcu_read_unlock(); | ||||
| 		goto xdp_xmit; | ||||
| 	case XDP_REDIRECT: | ||||
| 		get_page(virt_to_page(xdp.data)); | ||||
| 		consume_skb(skb); | ||||
| 		xdp.rxq->mem = priv->xdp_mem; | ||||
| 		if (xdp_do_redirect(priv->dev, &xdp, xdp_prog)) | ||||
| 			goto err_xdp; | ||||
| 		*xdp_xmit |= VETH_XDP_REDIR; | ||||
| 		rcu_read_unlock(); | ||||
| 		goto xdp_xmit; | ||||
| 	default: | ||||
| 		bpf_warn_invalid_xdp_action(act); | ||||
| 	case XDP_ABORTED: | ||||
|  | @ -477,9 +558,15 @@ drop: | |||
| 	rcu_read_unlock(); | ||||
| 	kfree_skb(skb); | ||||
| 	return NULL; | ||||
| err_xdp: | ||||
| 	rcu_read_unlock(); | ||||
| 	page_frag_free(xdp.data); | ||||
| xdp_xmit: | ||||
| 	return NULL; | ||||
| } | ||||
| 
 | ||||
| static int veth_xdp_rcv(struct veth_priv *priv, int budget) | ||||
| static int veth_xdp_rcv(struct veth_priv *priv, int budget, | ||||
| 			unsigned int *xdp_xmit) | ||||
| { | ||||
| 	int i, done = 0; | ||||
| 
 | ||||
|  | @ -490,10 +577,12 @@ static int veth_xdp_rcv(struct veth_priv *priv, int budget) | |||
| 		if (!ptr) | ||||
| 			break; | ||||
| 
 | ||||
| 		if (veth_is_xdp_frame(ptr)) | ||||
| 			skb = veth_xdp_rcv_one(priv, veth_ptr_to_xdp(ptr)); | ||||
| 		else | ||||
| 			skb = veth_xdp_rcv_skb(priv, ptr); | ||||
| 		if (veth_is_xdp_frame(ptr)) { | ||||
| 			skb = veth_xdp_rcv_one(priv, veth_ptr_to_xdp(ptr), | ||||
| 					       xdp_xmit); | ||||
| 		} else { | ||||
| 			skb = veth_xdp_rcv_skb(priv, ptr, xdp_xmit); | ||||
| 		} | ||||
| 
 | ||||
| 		if (skb) | ||||
| 			napi_gro_receive(&priv->xdp_napi, skb); | ||||
|  | @ -508,9 +597,11 @@ static int veth_poll(struct napi_struct *napi, int budget) | |||
| { | ||||
| 	struct veth_priv *priv = | ||||
| 		container_of(napi, struct veth_priv, xdp_napi); | ||||
| 	unsigned int xdp_xmit = 0; | ||||
| 	int done; | ||||
| 
 | ||||
| 	done = veth_xdp_rcv(priv, budget); | ||||
| 	xdp_set_return_frame_no_direct(); | ||||
| 	done = veth_xdp_rcv(priv, budget, &xdp_xmit); | ||||
| 
 | ||||
| 	if (done < budget && napi_complete_done(napi, done)) { | ||||
| 		/* Write rx_notify_masked before reading ptr_ring */ | ||||
|  | @ -521,6 +612,12 @@ static int veth_poll(struct napi_struct *napi, int budget) | |||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	if (xdp_xmit & VETH_XDP_TX) | ||||
| 		veth_xdp_flush(priv->dev); | ||||
| 	if (xdp_xmit & VETH_XDP_REDIR) | ||||
| 		xdp_do_flush_map(); | ||||
| 	xdp_clear_return_frame_no_direct(); | ||||
| 
 | ||||
| 	return done; | ||||
| } | ||||
| 
 | ||||
|  | @ -567,6 +664,9 @@ static int veth_enable_xdp(struct net_device *dev) | |||
| 		err = veth_napi_add(dev); | ||||
| 		if (err) | ||||
| 			goto err; | ||||
| 
 | ||||
| 		/* Save original mem info as it can be overwritten */ | ||||
| 		priv->xdp_mem = priv->xdp_rxq.mem; | ||||
| 	} | ||||
| 
 | ||||
| 	rcu_assign_pointer(priv->xdp_prog, priv->_xdp_prog); | ||||
|  | @ -584,6 +684,7 @@ static void veth_disable_xdp(struct net_device *dev) | |||
| 
 | ||||
| 	rcu_assign_pointer(priv->xdp_prog, NULL); | ||||
| 	veth_napi_del(dev); | ||||
| 	priv->xdp_rxq.mem = priv->xdp_mem; | ||||
| 	xdp_rxq_info_unreg(&priv->xdp_rxq); | ||||
| } | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Toshiaki Makita
						Toshiaki Makita