Merge tag 'mlx5-updates-2023-03-28' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux

Saeed Mahameed says:
====================
mlx5-updates-2023-03-28

Dragos Tatulea says:
====================
net/mlx5e: RX, Drop page_cache and fully use page_pool

For page allocation on the rx path, the mlx5e driver has been using an
internal page cache in tandem with the page pool. The internal page
cache uses a queue for page recycling which has the issue of head of
queue blocking.

This patch series drops the internal page_cache altogether and uses the
page_pool to implement everything that was done by the page_cache
before:
* Let the page_pool handle dma mapping and unmapping.
* Use fragmented pages with a fragment counter instead of tracking via
  page refs.
* Enable skb recycling.

The patch series has the following effects on the rx path:

* Improved performance for the cases when there was low page recycling
  due to head of queue blocking in the internal page_cache. The test
  for this was running a single iperf TCP stream to an rx queue which
  is bound on the same cpu as the application.

  |-------------+--------+--------+------+---------|
  | rq type     | before | after  | unit |    diff |
  |-------------+--------+--------+------+---------|
  | striding rq |   30.1 |   31.4 | Gbps |  4.14 % |
  | legacy rq   |   30.2 |   33.0 | Gbps |  8.48 % |
  |-------------+--------+--------+------+---------|

* Small XDP performance degradation. The test was an XDP drop program
  running on a single rx queue with small incoming packets:

  |-------------+----------+----------+------+---------|
  | rq type     | before   | after    | unit |    diff |
  |-------------+----------+----------+------+---------|
  | striding rq | 19725449 | 18544617 | pps  | -6.37 % |
  | legacy rq   | 19879931 | 18631841 | pps  | -6.70 % |
  |-------------+----------+----------+------+---------|

  This will be handled in a different patch series by adding support
  for multi-packet per page.

* For other cases the performance is roughly the same.

The above numbers were obtained on the following system:
  24 core Intel(R) Xeon(R) Platinum 8380 CPU @ 2.30GHz
  32 GB RAM
  ConnectX-7 single port

The breakdown of the patch series is the following:
* Preparations for introducing the mlx5e_frag_page struct.
* Delete the mlx5e_page_cache struct.
* Enable dma mapping from page_pool.
* Enable skb recycling and fragment counting.
* Do deferred release of pages (just before alloc) to ensure better
  page_pool cache utilization.
====================

* tag 'mlx5-updates-2023-03-28' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux:
  net/mlx5e: RX, Remove unnecessary recycle parameter and page_cache stats
  net/mlx5e: RX, Break the wqe bulk refill in smaller chunks
  net/mlx5e: RX, Increase WQE bulk size for legacy rq
  net/mlx5e: RX, Split off release path for xsk buffers for legacy rq
  net/mlx5e: RX, Defer page release in legacy rq for better recycling
  net/mlx5e: RX, Change wqe last_in_page field from bool to bit flags
  net/mlx5e: RX, Defer page release in striding rq for better recycling
  net/mlx5e: RX, Rename xdp_xmit_bitmap to a more generic name
  net/mlx5e: RX, Enable skb page recycling through the page_pool
  net/mlx5e: RX, Enable dma map and sync from page_pool allocator
  net/mlx5e: RX, Remove internal page_cache
  net/mlx5e: RX, Store SHAMPO header pages in array
  net/mlx5e: RX, Remove alloc unit layout constraint for striding rq
  net/mlx5e: RX, Remove alloc unit layout constraint for legacy rq
  net/mlx5e: RX, Remove mlx5e_alloc_unit argument in page allocation
====================

Link: https://lore.kernel.org/r/20230328205623.142075-1-saeed@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
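The "fragmented pages with a fragment counter" model mentioned above is the page_pool fragment API. As a rough illustration only (not the mlx5e code itself; the drv_* names and DRV_PAGECNT_BIAS_MAX are made up for the sketch), a driver can bias a freshly allocated page once, count the fragments it hands out, and return the page to the pool when the remaining bias drains to zero:

    #include <net/page_pool.h>

    /* Upper bound on fragments handed out from one page (illustrative). */
    #define DRV_PAGECNT_BIAS_MAX (PAGE_SIZE / 64)

    struct drv_frag_page {
    	struct page *page;
    	u16 frags;	/* fragments handed out from this page */
    };

    static int drv_page_alloc_fragmented(struct page_pool *pool,
    				     struct drv_frag_page *fp)
    {
    	struct page *page = page_pool_dev_alloc_pages(pool);

    	if (unlikely(!page))
    		return -ENOMEM;

    	/* Take the whole bias up front instead of elevating page refs
    	 * for every fragment.
    	 */
    	page_pool_fragment_page(page, DRV_PAGECNT_BIAS_MAX);
    	fp->page = page;
    	fp->frags = 0;
    	return 0;
    }

    static void drv_page_release_fragmented(struct page_pool *pool,
    					struct drv_frag_page *fp)
    {
    	u16 drain = DRV_PAGECNT_BIAS_MAX - fp->frags;

    	/* Drop the unused part of the bias; recycle the page into the
    	 * pool once the fragment count reaches zero.
    	 */
    	if (page_pool_defrag_page(fp->page, drain) == 0)
    		page_pool_put_defragged_page(pool, fp->page, -1, true);
    }

The driver's actual helpers of this shape appear in the diff below as mlx5e_page_alloc_fragmented() and mlx5e_page_release_fragmented().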
This commit is contained in:

commit 7079d5e61a

11 changed files with 467 additions and 392 deletions
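The WQE bulk sizing added for the legacy rq (mlx5e_rx_compute_wqe_bulk_params() in the diff below) is easier to follow with a rough worked example. The numbers are illustrative assumptions, not measurements: a 4096-entry legacy rq with a single 4 KB fragment stride per WQE, PAGE_SIZE of 4096, no XDP program attached, and a page_pool cache refill (PP_ALLOC_CACHE_REFILL) of 64 pages:

    bulk_bound_rq_size          = 4096 / 4                        = 1024 WQEs
    sum_frag_strides            = 4096 B
    bulk_bound_rq_size_in_bytes = 1024 * 4096 B                   = 4 MB
    wqe_bulk_in_bytes           = min(512 KB, 4 MB)               = 512 KB
    wqe_bulk                    = DIV_ROUND_UP(512 KB, 4 KB)      = 128 WQEs
                                  (clamped to at least wqe_index_mask + 1)
    split_factor                = DIV_ROUND_UP(512 KB, 64 * 4 KB) = 2
    refill_unit                 = DIV_ROUND_UP(128, 2)            = 64 WQEs

Under these assumptions a refill of up to 128 WQEs is done in chunks of 64, keeping each deferred-release-plus-alloc chunk close to one page_pool cache refill so recycled pages can be reused from the cache rather than spilling past it.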
				
			
		|  | @ -346,32 +346,6 @@ the software port. | ||||||
|      - The number of receive packets with CQE compression on ring i [#accel]_. |      - The number of receive packets with CQE compression on ring i [#accel]_. | ||||||
|      - Acceleration |      - Acceleration | ||||||
| 
 | 
 | ||||||
|    * - `rx[i]_cache_reuse` |  | ||||||
|      - The number of events of successful reuse of a page from a driver's |  | ||||||
|        internal page cache. |  | ||||||
|      - Acceleration |  | ||||||
| 
 |  | ||||||
|    * - `rx[i]_cache_full` |  | ||||||
|      - The number of events of full internal page cache where driver can't put a |  | ||||||
|        page back to the cache for recycling (page will be freed). |  | ||||||
|      - Acceleration |  | ||||||
| 
 |  | ||||||
|    * - `rx[i]_cache_empty` |  | ||||||
|      - The number of events where cache was empty - no page to give. Driver |  | ||||||
|        shall allocate new page. |  | ||||||
|      - Acceleration |  | ||||||
| 
 |  | ||||||
|    * - `rx[i]_cache_busy` |  | ||||||
|      - The number of events where cache head was busy and cannot be recycled. |  | ||||||
|        Driver allocated new page. |  | ||||||
|      - Acceleration |  | ||||||
| 
 |  | ||||||
|    * - `rx[i]_cache_waive` |  | ||||||
|      - The number of cache evacuation. This can occur due to page move to |  | ||||||
|        another NUMA node or page was pfmemalloc-ed and should be freed as soon |  | ||||||
|        as possible. |  | ||||||
|      - Acceleration |  | ||||||
| 
 |  | ||||||
|    * - `rx[i]_arfs_err` |    * - `rx[i]_arfs_err` | ||||||
|      - Number of flow rules that failed to be added to the flow table. |      - Number of flow rules that failed to be added to the flow table. | ||||||
|      - Error |      - Error | ||||||
|  |  | ||||||
|  | @ -475,11 +475,6 @@ struct mlx5e_txqsq { | ||||||
| 	cqe_ts_to_ns               ptp_cyc2time; | 	cqe_ts_to_ns               ptp_cyc2time; | ||||||
| } ____cacheline_aligned_in_smp; | } ____cacheline_aligned_in_smp; | ||||||
| 
 | 
 | ||||||
| union mlx5e_alloc_unit { |  | ||||||
| 	struct page *page; |  | ||||||
| 	struct xdp_buff *xsk; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| /* XDP packets can be transmitted in different ways. On completion, we need to
 | /* XDP packets can be transmitted in different ways. On completion, we need to
 | ||||||
|  * distinguish between them to clean up things in a proper way. |  * distinguish between them to clean up things in a proper way. | ||||||
|  */ |  */ | ||||||
|  | @ -605,16 +600,35 @@ struct mlx5e_icosq { | ||||||
| 	struct work_struct         recover_work; | 	struct work_struct         recover_work; | ||||||
| } ____cacheline_aligned_in_smp; | } ____cacheline_aligned_in_smp; | ||||||
| 
 | 
 | ||||||
|  | struct mlx5e_frag_page { | ||||||
|  | 	struct page *page; | ||||||
|  | 	u16 frags; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | enum mlx5e_wqe_frag_flag { | ||||||
|  | 	MLX5E_WQE_FRAG_LAST_IN_PAGE, | ||||||
|  | 	MLX5E_WQE_FRAG_SKIP_RELEASE, | ||||||
|  | }; | ||||||
|  | 
 | ||||||
| struct mlx5e_wqe_frag_info { | struct mlx5e_wqe_frag_info { | ||||||
| 	union mlx5e_alloc_unit *au; | 	union { | ||||||
|  | 		struct mlx5e_frag_page *frag_page; | ||||||
|  | 		struct xdp_buff **xskp; | ||||||
|  | 	}; | ||||||
| 	u32 offset; | 	u32 offset; | ||||||
| 	bool last_in_page; | 	u8 flags; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | union mlx5e_alloc_units { | ||||||
|  | 	DECLARE_FLEX_ARRAY(struct mlx5e_frag_page, frag_pages); | ||||||
|  | 	DECLARE_FLEX_ARRAY(struct page *, pages); | ||||||
|  | 	DECLARE_FLEX_ARRAY(struct xdp_buff *, xsk_buffs); | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| struct mlx5e_mpw_info { | struct mlx5e_mpw_info { | ||||||
| 	u16 consumed_strides; | 	u16 consumed_strides; | ||||||
| 	DECLARE_BITMAP(xdp_xmit_bitmap, MLX5_MPWRQ_MAX_PAGES_PER_WQE); | 	DECLARE_BITMAP(skip_release_bitmap, MLX5_MPWRQ_MAX_PAGES_PER_WQE); | ||||||
| 	union mlx5e_alloc_unit alloc_units[]; | 	union mlx5e_alloc_units alloc_units; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| #define MLX5E_MAX_RX_FRAGS 4 | #define MLX5E_MAX_RX_FRAGS 4 | ||||||
|  | @ -625,11 +639,6 @@ struct mlx5e_mpw_info { | ||||||
| #define MLX5E_CACHE_UNIT (MLX5_MPWRQ_MAX_PAGES_PER_WQE > NAPI_POLL_WEIGHT ? \ | #define MLX5E_CACHE_UNIT (MLX5_MPWRQ_MAX_PAGES_PER_WQE > NAPI_POLL_WEIGHT ? \ | ||||||
| 			  MLX5_MPWRQ_MAX_PAGES_PER_WQE : NAPI_POLL_WEIGHT) | 			  MLX5_MPWRQ_MAX_PAGES_PER_WQE : NAPI_POLL_WEIGHT) | ||||||
| #define MLX5E_CACHE_SIZE	(4 * roundup_pow_of_two(MLX5E_CACHE_UNIT)) | #define MLX5E_CACHE_SIZE	(4 * roundup_pow_of_two(MLX5E_CACHE_UNIT)) | ||||||
| struct mlx5e_page_cache { |  | ||||||
| 	u32 head; |  | ||||||
| 	u32 tail; |  | ||||||
| 	struct page *page_cache[MLX5E_CACHE_SIZE]; |  | ||||||
| }; |  | ||||||
| 
 | 
 | ||||||
| struct mlx5e_rq; | struct mlx5e_rq; | ||||||
| typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq*, struct mlx5_cqe64*); | typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq*, struct mlx5_cqe64*); | ||||||
|  | @ -661,19 +670,24 @@ struct mlx5e_rq_frags_info { | ||||||
| 	struct mlx5e_rq_frag_info arr[MLX5E_MAX_RX_FRAGS]; | 	struct mlx5e_rq_frag_info arr[MLX5E_MAX_RX_FRAGS]; | ||||||
| 	u8 num_frags; | 	u8 num_frags; | ||||||
| 	u8 log_num_frags; | 	u8 log_num_frags; | ||||||
| 	u8 wqe_bulk; | 	u16 wqe_bulk; | ||||||
|  | 	u16 refill_unit; | ||||||
| 	u8 wqe_index_mask; | 	u8 wqe_index_mask; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| struct mlx5e_dma_info { | struct mlx5e_dma_info { | ||||||
| 	dma_addr_t addr; | 	dma_addr_t addr; | ||||||
| 	struct page *page; | 	union { | ||||||
|  | 		struct mlx5e_frag_page *frag_page; | ||||||
|  | 		struct page *page; | ||||||
|  | 	}; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| struct mlx5e_shampo_hd { | struct mlx5e_shampo_hd { | ||||||
| 	u32 mkey; | 	u32 mkey; | ||||||
| 	struct mlx5e_dma_info *info; | 	struct mlx5e_dma_info *info; | ||||||
| 	struct page *last_page; | 	struct mlx5e_frag_page *pages; | ||||||
|  | 	u16 curr_page_index; | ||||||
| 	u16 hd_per_wq; | 	u16 hd_per_wq; | ||||||
| 	u16 hd_per_wqe; | 	u16 hd_per_wqe; | ||||||
| 	unsigned long *bitmap; | 	unsigned long *bitmap; | ||||||
|  | @ -702,7 +716,7 @@ struct mlx5e_rq { | ||||||
| 		struct { | 		struct { | ||||||
| 			struct mlx5_wq_cyc          wq; | 			struct mlx5_wq_cyc          wq; | ||||||
| 			struct mlx5e_wqe_frag_info *frags; | 			struct mlx5e_wqe_frag_info *frags; | ||||||
| 			union mlx5e_alloc_unit     *alloc_units; | 			union mlx5e_alloc_units    *alloc_units; | ||||||
| 			struct mlx5e_rq_frags_info  info; | 			struct mlx5e_rq_frags_info  info; | ||||||
| 			mlx5e_fp_skb_from_cqe       skb_from_cqe; | 			mlx5e_fp_skb_from_cqe       skb_from_cqe; | ||||||
| 		} wqe; | 		} wqe; | ||||||
|  | @ -738,7 +752,6 @@ struct mlx5e_rq { | ||||||
| 	struct mlx5e_rq_stats *stats; | 	struct mlx5e_rq_stats *stats; | ||||||
| 	struct mlx5e_cq        cq; | 	struct mlx5e_cq        cq; | ||||||
| 	struct mlx5e_cq_decomp cqd; | 	struct mlx5e_cq_decomp cqd; | ||||||
| 	struct mlx5e_page_cache page_cache; |  | ||||||
| 	struct hwtstamp_config *tstamp; | 	struct hwtstamp_config *tstamp; | ||||||
| 	struct mlx5_clock      *clock; | 	struct mlx5_clock      *clock; | ||||||
| 	struct mlx5e_icosq    *icosq; | 	struct mlx5e_icosq    *icosq; | ||||||
|  |  | ||||||
|  | @ -667,6 +667,48 @@ static int mlx5e_max_nonlinear_mtu(int first_frag_size, int frag_size, bool xdp) | ||||||
| 	return first_frag_size + (MLX5E_MAX_RX_FRAGS - 2) * frag_size + PAGE_SIZE; | 	return first_frag_size + (MLX5E_MAX_RX_FRAGS - 2) * frag_size + PAGE_SIZE; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static void mlx5e_rx_compute_wqe_bulk_params(struct mlx5e_params *params, | ||||||
|  | 					     struct mlx5e_rq_frags_info *info) | ||||||
|  | { | ||||||
|  | 	u16 bulk_bound_rq_size = (1 << params->log_rq_mtu_frames) / 4; | ||||||
|  | 	u32 bulk_bound_rq_size_in_bytes; | ||||||
|  | 	u32 sum_frag_strides = 0; | ||||||
|  | 	u32 wqe_bulk_in_bytes; | ||||||
|  | 	u16 split_factor; | ||||||
|  | 	u32 wqe_bulk; | ||||||
|  | 	int i; | ||||||
|  | 
 | ||||||
|  | 	for (i = 0; i < info->num_frags; i++) | ||||||
|  | 		sum_frag_strides += info->arr[i].frag_stride; | ||||||
|  | 
 | ||||||
|  | 	/* For MTUs larger than PAGE_SIZE, align to PAGE_SIZE to reflect
 | ||||||
|  | 	 * amount of consumed pages per wqe in bytes. | ||||||
|  | 	 */ | ||||||
|  | 	if (sum_frag_strides > PAGE_SIZE) | ||||||
|  | 		sum_frag_strides = ALIGN(sum_frag_strides, PAGE_SIZE); | ||||||
|  | 
 | ||||||
|  | 	bulk_bound_rq_size_in_bytes = bulk_bound_rq_size * sum_frag_strides; | ||||||
|  | 
 | ||||||
|  | #define MAX_WQE_BULK_BYTES(xdp) ((xdp ? 256 : 512) * 1024) | ||||||
|  | 
 | ||||||
|  | 	/* A WQE bulk should not exceed min(512KB, 1/4 of rq size). For XDP
 | ||||||
|  | 	 * keep bulk size smaller to avoid filling the page_pool cache on | ||||||
|  | 	 * every bulk refill. | ||||||
|  | 	 */ | ||||||
|  | 	wqe_bulk_in_bytes = min_t(u32, MAX_WQE_BULK_BYTES(params->xdp_prog), | ||||||
|  | 				  bulk_bound_rq_size_in_bytes); | ||||||
|  | 	wqe_bulk = DIV_ROUND_UP(wqe_bulk_in_bytes, sum_frag_strides); | ||||||
|  | 
 | ||||||
|  | 	/* Make sure that allocations don't start when the page is still used
 | ||||||
|  | 	 * by older WQEs. | ||||||
|  | 	 */ | ||||||
|  | 	info->wqe_bulk = max_t(u16, info->wqe_index_mask + 1, wqe_bulk); | ||||||
|  | 
 | ||||||
|  | 	split_factor = DIV_ROUND_UP(MAX_WQE_BULK_BYTES(params->xdp_prog), | ||||||
|  | 				    PP_ALLOC_CACHE_REFILL * PAGE_SIZE); | ||||||
|  | 	info->refill_unit = DIV_ROUND_UP(info->wqe_bulk, split_factor); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| #define DEFAULT_FRAG_SIZE (2048) | #define DEFAULT_FRAG_SIZE (2048) | ||||||
| 
 | 
 | ||||||
| static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, | static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, | ||||||
|  | @ -774,11 +816,14 @@ static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| out: | out: | ||||||
| 	/* Bulking optimization to skip allocation until at least 8 WQEs can be
 | 	/* Bulking optimization to skip allocation until a large enough number
 | ||||||
| 	 * allocated in a row. At the same time, never start allocation when | 	 * of WQEs can be allocated in a row. Bulking also influences how well | ||||||
| 	 * the page is still used by older WQEs. | 	 * deferred page release works. | ||||||
| 	 */ | 	 */ | ||||||
| 	info->wqe_bulk = max_t(u8, info->wqe_index_mask + 1, 8); | 	mlx5e_rx_compute_wqe_bulk_params(params, info); | ||||||
|  | 
 | ||||||
|  | 	mlx5_core_dbg(mdev, "%s: wqe_bulk = %u, wqe_bulk_refill_unit = %u\n", | ||||||
|  | 		      __func__, info->wqe_bulk, info->refill_unit); | ||||||
| 
 | 
 | ||||||
| 	info->log_num_frags = order_base_2(info->num_frags); | 	info->log_num_frags = order_base_2(info->num_frags); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -121,9 +121,9 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) | ||||||
| 
 | 
 | ||||||
| 	mlx5e_reset_icosq_cc_pc(icosq); | 	mlx5e_reset_icosq_cc_pc(icosq); | ||||||
| 
 | 
 | ||||||
| 	mlx5e_free_rx_in_progress_descs(rq); | 	mlx5e_free_rx_missing_descs(rq); | ||||||
| 	if (xskrq) | 	if (xskrq) | ||||||
| 		mlx5e_free_rx_in_progress_descs(xskrq); | 		mlx5e_free_rx_missing_descs(xskrq); | ||||||
| 
 | 
 | ||||||
| 	clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state); | 	clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state); | ||||||
| 	mlx5e_activate_icosq(icosq); | 	mlx5e_activate_icosq(icosq); | ||||||
|  |  | ||||||
|  | @ -65,13 +65,11 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget); | ||||||
| int mlx5e_poll_ico_cq(struct mlx5e_cq *cq); | int mlx5e_poll_ico_cq(struct mlx5e_cq *cq); | ||||||
| 
 | 
 | ||||||
| /* RX */ | /* RX */ | ||||||
| void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct page *page); |  | ||||||
| void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, struct page *page, bool recycle); |  | ||||||
| INDIRECT_CALLABLE_DECLARE(bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)); | INDIRECT_CALLABLE_DECLARE(bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)); | ||||||
| INDIRECT_CALLABLE_DECLARE(bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)); | INDIRECT_CALLABLE_DECLARE(bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)); | ||||||
| int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); | int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); | ||||||
| void mlx5e_free_rx_descs(struct mlx5e_rq *rq); | void mlx5e_free_rx_descs(struct mlx5e_rq *rq); | ||||||
| void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq); | void mlx5e_free_rx_missing_descs(struct mlx5e_rq *rq); | ||||||
| 
 | 
 | ||||||
| static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config) | static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config) | ||||||
| { | { | ||||||
|  | @ -489,7 +487,7 @@ static inline bool mlx5e_icosq_can_post_wqe(struct mlx5e_icosq *sq, u16 wqe_size | ||||||
| 
 | 
 | ||||||
| static inline struct mlx5e_mpw_info *mlx5e_get_mpw_info(struct mlx5e_rq *rq, int i) | static inline struct mlx5e_mpw_info *mlx5e_get_mpw_info(struct mlx5e_rq *rq, int i) | ||||||
| { | { | ||||||
| 	size_t isz = struct_size(rq->mpwqe.info, alloc_units, rq->mpwqe.pages_per_wqe); | 	size_t isz = struct_size(rq->mpwqe.info, alloc_units.frag_pages, rq->mpwqe.pages_per_wqe); | ||||||
| 
 | 
 | ||||||
| 	return (struct mlx5e_mpw_info *)((char *)rq->mpwqe.info + array_size(i, isz)); | 	return (struct mlx5e_mpw_info *)((char *)rq->mpwqe.info + array_size(i, isz)); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -209,8 +209,6 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, | ||||||
| 			goto xdp_abort; | 			goto xdp_abort; | ||||||
| 		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); | 		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); | ||||||
| 		__set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags); | 		__set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags); | ||||||
| 		if (xdp->rxq->mem.type != MEM_TYPE_XSK_BUFF_POOL) |  | ||||||
| 			mlx5e_page_dma_unmap(rq, virt_to_page(xdp->data)); |  | ||||||
| 		rq->stats->xdp_redirect++; | 		rq->stats->xdp_redirect++; | ||||||
| 		return true; | 		return true; | ||||||
| 	default: | 	default: | ||||||
|  | @ -507,7 +505,6 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, | ||||||
| static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq, | static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq, | ||||||
| 				  struct mlx5e_xdp_wqe_info *wi, | 				  struct mlx5e_xdp_wqe_info *wi, | ||||||
| 				  u32 *xsk_frames, | 				  u32 *xsk_frames, | ||||||
| 				  bool recycle, |  | ||||||
| 				  struct xdp_frame_bulk *bq) | 				  struct xdp_frame_bulk *bq) | ||||||
| { | { | ||||||
| 	struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo; | 	struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo; | ||||||
|  | @ -525,7 +522,8 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq, | ||||||
| 			break; | 			break; | ||||||
| 		case MLX5E_XDP_XMIT_MODE_PAGE: | 		case MLX5E_XDP_XMIT_MODE_PAGE: | ||||||
| 			/* XDP_TX from the regular RQ */ | 			/* XDP_TX from the regular RQ */ | ||||||
| 			mlx5e_page_release_dynamic(xdpi.page.rq, xdpi.page.page, recycle); | 			page_pool_put_defragged_page(xdpi.page.rq->page_pool, | ||||||
|  | 						     xdpi.page.page, -1, true); | ||||||
| 			break; | 			break; | ||||||
| 		case MLX5E_XDP_XMIT_MODE_XSK: | 		case MLX5E_XDP_XMIT_MODE_XSK: | ||||||
| 			/* AF_XDP send */ | 			/* AF_XDP send */ | ||||||
|  | @ -579,7 +577,7 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) | ||||||
| 
 | 
 | ||||||
| 			sqcc += wi->num_wqebbs; | 			sqcc += wi->num_wqebbs; | ||||||
| 
 | 
 | ||||||
| 			mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, true, &bq); | 			mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, &bq); | ||||||
| 		} while (!last_wqe); | 		} while (!last_wqe); | ||||||
| 
 | 
 | ||||||
| 		if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) { | 		if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) { | ||||||
|  | @ -626,7 +624,7 @@ void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq) | ||||||
| 
 | 
 | ||||||
| 		sq->cc += wi->num_wqebbs; | 		sq->cc += wi->num_wqebbs; | ||||||
| 
 | 
 | ||||||
| 		mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, false, &bq); | 		mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, &bq); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	xdp_flush_frame_bulk(&bq); | 	xdp_flush_frame_bulk(&bq); | ||||||
|  |  | ||||||
|  | @ -22,6 +22,7 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) | ||||||
| 	struct mlx5e_icosq *icosq = rq->icosq; | 	struct mlx5e_icosq *icosq = rq->icosq; | ||||||
| 	struct mlx5_wq_cyc *wq = &icosq->wq; | 	struct mlx5_wq_cyc *wq = &icosq->wq; | ||||||
| 	struct mlx5e_umr_wqe *umr_wqe; | 	struct mlx5e_umr_wqe *umr_wqe; | ||||||
|  | 	struct xdp_buff **xsk_buffs; | ||||||
| 	int batch, i; | 	int batch, i; | ||||||
| 	u32 offset; /* 17-bit value with MTT. */ | 	u32 offset; /* 17-bit value with MTT. */ | ||||||
| 	u16 pi; | 	u16 pi; | ||||||
|  | @ -29,9 +30,9 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) | ||||||
| 	if (unlikely(!xsk_buff_can_alloc(rq->xsk_pool, rq->mpwqe.pages_per_wqe))) | 	if (unlikely(!xsk_buff_can_alloc(rq->xsk_pool, rq->mpwqe.pages_per_wqe))) | ||||||
| 		goto err; | 		goto err; | ||||||
| 
 | 
 | ||||||
| 	BUILD_BUG_ON(sizeof(wi->alloc_units[0]) != sizeof(wi->alloc_units[0].xsk)); |  | ||||||
| 	XSK_CHECK_PRIV_TYPE(struct mlx5e_xdp_buff); | 	XSK_CHECK_PRIV_TYPE(struct mlx5e_xdp_buff); | ||||||
| 	batch = xsk_buff_alloc_batch(rq->xsk_pool, (struct xdp_buff **)wi->alloc_units, | 	xsk_buffs = (struct xdp_buff **)wi->alloc_units.xsk_buffs; | ||||||
|  | 	batch = xsk_buff_alloc_batch(rq->xsk_pool, xsk_buffs, | ||||||
| 				     rq->mpwqe.pages_per_wqe); | 				     rq->mpwqe.pages_per_wqe); | ||||||
| 
 | 
 | ||||||
| 	/* If batch < pages_per_wqe, either:
 | 	/* If batch < pages_per_wqe, either:
 | ||||||
|  | @ -41,8 +42,8 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) | ||||||
| 	 * the first error, which will mean there are no more valid descriptors. | 	 * the first error, which will mean there are no more valid descriptors. | ||||||
| 	 */ | 	 */ | ||||||
| 	for (; batch < rq->mpwqe.pages_per_wqe; batch++) { | 	for (; batch < rq->mpwqe.pages_per_wqe; batch++) { | ||||||
| 		wi->alloc_units[batch].xsk = xsk_buff_alloc(rq->xsk_pool); | 		xsk_buffs[batch] = xsk_buff_alloc(rq->xsk_pool); | ||||||
| 		if (unlikely(!wi->alloc_units[batch].xsk)) | 		if (unlikely(!xsk_buffs[batch])) | ||||||
| 			goto err_reuse_batch; | 			goto err_reuse_batch; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  | @ -52,8 +53,8 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) | ||||||
| 
 | 
 | ||||||
| 	if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) { | 	if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) { | ||||||
| 		for (i = 0; i < batch; i++) { | 		for (i = 0; i < batch; i++) { | ||||||
| 			struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[i].xsk); | 			struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]); | ||||||
| 			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk); | 			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]); | ||||||
| 
 | 
 | ||||||
| 			umr_wqe->inline_mtts[i] = (struct mlx5_mtt) { | 			umr_wqe->inline_mtts[i] = (struct mlx5_mtt) { | ||||||
| 				.ptag = cpu_to_be64(addr | MLX5_EN_WR), | 				.ptag = cpu_to_be64(addr | MLX5_EN_WR), | ||||||
|  | @ -62,8 +63,8 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) | ||||||
| 		} | 		} | ||||||
| 	} else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED)) { | 	} else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED)) { | ||||||
| 		for (i = 0; i < batch; i++) { | 		for (i = 0; i < batch; i++) { | ||||||
| 			struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[i].xsk); | 			struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]); | ||||||
| 			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk); | 			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]); | ||||||
| 
 | 
 | ||||||
| 			umr_wqe->inline_ksms[i] = (struct mlx5_ksm) { | 			umr_wqe->inline_ksms[i] = (struct mlx5_ksm) { | ||||||
| 				.key = rq->mkey_be, | 				.key = rq->mkey_be, | ||||||
|  | @ -75,8 +76,8 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) | ||||||
| 		u32 mapping_size = 1 << (rq->mpwqe.page_shift - 2); | 		u32 mapping_size = 1 << (rq->mpwqe.page_shift - 2); | ||||||
| 
 | 
 | ||||||
| 		for (i = 0; i < batch; i++) { | 		for (i = 0; i < batch; i++) { | ||||||
| 			struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[i].xsk); | 			struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]); | ||||||
| 			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk); | 			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]); | ||||||
| 
 | 
 | ||||||
| 			umr_wqe->inline_ksms[i << 2] = (struct mlx5_ksm) { | 			umr_wqe->inline_ksms[i << 2] = (struct mlx5_ksm) { | ||||||
| 				.key = rq->mkey_be, | 				.key = rq->mkey_be, | ||||||
|  | @ -102,8 +103,8 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) | ||||||
| 		__be32 frame_size = cpu_to_be32(rq->xsk_pool->chunk_size); | 		__be32 frame_size = cpu_to_be32(rq->xsk_pool->chunk_size); | ||||||
| 
 | 
 | ||||||
| 		for (i = 0; i < batch; i++) { | 		for (i = 0; i < batch; i++) { | ||||||
| 			struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[i].xsk); | 			struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]); | ||||||
| 			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk); | 			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]); | ||||||
| 
 | 
 | ||||||
| 			umr_wqe->inline_klms[i << 1] = (struct mlx5_klm) { | 			umr_wqe->inline_klms[i << 1] = (struct mlx5_klm) { | ||||||
| 				.key = rq->mkey_be, | 				.key = rq->mkey_be, | ||||||
|  | @ -119,7 +120,7 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	bitmap_zero(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe); | 	bitmap_zero(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe); | ||||||
| 	wi->consumed_strides = 0; | 	wi->consumed_strides = 0; | ||||||
| 
 | 
 | ||||||
| 	umr_wqe->ctrl.opmod_idx_opcode = | 	umr_wqe->ctrl.opmod_idx_opcode = | ||||||
|  | @ -149,7 +150,7 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) | ||||||
| 
 | 
 | ||||||
| err_reuse_batch: | err_reuse_batch: | ||||||
| 	while (--batch >= 0) | 	while (--batch >= 0) | ||||||
| 		xsk_buff_free(wi->alloc_units[batch].xsk); | 		xsk_buff_free(xsk_buffs[batch]); | ||||||
| 
 | 
 | ||||||
| err: | err: | ||||||
| 	rq->stats->buff_alloc_err++; | 	rq->stats->buff_alloc_err++; | ||||||
|  | @ -163,13 +164,10 @@ int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) | ||||||
| 	u32 contig, alloc; | 	u32 contig, alloc; | ||||||
| 	int i; | 	int i; | ||||||
| 
 | 
 | ||||||
| 	/* mlx5e_init_frags_partition creates a 1:1 mapping between
 | 	/* Each rq->wqe.frags->xskp is 1:1 mapped to an element inside the
 | ||||||
| 	 * rq->wqe.frags and rq->wqe.alloc_units, which allows us to | 	 * rq->wqe.alloc_units->xsk_buffs array allocated here. | ||||||
| 	 * allocate XDP buffers straight into alloc_units. |  | ||||||
| 	 */ | 	 */ | ||||||
| 	BUILD_BUG_ON(sizeof(rq->wqe.alloc_units[0]) != | 	buffs = rq->wqe.alloc_units->xsk_buffs; | ||||||
| 		     sizeof(rq->wqe.alloc_units[0].xsk)); |  | ||||||
| 	buffs = (struct xdp_buff **)rq->wqe.alloc_units; |  | ||||||
| 	contig = mlx5_wq_cyc_get_size(wq) - ix; | 	contig = mlx5_wq_cyc_get_size(wq) - ix; | ||||||
| 	if (wqe_bulk <= contig) { | 	if (wqe_bulk <= contig) { | ||||||
| 		alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, wqe_bulk); | 		alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, wqe_bulk); | ||||||
|  | @ -189,8 +187,9 @@ int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) | ||||||
| 		/* Assumes log_num_frags == 0. */ | 		/* Assumes log_num_frags == 0. */ | ||||||
| 		frag = &rq->wqe.frags[j]; | 		frag = &rq->wqe.frags[j]; | ||||||
| 
 | 
 | ||||||
| 		addr = xsk_buff_xdp_get_frame_dma(frag->au->xsk); | 		addr = xsk_buff_xdp_get_frame_dma(*frag->xskp); | ||||||
| 		wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom); | 		wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom); | ||||||
|  | 		frag->flags &= ~BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	return alloc; | 	return alloc; | ||||||
|  | @ -211,12 +210,13 @@ int mlx5e_xsk_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) | ||||||
| 		/* Assumes log_num_frags == 0. */ | 		/* Assumes log_num_frags == 0. */ | ||||||
| 		frag = &rq->wqe.frags[j]; | 		frag = &rq->wqe.frags[j]; | ||||||
| 
 | 
 | ||||||
| 		frag->au->xsk = xsk_buff_alloc(rq->xsk_pool); | 		*frag->xskp = xsk_buff_alloc(rq->xsk_pool); | ||||||
| 		if (unlikely(!frag->au->xsk)) | 		if (unlikely(!*frag->xskp)) | ||||||
| 			return i; | 			return i; | ||||||
| 
 | 
 | ||||||
| 		addr = xsk_buff_xdp_get_frame_dma(frag->au->xsk); | 		addr = xsk_buff_xdp_get_frame_dma(*frag->xskp); | ||||||
| 		wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom); | 		wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom); | ||||||
|  | 		frag->flags &= ~BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	return wqe_bulk; | 	return wqe_bulk; | ||||||
|  | @ -251,7 +251,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, | ||||||
| 						    u32 head_offset, | 						    u32 head_offset, | ||||||
| 						    u32 page_idx) | 						    u32 page_idx) | ||||||
| { | { | ||||||
| 	struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[page_idx].xsk); | 	struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units.xsk_buffs[page_idx]); | ||||||
| 	struct bpf_prog *prog; | 	struct bpf_prog *prog; | ||||||
| 
 | 
 | ||||||
| 	/* Check packet size. Note LRO doesn't use linear SKB */ | 	/* Check packet size. Note LRO doesn't use linear SKB */ | ||||||
|  | @ -291,7 +291,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, | ||||||
| 	prog = rcu_dereference(rq->xdp_prog); | 	prog = rcu_dereference(rq->xdp_prog); | ||||||
| 	if (likely(prog && mlx5e_xdp_handle(rq, prog, mxbuf))) { | 	if (likely(prog && mlx5e_xdp_handle(rq, prog, mxbuf))) { | ||||||
| 		if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))) | 		if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))) | ||||||
| 			__set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */ | 			__set_bit(page_idx, wi->skip_release_bitmap); /* non-atomic */ | ||||||
| 		return NULL; /* page/packet was consumed by XDP */ | 		return NULL; /* page/packet was consumed by XDP */ | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  | @ -306,7 +306,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, | ||||||
| 					      struct mlx5_cqe64 *cqe, | 					      struct mlx5_cqe64 *cqe, | ||||||
| 					      u32 cqe_bcnt) | 					      u32 cqe_bcnt) | ||||||
| { | { | ||||||
| 	struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->au->xsk); | 	struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(*wi->xskp); | ||||||
| 	struct bpf_prog *prog; | 	struct bpf_prog *prog; | ||||||
| 
 | 
 | ||||||
| 	/* wi->offset is not used in this function, because xdp->data and the
 | 	/* wi->offset is not used in this function, because xdp->data and the
 | ||||||
|  |  | ||||||
|  | @ -262,23 +262,30 @@ static int mlx5e_rq_shampo_hd_info_alloc(struct mlx5e_rq *rq, int node) | ||||||
| 
 | 
 | ||||||
| 	shampo->bitmap = bitmap_zalloc_node(shampo->hd_per_wq, GFP_KERNEL, | 	shampo->bitmap = bitmap_zalloc_node(shampo->hd_per_wq, GFP_KERNEL, | ||||||
| 					    node); | 					    node); | ||||||
| 	if (!shampo->bitmap) |  | ||||||
| 		return -ENOMEM; |  | ||||||
| 
 |  | ||||||
| 	shampo->info = kvzalloc_node(array_size(shampo->hd_per_wq, | 	shampo->info = kvzalloc_node(array_size(shampo->hd_per_wq, | ||||||
| 						sizeof(*shampo->info)), | 						sizeof(*shampo->info)), | ||||||
| 				     GFP_KERNEL, node); | 				     GFP_KERNEL, node); | ||||||
| 	if (!shampo->info) { | 	shampo->pages = kvzalloc_node(array_size(shampo->hd_per_wq, | ||||||
| 		kvfree(shampo->bitmap); | 						 sizeof(*shampo->pages)), | ||||||
| 		return -ENOMEM; | 				     GFP_KERNEL, node); | ||||||
| 	} | 	if (!shampo->bitmap || !shampo->info || !shampo->pages) | ||||||
|  | 		goto err_nomem; | ||||||
|  | 
 | ||||||
| 	return 0; | 	return 0; | ||||||
|  | 
 | ||||||
|  | err_nomem: | ||||||
|  | 	kvfree(shampo->info); | ||||||
|  | 	kvfree(shampo->bitmap); | ||||||
|  | 	kvfree(shampo->pages); | ||||||
|  | 
 | ||||||
|  | 	return -ENOMEM; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void mlx5e_rq_shampo_hd_info_free(struct mlx5e_rq *rq) | static void mlx5e_rq_shampo_hd_info_free(struct mlx5e_rq *rq) | ||||||
| { | { | ||||||
| 	kvfree(rq->mpwqe.shampo->bitmap); | 	kvfree(rq->mpwqe.shampo->bitmap); | ||||||
| 	kvfree(rq->mpwqe.shampo->info); | 	kvfree(rq->mpwqe.shampo->info); | ||||||
|  | 	kvfree(rq->mpwqe.shampo->pages); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node) | static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node) | ||||||
|  | @ -286,13 +293,23 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node) | ||||||
| 	int wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq); | 	int wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq); | ||||||
| 	size_t alloc_size; | 	size_t alloc_size; | ||||||
| 
 | 
 | ||||||
| 	alloc_size = array_size(wq_sz, struct_size(rq->mpwqe.info, alloc_units, | 	alloc_size = array_size(wq_sz, struct_size(rq->mpwqe.info, | ||||||
|  | 						   alloc_units.frag_pages, | ||||||
| 						   rq->mpwqe.pages_per_wqe)); | 						   rq->mpwqe.pages_per_wqe)); | ||||||
| 
 | 
 | ||||||
| 	rq->mpwqe.info = kvzalloc_node(alloc_size, GFP_KERNEL, node); | 	rq->mpwqe.info = kvzalloc_node(alloc_size, GFP_KERNEL, node); | ||||||
| 	if (!rq->mpwqe.info) | 	if (!rq->mpwqe.info) | ||||||
| 		return -ENOMEM; | 		return -ENOMEM; | ||||||
| 
 | 
 | ||||||
|  | 	/* For deferred page release (release right before alloc), make sure
 | ||||||
|  | 	 * that on first round release is not called. | ||||||
|  | 	 */ | ||||||
|  | 	for (int i = 0; i < wq_sz; i++) { | ||||||
|  | 		struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, i); | ||||||
|  | 
 | ||||||
|  | 		bitmap_fill(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
| 	mlx5e_build_umr_wqe(rq, rq->icosq, &rq->mpwqe.umr_wqe); | 	mlx5e_build_umr_wqe(rq, rq->icosq, &rq->mpwqe.umr_wqe); | ||||||
| 
 | 
 | ||||||
| 	return 0; | 	return 0; | ||||||
|  | @ -499,14 +516,12 @@ static void mlx5e_init_frags_partition(struct mlx5e_rq *rq) | ||||||
| 	struct mlx5e_wqe_frag_info *prev = NULL; | 	struct mlx5e_wqe_frag_info *prev = NULL; | ||||||
| 	int i; | 	int i; | ||||||
| 
 | 
 | ||||||
| 	if (rq->xsk_pool) { | 	WARN_ON(rq->xsk_pool); | ||||||
| 		/* Assumptions used by XSK batched allocator. */ |  | ||||||
| 		WARN_ON(rq->wqe.info.num_frags != 1); |  | ||||||
| 		WARN_ON(rq->wqe.info.log_num_frags != 0); |  | ||||||
| 		WARN_ON(rq->wqe.info.arr[0].frag_stride != PAGE_SIZE); |  | ||||||
| 	} |  | ||||||
| 
 | 
 | ||||||
| 	next_frag.au = &rq->wqe.alloc_units[0]; | 	next_frag.frag_page = &rq->wqe.alloc_units->frag_pages[0]; | ||||||
|  | 
 | ||||||
|  | 	/* Skip first release due to deferred release. */ | ||||||
|  | 	next_frag.flags = BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); | ||||||
| 
 | 
 | ||||||
| 	for (i = 0; i < mlx5_wq_cyc_get_size(&rq->wqe.wq); i++) { | 	for (i = 0; i < mlx5_wq_cyc_get_size(&rq->wqe.wq); i++) { | ||||||
| 		struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0]; | 		struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0]; | ||||||
|  | @ -516,10 +531,11 @@ static void mlx5e_init_frags_partition(struct mlx5e_rq *rq) | ||||||
| 
 | 
 | ||||||
| 		for (f = 0; f < rq->wqe.info.num_frags; f++, frag++) { | 		for (f = 0; f < rq->wqe.info.num_frags; f++, frag++) { | ||||||
| 			if (next_frag.offset + frag_info[f].frag_stride > PAGE_SIZE) { | 			if (next_frag.offset + frag_info[f].frag_stride > PAGE_SIZE) { | ||||||
| 				next_frag.au++; | 				/* Pages are assigned at runtime. */ | ||||||
|  | 				next_frag.frag_page++; | ||||||
| 				next_frag.offset = 0; | 				next_frag.offset = 0; | ||||||
| 				if (prev) | 				if (prev) | ||||||
| 					prev->last_in_page = true; | 					prev->flags |= BIT(MLX5E_WQE_FRAG_LAST_IN_PAGE); | ||||||
| 			} | 			} | ||||||
| 			*frag = next_frag; | 			*frag = next_frag; | ||||||
| 
 | 
 | ||||||
|  | @ -530,25 +546,68 @@ static void mlx5e_init_frags_partition(struct mlx5e_rq *rq) | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	if (prev) | 	if (prev) | ||||||
| 		prev->last_in_page = true; | 		prev->flags |= BIT(MLX5E_WQE_FRAG_LAST_IN_PAGE); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static int mlx5e_init_au_list(struct mlx5e_rq *rq, int wq_sz, int node) | static void mlx5e_init_xsk_buffs(struct mlx5e_rq *rq) | ||||||
| { | { | ||||||
| 	int len = wq_sz << rq->wqe.info.log_num_frags; | 	int i; | ||||||
| 
 | 
 | ||||||
| 	rq->wqe.alloc_units = kvzalloc_node(array_size(len, sizeof(*rq->wqe.alloc_units)), | 	/* Assumptions used by XSK batched allocator. */ | ||||||
| 					    GFP_KERNEL, node); | 	WARN_ON(rq->wqe.info.num_frags != 1); | ||||||
| 	if (!rq->wqe.alloc_units) | 	WARN_ON(rq->wqe.info.log_num_frags != 0); | ||||||
|  | 	WARN_ON(rq->wqe.info.arr[0].frag_stride != PAGE_SIZE); | ||||||
|  | 
 | ||||||
|  | 	/* Considering the above assumptions a fragment maps to a single
 | ||||||
|  | 	 * xsk_buff. | ||||||
|  | 	 */ | ||||||
|  | 	for (i = 0; i < mlx5_wq_cyc_get_size(&rq->wqe.wq); i++) { | ||||||
|  | 		rq->wqe.frags[i].xskp = &rq->wqe.alloc_units->xsk_buffs[i]; | ||||||
|  | 
 | ||||||
|  | 		/* Skip first release due to deferred release as WQES are
 | ||||||
|  | 		 * not allocated yet. | ||||||
|  | 		 */ | ||||||
|  | 		rq->wqe.frags[i].flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static int mlx5e_init_wqe_alloc_info(struct mlx5e_rq *rq, int node) | ||||||
|  | { | ||||||
|  | 	int wq_sz = mlx5_wq_cyc_get_size(&rq->wqe.wq); | ||||||
|  | 	int len = wq_sz << rq->wqe.info.log_num_frags; | ||||||
|  | 	struct mlx5e_wqe_frag_info *frags; | ||||||
|  | 	union mlx5e_alloc_units *aus; | ||||||
|  | 	int aus_sz; | ||||||
|  | 
 | ||||||
|  | 	if (rq->xsk_pool) | ||||||
|  | 		aus_sz = sizeof(*aus->xsk_buffs); | ||||||
|  | 	else | ||||||
|  | 		aus_sz = sizeof(*aus->frag_pages); | ||||||
|  | 
 | ||||||
|  | 	aus = kvzalloc_node(array_size(len, aus_sz), GFP_KERNEL, node); | ||||||
|  | 	if (!aus) | ||||||
| 		return -ENOMEM; | 		return -ENOMEM; | ||||||
| 
 | 
 | ||||||
| 	mlx5e_init_frags_partition(rq); | 	frags = kvzalloc_node(array_size(len, sizeof(*frags)), GFP_KERNEL, node); | ||||||
|  | 	if (!frags) { | ||||||
|  | 		kvfree(aus); | ||||||
|  | 		return -ENOMEM; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	rq->wqe.alloc_units = aus; | ||||||
|  | 	rq->wqe.frags = frags; | ||||||
|  | 
 | ||||||
|  | 	if (rq->xsk_pool) | ||||||
|  | 		mlx5e_init_xsk_buffs(rq); | ||||||
|  | 	else | ||||||
|  | 		mlx5e_init_frags_partition(rq); | ||||||
| 
 | 
 | ||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void mlx5e_free_au_list(struct mlx5e_rq *rq) | static void mlx5e_free_wqe_alloc_info(struct mlx5e_rq *rq) | ||||||
| { | { | ||||||
|  | 	kvfree(rq->wqe.frags); | ||||||
| 	kvfree(rq->wqe.alloc_units); | 	kvfree(rq->wqe.alloc_units); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -693,7 +752,6 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, | ||||||
| 			  struct mlx5e_rq_param *rqp, | 			  struct mlx5e_rq_param *rqp, | ||||||
| 			  int node, struct mlx5e_rq *rq) | 			  int node, struct mlx5e_rq *rq) | ||||||
| { | { | ||||||
| 	struct page_pool_params pp_params = { 0 }; |  | ||||||
| 	struct mlx5_core_dev *mdev = rq->mdev; | 	struct mlx5_core_dev *mdev = rq->mdev; | ||||||
| 	void *rqc = rqp->rqc; | 	void *rqc = rqp->rqc; | ||||||
| 	void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); | 	void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); | ||||||
|  | @ -778,18 +836,9 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, | ||||||
| 		rq->wqe.info = rqp->frags_info; | 		rq->wqe.info = rqp->frags_info; | ||||||
| 		rq->buff.frame0_sz = rq->wqe.info.arr[0].frag_stride; | 		rq->buff.frame0_sz = rq->wqe.info.arr[0].frag_stride; | ||||||
| 
 | 
 | ||||||
| 		rq->wqe.frags = | 		err = mlx5e_init_wqe_alloc_info(rq, node); | ||||||
| 			kvzalloc_node(array_size(sizeof(*rq->wqe.frags), |  | ||||||
| 					(wq_sz << rq->wqe.info.log_num_frags)), |  | ||||||
| 				      GFP_KERNEL, node); |  | ||||||
| 		if (!rq->wqe.frags) { |  | ||||||
| 			err = -ENOMEM; |  | ||||||
| 			goto err_rq_wq_destroy; |  | ||||||
| 		} |  | ||||||
| 
 |  | ||||||
| 		err = mlx5e_init_au_list(rq, wq_sz, node); |  | ||||||
| 		if (err) | 		if (err) | ||||||
| 			goto err_rq_frags; | 			goto err_rq_wq_destroy; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	if (xsk) { | 	if (xsk) { | ||||||
|  | @ -798,12 +847,15 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, | ||||||
| 		xsk_pool_set_rxq_info(rq->xsk_pool, &rq->xdp_rxq); | 		xsk_pool_set_rxq_info(rq->xsk_pool, &rq->xdp_rxq); | ||||||
| 	} else { | 	} else { | ||||||
| 		/* Create a page_pool and register it with rxq */ | 		/* Create a page_pool and register it with rxq */ | ||||||
|  | 		struct page_pool_params pp_params = { 0 }; | ||||||
|  | 
 | ||||||
| 		pp_params.order     = 0; | 		pp_params.order     = 0; | ||||||
| 		pp_params.flags     = 0; /* No-internal DMA mapping in page_pool */ | 		pp_params.flags     = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV | PP_FLAG_PAGE_FRAG; | ||||||
| 		pp_params.pool_size = pool_size; | 		pp_params.pool_size = pool_size; | ||||||
| 		pp_params.nid       = node; | 		pp_params.nid       = node; | ||||||
| 		pp_params.dev       = rq->pdev; | 		pp_params.dev       = rq->pdev; | ||||||
| 		pp_params.dma_dir   = rq->buff.map_dir; | 		pp_params.dma_dir   = rq->buff.map_dir; | ||||||
|  | 		pp_params.max_len   = PAGE_SIZE; | ||||||
| 
 | 
 | ||||||
| 		/* page_pool can be used even when there is no rq->xdp_prog,
 | 		/* page_pool can be used even when there is no rq->xdp_prog,
 | ||||||
| 		 * given page_pool does not handle DMA mapping there is no | 		 * given page_pool does not handle DMA mapping there is no | ||||||
|  | @ -869,9 +921,6 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, | ||||||
| 		rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; | 		rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	rq->page_cache.head = 0; |  | ||||||
| 	rq->page_cache.tail = 0; |  | ||||||
| 
 |  | ||||||
| 	return 0; | 	return 0; | ||||||
| 
 | 
 | ||||||
| err_destroy_page_pool: | err_destroy_page_pool: | ||||||
|  | @ -888,9 +937,7 @@ err_rq_drop_page: | ||||||
| 		mlx5e_free_mpwqe_rq_drop_page(rq); | 		mlx5e_free_mpwqe_rq_drop_page(rq); | ||||||
| 		break; | 		break; | ||||||
| 	default: /* MLX5_WQ_TYPE_CYCLIC */ | 	default: /* MLX5_WQ_TYPE_CYCLIC */ | ||||||
| 		mlx5e_free_au_list(rq); | 		mlx5e_free_wqe_alloc_info(rq); | ||||||
| err_rq_frags: |  | ||||||
| 		kvfree(rq->wqe.frags); |  | ||||||
| 	} | 	} | ||||||
| err_rq_wq_destroy: | err_rq_wq_destroy: | ||||||
| 	mlx5_wq_destroy(&rq->wq_ctrl); | 	mlx5_wq_destroy(&rq->wq_ctrl); | ||||||
|  | @ -904,7 +951,6 @@ err_rq_xdp_prog: | ||||||
| static void mlx5e_free_rq(struct mlx5e_rq *rq) | static void mlx5e_free_rq(struct mlx5e_rq *rq) | ||||||
| { | { | ||||||
| 	struct bpf_prog *old_prog; | 	struct bpf_prog *old_prog; | ||||||
| 	int i; |  | ||||||
| 
 | 
 | ||||||
| 	if (xdp_rxq_info_is_reg(&rq->xdp_rxq)) { | 	if (xdp_rxq_info_is_reg(&rq->xdp_rxq)) { | ||||||
| 		old_prog = rcu_dereference_protected(rq->xdp_prog, | 		old_prog = rcu_dereference_protected(rq->xdp_prog, | ||||||
|  | @ -921,17 +967,7 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq) | ||||||
| 		mlx5e_rq_free_shampo(rq); | 		mlx5e_rq_free_shampo(rq); | ||||||
| 		break; | 		break; | ||||||
| 	default: /* MLX5_WQ_TYPE_CYCLIC */ | 	default: /* MLX5_WQ_TYPE_CYCLIC */ | ||||||
| 		kvfree(rq->wqe.frags); | 		mlx5e_free_wqe_alloc_info(rq); | ||||||
| 		mlx5e_free_au_list(rq); |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	for (i = rq->page_cache.head; i != rq->page_cache.tail; |  | ||||||
| 	     i = (i + 1) & (MLX5E_CACHE_SIZE - 1)) { |  | ||||||
| 		/* With AF_XDP, page_cache is not used, so this loop is not
 |  | ||||||
| 		 * entered, and it's safe to call mlx5e_page_release_dynamic |  | ||||||
| 		 * directly. |  | ||||||
| 		 */ |  | ||||||
| 		mlx5e_page_release_dynamic(rq, rq->page_cache.page_cache[i], false); |  | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	xdp_rxq_info_unreg(&rq->xdp_rxq); | 	xdp_rxq_info_unreg(&rq->xdp_rxq); | ||||||
|  | @ -1094,7 +1130,7 @@ int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time) | ||||||
| 	return -ETIMEDOUT; | 	return -ETIMEDOUT; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq) | void mlx5e_free_rx_missing_descs(struct mlx5e_rq *rq) | ||||||
| { | { | ||||||
| 	struct mlx5_wq_ll *wq; | 	struct mlx5_wq_ll *wq; | ||||||
| 	u16 head; | 	u16 head; | ||||||
|  | @ -1106,8 +1142,12 @@ void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq) | ||||||
| 	wq = &rq->mpwqe.wq; | 	wq = &rq->mpwqe.wq; | ||||||
| 	head = wq->head; | 	head = wq->head; | ||||||
| 
 | 
 | ||||||
| 	/* Outstanding UMR WQEs (in progress) start at wq->head */ | 	/* Release WQEs that are in missing state: they have been
 | ||||||
| 	for (i = 0; i < rq->mpwqe.umr_in_progress; i++) { | 	 * popped from the list after completion but were not freed | ||||||
|  | 	 * due to deferred release. | ||||||
|  | 	 * Also free the linked-list reserved entry, hence the "+ 1". | ||||||
|  | 	 */ | ||||||
|  | 	for (i = 0; i < mlx5_wq_ll_missing(wq) + 1; i++) { | ||||||
| 		rq->dealloc_wqe(rq, head); | 		rq->dealloc_wqe(rq, head); | ||||||
| 		head = mlx5_wq_ll_get_wqe_next_ix(wq, head); | 		head = mlx5_wq_ll_get_wqe_next_ix(wq, head); | ||||||
| 	} | 	} | ||||||
|  | @ -1134,7 +1174,7 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq) | ||||||
| 	if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { | 	if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { | ||||||
| 		struct mlx5_wq_ll *wq = &rq->mpwqe.wq; | 		struct mlx5_wq_ll *wq = &rq->mpwqe.wq; | ||||||
| 
 | 
 | ||||||
| 		mlx5e_free_rx_in_progress_descs(rq); | 		mlx5e_free_rx_missing_descs(rq); | ||||||
| 
 | 
 | ||||||
| 		while (!mlx5_wq_ll_is_empty(wq)) { | 		while (!mlx5_wq_ll_is_empty(wq)) { | ||||||
| 			struct mlx5e_rx_wqe_ll *wqe; | 			struct mlx5e_rx_wqe_ll *wqe; | ||||||
|  | @ -1152,12 +1192,21 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq) | ||||||
| 						0, true); | 						0, true); | ||||||
| 	} else { | 	} else { | ||||||
| 		struct mlx5_wq_cyc *wq = &rq->wqe.wq; | 		struct mlx5_wq_cyc *wq = &rq->wqe.wq; | ||||||
|  | 		u16 missing = mlx5_wq_cyc_missing(wq); | ||||||
|  | 		u16 head = mlx5_wq_cyc_get_head(wq); | ||||||
| 
 | 
 | ||||||
| 		while (!mlx5_wq_cyc_is_empty(wq)) { | 		while (!mlx5_wq_cyc_is_empty(wq)) { | ||||||
| 			wqe_ix = mlx5_wq_cyc_get_tail(wq); | 			wqe_ix = mlx5_wq_cyc_get_tail(wq); | ||||||
| 			rq->dealloc_wqe(rq, wqe_ix); | 			rq->dealloc_wqe(rq, wqe_ix); | ||||||
| 			mlx5_wq_cyc_pop(wq); | 			mlx5_wq_cyc_pop(wq); | ||||||
| 		} | 		} | ||||||
|  | 		/* Missing slots might also contain unreleased pages due to
 | ||||||
|  | 		 * deferred release. | ||||||
|  | 		 */ | ||||||
|  | 		while (missing--) { | ||||||
|  | 			wqe_ix = mlx5_wq_cyc_ctr2ix(wq, head++); | ||||||
|  | 			rq->dealloc_wqe(rq, wqe_ix); | ||||||
|  | 		} | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -271,98 +271,35 @@ static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq, | ||||||
| 	return mlx5e_decompress_cqes_cont(rq, wq, 1, budget_rem); | 	return mlx5e_decompress_cqes_cont(rq, wq, 1, budget_rem); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, struct page *page) | #define MLX5E_PAGECNT_BIAS_MAX (PAGE_SIZE / 64) | ||||||
|  | 
 | ||||||
|  | static int mlx5e_page_alloc_fragmented(struct mlx5e_rq *rq, | ||||||
|  | 				       struct mlx5e_frag_page *frag_page) | ||||||
| { | { | ||||||
| 	struct mlx5e_page_cache *cache = &rq->page_cache; | 	struct page *page; | ||||||
| 	u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1); |  | ||||||
| 	struct mlx5e_rq_stats *stats = rq->stats; |  | ||||||
| 
 | 
 | ||||||
| 	if (tail_next == cache->head) { | 	page = page_pool_dev_alloc_pages(rq->page_pool); | ||||||
| 		stats->cache_full++; | 	if (unlikely(!page)) | ||||||
| 		return false; |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	if (!dev_page_is_reusable(page)) { |  | ||||||
| 		stats->cache_waive++; |  | ||||||
| 		return false; |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	cache->page_cache[cache->tail] = page; |  | ||||||
| 	cache->tail = tail_next; |  | ||||||
| 	return true; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, union mlx5e_alloc_unit *au) |  | ||||||
| { |  | ||||||
| 	struct mlx5e_page_cache *cache = &rq->page_cache; |  | ||||||
| 	struct mlx5e_rq_stats *stats = rq->stats; |  | ||||||
| 	dma_addr_t addr; |  | ||||||
| 
 |  | ||||||
| 	if (unlikely(cache->head == cache->tail)) { |  | ||||||
| 		stats->cache_empty++; |  | ||||||
| 		return false; |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	if (page_ref_count(cache->page_cache[cache->head]) != 1) { |  | ||||||
| 		stats->cache_busy++; |  | ||||||
| 		return false; |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	au->page = cache->page_cache[cache->head]; |  | ||||||
| 	cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1); |  | ||||||
| 	stats->cache_reuse++; |  | ||||||
| 
 |  | ||||||
| 	addr = page_pool_get_dma_addr(au->page); |  | ||||||
| 	/* Non-XSK always uses PAGE_SIZE. */ |  | ||||||
| 	dma_sync_single_for_device(rq->pdev, addr, PAGE_SIZE, rq->buff.map_dir); |  | ||||||
| 	return true; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq, union mlx5e_alloc_unit *au) |  | ||||||
| { |  | ||||||
| 	dma_addr_t addr; |  | ||||||
| 
 |  | ||||||
| 	if (mlx5e_rx_cache_get(rq, au)) |  | ||||||
| 		return 0; |  | ||||||
| 
 |  | ||||||
| 	au->page = page_pool_dev_alloc_pages(rq->page_pool); |  | ||||||
| 	if (unlikely(!au->page)) |  | ||||||
| 		return -ENOMEM; | 		return -ENOMEM; | ||||||
| 
 | 
 | ||||||
| 	/* Non-XSK always uses PAGE_SIZE. */ | 	page_pool_fragment_page(page, MLX5E_PAGECNT_BIAS_MAX); | ||||||
| 	addr = dma_map_page(rq->pdev, au->page, 0, PAGE_SIZE, rq->buff.map_dir); | 
 | ||||||
| 	if (unlikely(dma_mapping_error(rq->pdev, addr))) { | 	*frag_page = (struct mlx5e_frag_page) { | ||||||
| 		page_pool_recycle_direct(rq->page_pool, au->page); | 		.page	= page, | ||||||
| 		au->page = NULL; | 		.frags	= 0, | ||||||
| 		return -ENOMEM; | 	}; | ||||||
| 	} |  | ||||||
| 	page_pool_set_dma_addr(au->page, addr); |  | ||||||
| 
 | 
 | ||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct page *page) | static void mlx5e_page_release_fragmented(struct mlx5e_rq *rq, | ||||||
|  | 					  struct mlx5e_frag_page *frag_page) | ||||||
| { | { | ||||||
| 	dma_addr_t dma_addr = page_pool_get_dma_addr(page); | 	u16 drain_count = MLX5E_PAGECNT_BIAS_MAX - frag_page->frags; | ||||||
|  | 	struct page *page = frag_page->page; | ||||||
| 
 | 
 | ||||||
| 	dma_unmap_page_attrs(rq->pdev, dma_addr, PAGE_SIZE, rq->buff.map_dir, | 	if (page_pool_defrag_page(page, drain_count) == 0) | ||||||
| 			     DMA_ATTR_SKIP_CPU_SYNC); | 		page_pool_put_defragged_page(rq->page_pool, page, -1, true); | ||||||
| 	page_pool_set_dma_addr(page, 0); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, struct page *page, bool recycle) |  | ||||||
| { |  | ||||||
| 	if (likely(recycle)) { |  | ||||||
| 		if (mlx5e_rx_cache_put(rq, page)) |  | ||||||
| 			return; |  | ||||||
| 
 |  | ||||||
| 		mlx5e_page_dma_unmap(rq, page); |  | ||||||
| 		page_pool_recycle_direct(rq->page_pool, page); |  | ||||||
| 	} else { |  | ||||||
| 		mlx5e_page_dma_unmap(rq, page); |  | ||||||
| 		page_pool_release_page(rq->page_pool, page); |  | ||||||
| 		put_page(page); |  | ||||||
| 	} |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq, | static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq, | ||||||
|  | @ -371,22 +308,31 @@ static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq, | ||||||
| 	int err = 0; | 	int err = 0; | ||||||
| 
 | 
 | ||||||
| 	if (!frag->offset) | 	if (!frag->offset) | ||||||
| 		/* On first frag (offset == 0), replenish page (alloc_unit actually).
 | 		/* On first frag (offset == 0), replenish page.
 | ||||||
| 		 * Other frags that point to the same alloc_unit (with a different | 		 * Other frags that point to the same page (with a different | ||||||
| 		 * offset) should just use the new one without replenishing again | 		 * offset) should just use the new one without replenishing again | ||||||
| 		 * by themselves. | 		 * by themselves. | ||||||
| 		 */ | 		 */ | ||||||
| 		err = mlx5e_page_alloc_pool(rq, frag->au); | 		err = mlx5e_page_alloc_fragmented(rq, frag->frag_page); | ||||||
| 
 | 
 | ||||||
| 	return err; | 	return err; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static inline void mlx5e_put_rx_frag(struct mlx5e_rq *rq, | static bool mlx5e_frag_can_release(struct mlx5e_wqe_frag_info *frag) | ||||||
| 				     struct mlx5e_wqe_frag_info *frag, |  | ||||||
| 				     bool recycle) |  | ||||||
| { | { | ||||||
| 	if (frag->last_in_page) | #define CAN_RELEASE_MASK \ | ||||||
| 		mlx5e_page_release_dynamic(rq, frag->au->page, recycle); | 	(BIT(MLX5E_WQE_FRAG_LAST_IN_PAGE) | BIT(MLX5E_WQE_FRAG_SKIP_RELEASE)) | ||||||
|  | 
 | ||||||
|  | #define CAN_RELEASE_VALUE BIT(MLX5E_WQE_FRAG_LAST_IN_PAGE) | ||||||
|  | 
 | ||||||
|  | 	return (frag->flags & CAN_RELEASE_MASK) == CAN_RELEASE_VALUE; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static inline void mlx5e_put_rx_frag(struct mlx5e_rq *rq, | ||||||
|  | 				     struct mlx5e_wqe_frag_info *frag) | ||||||
|  | { | ||||||
|  | 	if (mlx5e_frag_can_release(frag)) | ||||||
|  | 		mlx5e_page_release_fragmented(rq, frag->frag_page); | ||||||
| } | } | ||||||
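
The mask test above replaces the old recycle argument: whether a fragment is released is now recorded in per-fragment state rather than passed down the call chain. A compact sketch of the lifecycle these flags encode (the helper name is hypothetical; the two MLX5E_WQE_FRAG_* flags are the driver's):

/* Lifecycle sketch:
 *  - mlx5e_alloc_rx_wqe() clears SKIP_RELEASE once the frag is posted;
 *  - the XDP_TX path sets SKIP_RELEASE, deferring the page to TX completion;
 *  - only the frag marked LAST_IN_PAGE may release the shared page.
 */
static bool frag_owns_final_release(u8 flags)
{
	u8 mask = BIT(MLX5E_WQE_FRAG_LAST_IN_PAGE) |
		  BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);

	return (flags & mask) == BIT(MLX5E_WQE_FRAG_LAST_IN_PAGE);
}
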
| 
 | 
 | ||||||
| static inline struct mlx5e_wqe_frag_info *get_frag(struct mlx5e_rq *rq, u16 ix) | static inline struct mlx5e_wqe_frag_info *get_frag(struct mlx5e_rq *rq, u16 ix) | ||||||
|  | @ -409,8 +355,10 @@ static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe_cyc *wqe, | ||||||
| 		if (unlikely(err)) | 		if (unlikely(err)) | ||||||
| 			goto free_frags; | 			goto free_frags; | ||||||
| 
 | 
 | ||||||
|  | 		frag->flags &= ~BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); | ||||||
|  | 
 | ||||||
| 		headroom = i == 0 ? rq->buff.headroom : 0; | 		headroom = i == 0 ? rq->buff.headroom : 0; | ||||||
| 		addr = page_pool_get_dma_addr(frag->au->page); | 		addr = page_pool_get_dma_addr(frag->frag_page->page); | ||||||
| 		wqe->data[i].addr = cpu_to_be64(addr + frag->offset + headroom); | 		wqe->data[i].addr = cpu_to_be64(addr + frag->offset + headroom); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  | @ -418,35 +366,66 @@ static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe_cyc *wqe, | ||||||
| 
 | 
 | ||||||
| free_frags: | free_frags: | ||||||
| 	while (--i >= 0) | 	while (--i >= 0) | ||||||
| 		mlx5e_put_rx_frag(rq, --frag, true); | 		mlx5e_put_rx_frag(rq, --frag); | ||||||
| 
 | 
 | ||||||
| 	return err; | 	return err; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static inline void mlx5e_free_rx_wqe(struct mlx5e_rq *rq, | static inline void mlx5e_free_rx_wqe(struct mlx5e_rq *rq, | ||||||
| 				     struct mlx5e_wqe_frag_info *wi, | 				     struct mlx5e_wqe_frag_info *wi) | ||||||
| 				     bool recycle) |  | ||||||
| { | { | ||||||
| 	int i; | 	int i; | ||||||
| 
 | 
 | ||||||
| 	if (rq->xsk_pool) { |  | ||||||
| 		/* The `recycle` parameter is ignored, and the page is always |  | ||||||
| 		 * put into the Reuse Ring, because there is no way to return |  | ||||||
| 		 * the page to the userspace when the interface goes down. |  | ||||||
| 		 */ |  | ||||||
| 		xsk_buff_free(wi->au->xsk); |  | ||||||
| 		return; |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	for (i = 0; i < rq->wqe.info.num_frags; i++, wi++) | 	for (i = 0; i < rq->wqe.info.num_frags; i++, wi++) | ||||||
| 		mlx5e_put_rx_frag(rq, wi, recycle); | 		mlx5e_put_rx_frag(rq, wi); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void mlx5e_xsk_free_rx_wqe(struct mlx5e_wqe_frag_info *wi) | ||||||
|  | { | ||||||
|  | 	if (!(wi->flags & BIT(MLX5E_WQE_FRAG_SKIP_RELEASE))) | ||||||
|  | 		xsk_buff_free(*wi->xskp); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix) | static void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix) | ||||||
| { | { | ||||||
| 	struct mlx5e_wqe_frag_info *wi = get_frag(rq, ix); | 	struct mlx5e_wqe_frag_info *wi = get_frag(rq, ix); | ||||||
| 
 | 
 | ||||||
| 	mlx5e_free_rx_wqe(rq, wi, false); | 	if (rq->xsk_pool) | ||||||
|  | 		mlx5e_xsk_free_rx_wqe(wi); | ||||||
|  | 	else | ||||||
|  | 		mlx5e_free_rx_wqe(rq, wi); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void mlx5e_xsk_free_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) | ||||||
|  | { | ||||||
|  | 	struct mlx5_wq_cyc *wq = &rq->wqe.wq; | ||||||
|  | 	int i; | ||||||
|  | 
 | ||||||
|  | 	for (i = 0; i < wqe_bulk; i++) { | ||||||
|  | 		int j = mlx5_wq_cyc_ctr2ix(wq, ix + i); | ||||||
|  | 		struct mlx5e_wqe_frag_info *wi; | ||||||
|  | 
 | ||||||
|  | 		wi = get_frag(rq, j); | ||||||
|  | 		/* The page is always put into the Reuse Ring, because there | ||||||
|  | 		 * is no way to return the page to the userspace when the | ||||||
|  | 		 * interface goes down. | ||||||
|  | 		 */ | ||||||
|  | 		mlx5e_xsk_free_rx_wqe(wi); | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void mlx5e_free_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) | ||||||
|  | { | ||||||
|  | 	struct mlx5_wq_cyc *wq = &rq->wqe.wq; | ||||||
|  | 	int i; | ||||||
|  | 
 | ||||||
|  | 	for (i = 0; i < wqe_bulk; i++) { | ||||||
|  | 		int j = mlx5_wq_cyc_ctr2ix(wq, ix + i); | ||||||
|  | 		struct mlx5e_wqe_frag_info *wi; | ||||||
|  | 
 | ||||||
|  | 		wi = get_frag(rq, j); | ||||||
|  | 		mlx5e_free_rx_wqe(rq, wi); | ||||||
|  | 	} | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) | static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) | ||||||
|  | @ -467,18 +446,42 @@ static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) | ||||||
| 	return i; | 	return i; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static int mlx5e_refill_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) | ||||||
|  | { | ||||||
|  | 	int remaining = wqe_bulk; | ||||||
|  | 	int i = 0; | ||||||
|  | 
 | ||||||
|  | 	/* The WQE bulk is split into smaller bulks that are sized | ||||||
|  | 	 * according to the page pool cache refill size to avoid overflowing | ||||||
|  | 	 * the page pool cache due to too many page releases at once. | ||||||
|  | 	 */ | ||||||
|  | 	do { | ||||||
|  | 		int refill = min_t(u16, rq->wqe.info.refill_unit, remaining); | ||||||
|  | 		int alloc_count; | ||||||
|  | 
 | ||||||
|  | 		mlx5e_free_rx_wqes(rq, ix + i, refill); | ||||||
|  | 		alloc_count = mlx5e_alloc_rx_wqes(rq, ix + i, refill); | ||||||
|  | 		i += alloc_count; | ||||||
|  | 		if (unlikely(alloc_count != refill)) | ||||||
|  | 			break; | ||||||
|  | 
 | ||||||
|  | 		remaining -= refill; | ||||||
|  | 	} while (remaining); | ||||||
|  | 
 | ||||||
|  | 	return i; | ||||||
|  | } | ||||||
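
mlx5e_refill_rx_wqes() above frees the old pages for a chunk of WQEs immediately before re-allocating that same chunk, with the chunk size capped at rq->wqe.info.refill_unit, so released pages land in the page_pool's small direct cache and are consumed by the very next allocations instead of spilling to the slower ring. A generic sketch of the chunking idea; release_bulk(), alloc_bulk() and refill_unit are hypothetical stand-ins:

/* Sketch only: process a large bulk in cache-sized chunks, freeing just
 * before allocating, and stop early if an allocation comes up short.
 */
static int refill_in_chunks(int head, int wqe_bulk, int refill_unit)
{
	int remaining = wqe_bulk;
	int done = 0;

	do {
		int chunk = min(refill_unit, remaining);
		int got;

		release_bulk(head + done, chunk);	/* pages go to the pool cache  */
		got = alloc_bulk(head + done, chunk);	/* ...and are reused right away */

		done += got;
		if (got != chunk)
			break;

		remaining -= chunk;
	} while (remaining);

	return done;
}
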
|  | 
 | ||||||
| static inline void | static inline void | ||||||
| mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb, | mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb, | ||||||
| 		   union mlx5e_alloc_unit *au, u32 frag_offset, u32 len, | 		   struct page *page, u32 frag_offset, u32 len, | ||||||
| 		   unsigned int truesize) | 		   unsigned int truesize) | ||||||
| { | { | ||||||
| 	dma_addr_t addr = page_pool_get_dma_addr(au->page); | 	dma_addr_t addr = page_pool_get_dma_addr(page); | ||||||
| 
 | 
 | ||||||
| 	dma_sync_single_for_cpu(rq->pdev, addr + frag_offset, len, | 	dma_sync_single_for_cpu(rq->pdev, addr + frag_offset, len, | ||||||
| 				rq->buff.map_dir); | 				rq->buff.map_dir); | ||||||
| 	page_ref_inc(au->page); |  | ||||||
| 	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, | 	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, | ||||||
| 			au->page, frag_offset, len, truesize); | 			page, frag_offset, len, truesize); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static inline void | static inline void | ||||||
|  | @ -496,30 +499,36 @@ mlx5e_copy_skb_header(struct mlx5e_rq *rq, struct sk_buff *skb, | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void | static void | ||||||
| mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, bool recycle) | mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi) | ||||||
| { | { | ||||||
| 	union mlx5e_alloc_unit *alloc_units = wi->alloc_units; |  | ||||||
| 	bool no_xdp_xmit; | 	bool no_xdp_xmit; | ||||||
| 	int i; | 	int i; | ||||||
| 
 | 
 | ||||||
| 	/* A common case for AF_XDP. */ | 	/* A common case for AF_XDP. */ | ||||||
| 	if (bitmap_full(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe)) | 	if (bitmap_full(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe)) | ||||||
| 		return; | 		return; | ||||||
| 
 | 
 | ||||||
| 	no_xdp_xmit = bitmap_empty(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe); | 	no_xdp_xmit = bitmap_empty(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe); | ||||||
| 
 | 
 | ||||||
| 	if (rq->xsk_pool) { | 	if (rq->xsk_pool) { | ||||||
| 		/* The `recycle` parameter is ignored, and the page is always | 		struct xdp_buff **xsk_buffs = wi->alloc_units.xsk_buffs; | ||||||
| 		 * put into the Reuse Ring, because there is no way to return |  | ||||||
| 		 * the page to the userspace when the interface goes down. | 		/* The page is always put into the Reuse Ring, because there | ||||||
|  | 		 * is no way to return the page to userspace when the interface | ||||||
|  | 		 * goes down. | ||||||
| 		 */ | 		 */ | ||||||
| 		for (i = 0; i < rq->mpwqe.pages_per_wqe; i++) | 		for (i = 0; i < rq->mpwqe.pages_per_wqe; i++) | ||||||
| 			if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap)) | 			if (no_xdp_xmit || !test_bit(i, wi->skip_release_bitmap)) | ||||||
| 				xsk_buff_free(alloc_units[i].xsk); | 				xsk_buff_free(xsk_buffs[i]); | ||||||
| 	} else { | 	} else { | ||||||
| 		for (i = 0; i < rq->mpwqe.pages_per_wqe; i++) | 		for (i = 0; i < rq->mpwqe.pages_per_wqe; i++) { | ||||||
| 			if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap)) | 			if (no_xdp_xmit || !test_bit(i, wi->skip_release_bitmap)) { | ||||||
| 				mlx5e_page_release_dynamic(rq, alloc_units[i].page, recycle); | 				struct mlx5e_frag_page *frag_page; | ||||||
|  | 
 | ||||||
|  | 				frag_page = &wi->alloc_units.frag_pages[i]; | ||||||
|  | 				mlx5e_page_release_fragmented(rq, frag_page); | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
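
mlx5e_free_rx_mpwqe() now picks its release path off a union: the same per-WQE slots hold either fragmented page_pool pages or AF_XDP buffers, and skip_release_bitmap marks slots whose pages were handed to XDP. A rough sketch of the shape these accessors imply (names and exact layout are illustrative, not the driver's definition):

union rx_mpwqe_alloc_units {
	struct mlx5e_frag_page *frag_pages; /* regular RQ: page_pool pages + frag counts */
	struct xdp_buff       **xsk_buffs;  /* AF_XDP RQ: zero-copy frames               */
};
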
| 
 | 
 | ||||||
|  | @ -583,7 +592,8 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, | ||||||
| 	struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; | 	struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; | ||||||
| 	u16 entries, pi, header_offset, err, wqe_bbs, new_entries; | 	u16 entries, pi, header_offset, err, wqe_bbs, new_entries; | ||||||
| 	u32 lkey = rq->mdev->mlx5e_res.hw_objs.mkey; | 	u32 lkey = rq->mdev->mlx5e_res.hw_objs.mkey; | ||||||
| 	struct page *page = shampo->last_page; | 	u16 page_index = shampo->curr_page_index; | ||||||
|  | 	struct mlx5e_frag_page *frag_page; | ||||||
| 	u64 addr = shampo->last_addr; | 	u64 addr = shampo->last_addr; | ||||||
| 	struct mlx5e_dma_info *dma_info; | 	struct mlx5e_dma_info *dma_info; | ||||||
| 	struct mlx5e_umr_wqe *umr_wqe; | 	struct mlx5e_umr_wqe *umr_wqe; | ||||||
|  | @ -597,6 +607,8 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, | ||||||
| 	umr_wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); | 	umr_wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); | ||||||
| 	build_klm_umr(sq, umr_wqe, shampo->key, index, entries, wqe_bbs); | 	build_klm_umr(sq, umr_wqe, shampo->key, index, entries, wqe_bbs); | ||||||
| 
 | 
 | ||||||
|  | 	frag_page = &shampo->pages[page_index]; | ||||||
|  | 
 | ||||||
| 	for (i = 0; i < entries; i++, index++) { | 	for (i = 0; i < entries; i++, index++) { | ||||||
| 		dma_info = &shampo->info[index]; | 		dma_info = &shampo->info[index]; | ||||||
| 		if (i >= klm_entries || (index < shampo->pi && shampo->pi - index < | 		if (i >= klm_entries || (index < shampo->pi && shampo->pi - index < | ||||||
|  | @ -605,16 +617,20 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, | ||||||
| 		header_offset = (index & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) << | 		header_offset = (index & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) << | ||||||
| 			MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE; | 			MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE; | ||||||
| 		if (!(header_offset & (PAGE_SIZE - 1))) { | 		if (!(header_offset & (PAGE_SIZE - 1))) { | ||||||
| 			union mlx5e_alloc_unit au; | 			page_index = (page_index + 1) & (shampo->hd_per_wq - 1); | ||||||
|  | 			frag_page = &shampo->pages[page_index]; | ||||||
| 
 | 
 | ||||||
| 			err = mlx5e_page_alloc_pool(rq, &au); | 			err = mlx5e_page_alloc_fragmented(rq, frag_page); | ||||||
| 			if (unlikely(err)) | 			if (unlikely(err)) | ||||||
| 				goto err_unmap; | 				goto err_unmap; | ||||||
| 			page = dma_info->page = au.page; | 
 | ||||||
| 			addr = dma_info->addr = page_pool_get_dma_addr(au.page); | 			addr = page_pool_get_dma_addr(frag_page->page); | ||||||
|  | 
 | ||||||
|  | 			dma_info->addr = addr; | ||||||
|  | 			dma_info->frag_page = frag_page; | ||||||
| 		} else { | 		} else { | ||||||
| 			dma_info->addr = addr + header_offset; | 			dma_info->addr = addr + header_offset; | ||||||
| 			dma_info->page = page; | 			dma_info->frag_page = frag_page; | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
| update_klm: | update_klm: | ||||||
|  | @ -632,7 +648,7 @@ update_klm: | ||||||
| 	}; | 	}; | ||||||
| 
 | 
 | ||||||
| 	shampo->pi = (shampo->pi + new_entries) & (shampo->hd_per_wq - 1); | 	shampo->pi = (shampo->pi + new_entries) & (shampo->hd_per_wq - 1); | ||||||
| 	shampo->last_page = page; | 	shampo->curr_page_index = page_index; | ||||||
| 	shampo->last_addr = addr; | 	shampo->last_addr = addr; | ||||||
| 	sq->pc += wqe_bbs; | 	sq->pc += wqe_bbs; | ||||||
| 	sq->doorbell_cseg = &umr_wqe->ctrl; | 	sq->doorbell_cseg = &umr_wqe->ctrl; | ||||||
|  | @ -644,7 +660,7 @@ err_unmap: | ||||||
| 		dma_info = &shampo->info[--index]; | 		dma_info = &shampo->info[--index]; | ||||||
| 		if (!(i & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1))) { | 		if (!(i & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1))) { | ||||||
| 			dma_info->addr = ALIGN_DOWN(dma_info->addr, PAGE_SIZE); | 			dma_info->addr = ALIGN_DOWN(dma_info->addr, PAGE_SIZE); | ||||||
| 			mlx5e_page_release_dynamic(rq, dma_info->page, true); | 			mlx5e_page_release_fragmented(rq, dma_info->frag_page); | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| 	rq->stats->buff_alloc_err++; | 	rq->stats->buff_alloc_err++; | ||||||
|  | @ -693,8 +709,8 @@ static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq) | ||||||
| static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) | static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) | ||||||
| { | { | ||||||
| 	struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix); | 	struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix); | ||||||
| 	union mlx5e_alloc_unit *au = &wi->alloc_units[0]; |  | ||||||
| 	struct mlx5e_icosq *sq = rq->icosq; | 	struct mlx5e_icosq *sq = rq->icosq; | ||||||
|  | 	struct mlx5e_frag_page *frag_page; | ||||||
| 	struct mlx5_wq_cyc *wq = &sq->wq; | 	struct mlx5_wq_cyc *wq = &sq->wq; | ||||||
| 	struct mlx5e_umr_wqe *umr_wqe; | 	struct mlx5e_umr_wqe *umr_wqe; | ||||||
| 	u32 offset; /* 17-bit value with MTT. */ | 	u32 offset; /* 17-bit value with MTT. */ | ||||||
|  | @ -712,13 +728,15 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) | ||||||
| 	umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi); | 	umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi); | ||||||
| 	memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe)); | 	memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe)); | ||||||
| 
 | 
 | ||||||
| 	for (i = 0; i < rq->mpwqe.pages_per_wqe; i++, au++) { | 	frag_page = &wi->alloc_units.frag_pages[0]; | ||||||
|  | 
 | ||||||
|  | 	for (i = 0; i < rq->mpwqe.pages_per_wqe; i++, frag_page++) { | ||||||
| 		dma_addr_t addr; | 		dma_addr_t addr; | ||||||
| 
 | 
 | ||||||
| 		err = mlx5e_page_alloc_pool(rq, au); | 		err = mlx5e_page_alloc_fragmented(rq, frag_page); | ||||||
| 		if (unlikely(err)) | 		if (unlikely(err)) | ||||||
| 			goto err_unmap; | 			goto err_unmap; | ||||||
| 		addr = page_pool_get_dma_addr(au->page); | 		addr = page_pool_get_dma_addr(frag_page->page); | ||||||
| 		umr_wqe->inline_mtts[i] = (struct mlx5_mtt) { | 		umr_wqe->inline_mtts[i] = (struct mlx5_mtt) { | ||||||
| 			.ptag = cpu_to_be64(addr | MLX5_EN_WR), | 			.ptag = cpu_to_be64(addr | MLX5_EN_WR), | ||||||
| 		}; | 		}; | ||||||
|  | @ -735,7 +753,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) | ||||||
| 		       sizeof(*umr_wqe->inline_mtts) * pad); | 		       sizeof(*umr_wqe->inline_mtts) * pad); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	bitmap_zero(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe); | 	bitmap_zero(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe); | ||||||
| 	wi->consumed_strides = 0; | 	wi->consumed_strides = 0; | ||||||
| 
 | 
 | ||||||
| 	umr_wqe->ctrl.opmod_idx_opcode = | 	umr_wqe->ctrl.opmod_idx_opcode = | ||||||
|  | @ -759,8 +777,8 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) | ||||||
| 
 | 
 | ||||||
| err_unmap: | err_unmap: | ||||||
| 	while (--i >= 0) { | 	while (--i >= 0) { | ||||||
| 		au--; | 		frag_page--; | ||||||
| 		mlx5e_page_release_dynamic(rq, au->page, true); | 		mlx5e_page_release_fragmented(rq, frag_page); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| err: | err: | ||||||
|  | @ -778,8 +796,8 @@ err: | ||||||
| void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close) | void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close) | ||||||
| { | { | ||||||
| 	struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; | 	struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; | ||||||
|  | 	struct mlx5e_frag_page *deleted_page = NULL; | ||||||
| 	int hd_per_wq = shampo->hd_per_wq; | 	int hd_per_wq = shampo->hd_per_wq; | ||||||
| 	struct page *deleted_page = NULL; |  | ||||||
| 	struct mlx5e_dma_info *hd_info; | 	struct mlx5e_dma_info *hd_info; | ||||||
| 	int i, index = start; | 	int i, index = start; | ||||||
| 
 | 
 | ||||||
|  | @ -792,10 +810,12 @@ void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close | ||||||
| 
 | 
 | ||||||
| 		hd_info = &shampo->info[index]; | 		hd_info = &shampo->info[index]; | ||||||
| 		hd_info->addr = ALIGN_DOWN(hd_info->addr, PAGE_SIZE); | 		hd_info->addr = ALIGN_DOWN(hd_info->addr, PAGE_SIZE); | ||||||
| 		if (hd_info->page != deleted_page) { | 		if (hd_info->frag_page && hd_info->frag_page != deleted_page) { | ||||||
| 			deleted_page = hd_info->page; | 			deleted_page = hd_info->frag_page; | ||||||
| 			mlx5e_page_release_dynamic(rq, hd_info->page, false); | 			mlx5e_page_release_fragmented(rq, hd_info->frag_page); | ||||||
| 		} | 		} | ||||||
|  | 
 | ||||||
|  | 		hd_info->frag_page = NULL; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	if (start + len > hd_per_wq) { | 	if (start + len > hd_per_wq) { | ||||||
|  | @ -810,8 +830,8 @@ void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close | ||||||
| static void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) | static void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) | ||||||
| { | { | ||||||
| 	struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix); | 	struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix); | ||||||
| 	/* Don't recycle, this function is called on rq/netdev close */ | 	/* This function is called on rq/netdev close. */ | ||||||
| 	mlx5e_free_rx_mpwqe(rq, wi, false); | 	mlx5e_free_rx_mpwqe(rq, wi); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) | INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) | ||||||
|  | @ -838,17 +858,20 @@ INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) | ||||||
| 	 */ | 	 */ | ||||||
| 	wqe_bulk -= (head + wqe_bulk) & rq->wqe.info.wqe_index_mask; | 	wqe_bulk -= (head + wqe_bulk) & rq->wqe.info.wqe_index_mask; | ||||||
| 
 | 
 | ||||||
| 	if (!rq->xsk_pool) | 	if (!rq->xsk_pool) { | ||||||
| 		count = mlx5e_alloc_rx_wqes(rq, head, wqe_bulk); | 		count = mlx5e_refill_rx_wqes(rq, head, wqe_bulk); | ||||||
| 	else if (likely(!rq->xsk_pool->dma_need_sync)) | 	} else if (likely(!rq->xsk_pool->dma_need_sync)) { | ||||||
|  | 		mlx5e_xsk_free_rx_wqes(rq, head, wqe_bulk); | ||||||
| 		count = mlx5e_xsk_alloc_rx_wqes_batched(rq, head, wqe_bulk); | 		count = mlx5e_xsk_alloc_rx_wqes_batched(rq, head, wqe_bulk); | ||||||
| 	else | 	} else { | ||||||
|  | 		mlx5e_xsk_free_rx_wqes(rq, head, wqe_bulk); | ||||||
| 		/* If dma_need_sync is true, it's more efficient to call | 		/* If dma_need_sync is true, it's more efficient to call | ||||||
| 		 * xsk_buff_alloc in a loop, rather than xsk_buff_alloc_batch, | 		 * xsk_buff_alloc in a loop, rather than xsk_buff_alloc_batch, | ||||||
| 		 * because the latter does the same check and returns only one | 		 * because the latter does the same check and returns only one | ||||||
| 		 * frame. | 		 * frame. | ||||||
| 		 */ | 		 */ | ||||||
| 		count = mlx5e_xsk_alloc_rx_wqes(rq, head, wqe_bulk); | 		count = mlx5e_xsk_alloc_rx_wqes(rq, head, wqe_bulk); | ||||||
|  | 	} | ||||||
| 
 | 
 | ||||||
| 	mlx5_wq_cyc_push_n(wq, count); | 	mlx5_wq_cyc_push_n(wq, count); | ||||||
| 	if (unlikely(count != wqe_bulk)) { | 	if (unlikely(count != wqe_bulk)) { | ||||||
|  | @ -1029,6 +1052,11 @@ INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) | ||||||
| 	head = rq->mpwqe.actual_wq_head; | 	head = rq->mpwqe.actual_wq_head; | ||||||
| 	i = missing; | 	i = missing; | ||||||
| 	do { | 	do { | ||||||
|  | 		struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, head); | ||||||
|  | 
 | ||||||
|  | 		/* Deferred free for better page pool cache usage. */ | ||||||
|  | 		mlx5e_free_rx_mpwqe(rq, wi); | ||||||
|  | 
 | ||||||
| 		alloc_err = rq->xsk_pool ? mlx5e_xsk_alloc_rx_mpwqe(rq, head) : | 		alloc_err = rq->xsk_pool ? mlx5e_xsk_alloc_rx_mpwqe(rq, head) : | ||||||
| 					   mlx5e_alloc_rx_mpwqe(rq, head); | 					   mlx5e_alloc_rx_mpwqe(rq, head); | ||||||
| 
 | 
 | ||||||
|  | @ -1133,7 +1161,7 @@ static void *mlx5e_shampo_get_packet_hd(struct mlx5e_rq *rq, u16 header_index) | ||||||
| 	struct mlx5e_dma_info *last_head = &rq->mpwqe.shampo->info[header_index]; | 	struct mlx5e_dma_info *last_head = &rq->mpwqe.shampo->info[header_index]; | ||||||
| 	u16 head_offset = (last_head->addr & (PAGE_SIZE - 1)) + rq->buff.headroom; | 	u16 head_offset = (last_head->addr & (PAGE_SIZE - 1)) + rq->buff.headroom; | ||||||
| 
 | 
 | ||||||
| 	return page_address(last_head->page) + head_offset; | 	return page_address(last_head->frag_page->page) + head_offset; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void mlx5e_shampo_update_ipv4_udp_hdr(struct mlx5e_rq *rq, struct iphdr *ipv4) | static void mlx5e_shampo_update_ipv4_udp_hdr(struct mlx5e_rq *rq, struct iphdr *ipv4) | ||||||
|  | @ -1586,7 +1614,7 @@ static struct sk_buff * | ||||||
| mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, | mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, | ||||||
| 			  struct mlx5_cqe64 *cqe, u32 cqe_bcnt) | 			  struct mlx5_cqe64 *cqe, u32 cqe_bcnt) | ||||||
| { | { | ||||||
| 	union mlx5e_alloc_unit *au = wi->au; | 	struct mlx5e_frag_page *frag_page = wi->frag_page; | ||||||
| 	u16 rx_headroom = rq->buff.headroom; | 	u16 rx_headroom = rq->buff.headroom; | ||||||
| 	struct bpf_prog *prog; | 	struct bpf_prog *prog; | ||||||
| 	struct sk_buff *skb; | 	struct sk_buff *skb; | ||||||
|  | @ -1595,11 +1623,11 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, | ||||||
| 	dma_addr_t addr; | 	dma_addr_t addr; | ||||||
| 	u32 frag_size; | 	u32 frag_size; | ||||||
| 
 | 
 | ||||||
| 	va             = page_address(au->page) + wi->offset; | 	va             = page_address(frag_page->page) + wi->offset; | ||||||
| 	data           = va + rx_headroom; | 	data           = va + rx_headroom; | ||||||
| 	frag_size      = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); | 	frag_size      = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); | ||||||
| 
 | 
 | ||||||
| 	addr = page_pool_get_dma_addr(au->page); | 	addr = page_pool_get_dma_addr(frag_page->page); | ||||||
| 	dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset, | 	dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset, | ||||||
| 				      frag_size, rq->buff.map_dir); | 				      frag_size, rq->buff.map_dir); | ||||||
| 	net_prefetch(data); | 	net_prefetch(data); | ||||||
|  | @ -1623,7 +1651,8 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, | ||||||
| 		return NULL; | 		return NULL; | ||||||
| 
 | 
 | ||||||
| 	/* queue up for recycling/reuse */ | 	/* queue up for recycling/reuse */ | ||||||
| 	page_ref_inc(au->page); | 	skb_mark_for_recycle(skb); | ||||||
|  | 	frag_page->frags++; | ||||||
| 
 | 
 | ||||||
| 	return skb; | 	return skb; | ||||||
| } | } | ||||||
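
The page_ref_inc() on the left is gone: the skb is marked for page_pool recycling and the outstanding fragment is counted in frag_page->frags instead. A minimal consumer-side sketch of that hand-off, assuming va points into a page that came from a page_pool created with PP_FLAG_DMA_MAP; everything except the skb APIs is illustrative:

#include <linux/skbuff.h>

static struct sk_buff *build_recyclable_skb(void *va, u32 frag_size,
					    u16 headroom, u32 len)
{
	struct sk_buff *skb = napi_build_skb(va, frag_size);

	if (unlikely(!skb))
		return NULL;

	skb_reserve(skb, headroom);
	skb_put(skb, len);

	/* On kfree_skb()/napi consume, the stack hands the underlying page
	 * fragment back to its page_pool instead of calling put_page().
	 */
	skb_mark_for_recycle(skb);
	return skb;
}

In the driver, the matching frag_page->frags++ records one more outstanding fragment; when the WQE slot is eventually released, that count is subtracted from the pre-set bias, so the page only returns to the pool after the stack has recycled every skb built on it.
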
|  | @ -1634,8 +1663,8 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi | ||||||
| { | { | ||||||
| 	struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0]; | 	struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0]; | ||||||
| 	struct mlx5e_wqe_frag_info *head_wi = wi; | 	struct mlx5e_wqe_frag_info *head_wi = wi; | ||||||
| 	union mlx5e_alloc_unit *au = wi->au; |  | ||||||
| 	u16 rx_headroom = rq->buff.headroom; | 	u16 rx_headroom = rq->buff.headroom; | ||||||
|  | 	struct mlx5e_frag_page *frag_page; | ||||||
| 	struct skb_shared_info *sinfo; | 	struct skb_shared_info *sinfo; | ||||||
| 	struct mlx5e_xdp_buff mxbuf; | 	struct mlx5e_xdp_buff mxbuf; | ||||||
| 	u32 frag_consumed_bytes; | 	u32 frag_consumed_bytes; | ||||||
|  | @ -1645,10 +1674,12 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi | ||||||
| 	u32 truesize; | 	u32 truesize; | ||||||
| 	void *va; | 	void *va; | ||||||
| 
 | 
 | ||||||
| 	va = page_address(au->page) + wi->offset; | 	frag_page = wi->frag_page; | ||||||
|  | 
 | ||||||
|  | 	va = page_address(frag_page->page) + wi->offset; | ||||||
| 	frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt); | 	frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt); | ||||||
| 
 | 
 | ||||||
| 	addr = page_pool_get_dma_addr(au->page); | 	addr = page_pool_get_dma_addr(frag_page->page); | ||||||
| 	dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset, | 	dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset, | ||||||
| 				      rq->buff.frame0_sz, rq->buff.map_dir); | 				      rq->buff.frame0_sz, rq->buff.map_dir); | ||||||
| 	net_prefetchw(va); /* xdp_frame data area */ | 	net_prefetchw(va); /* xdp_frame data area */ | ||||||
|  | @ -1665,11 +1696,11 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi | ||||||
| 	while (cqe_bcnt) { | 	while (cqe_bcnt) { | ||||||
| 		skb_frag_t *frag; | 		skb_frag_t *frag; | ||||||
| 
 | 
 | ||||||
| 		au = wi->au; | 		frag_page = wi->frag_page; | ||||||
| 
 | 
 | ||||||
| 		frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt); | 		frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt); | ||||||
| 
 | 
 | ||||||
| 		addr = page_pool_get_dma_addr(au->page); | 		addr = page_pool_get_dma_addr(frag_page->page); | ||||||
| 		dma_sync_single_for_cpu(rq->pdev, addr + wi->offset, | 		dma_sync_single_for_cpu(rq->pdev, addr + wi->offset, | ||||||
| 					frag_consumed_bytes, rq->buff.map_dir); | 					frag_consumed_bytes, rq->buff.map_dir); | ||||||
| 
 | 
 | ||||||
|  | @ -1683,11 +1714,12 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
| 		frag = &sinfo->frags[sinfo->nr_frags++]; | 		frag = &sinfo->frags[sinfo->nr_frags++]; | ||||||
| 		__skb_frag_set_page(frag, au->page); | 
 | ||||||
|  | 		__skb_frag_set_page(frag, frag_page->page); | ||||||
| 		skb_frag_off_set(frag, wi->offset); | 		skb_frag_off_set(frag, wi->offset); | ||||||
| 		skb_frag_size_set(frag, frag_consumed_bytes); | 		skb_frag_size_set(frag, frag_consumed_bytes); | ||||||
| 
 | 
 | ||||||
| 		if (page_is_pfmemalloc(au->page)) | 		if (page_is_pfmemalloc(frag_page->page)) | ||||||
| 			xdp_buff_set_frag_pfmemalloc(&mxbuf.xdp); | 			xdp_buff_set_frag_pfmemalloc(&mxbuf.xdp); | ||||||
| 
 | 
 | ||||||
| 		sinfo->xdp_frags_size += frag_consumed_bytes; | 		sinfo->xdp_frags_size += frag_consumed_bytes; | ||||||
|  | @ -1704,7 +1736,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi | ||||||
| 			int i; | 			int i; | ||||||
| 
 | 
 | ||||||
| 			for (i = wi - head_wi; i < rq->wqe.info.num_frags; i++) | 			for (i = wi - head_wi; i < rq->wqe.info.num_frags; i++) | ||||||
| 				mlx5e_put_rx_frag(rq, &head_wi[i], true); | 				mlx5e_put_rx_frag(rq, &head_wi[i]); | ||||||
| 		} | 		} | ||||||
| 		return NULL; /* page/packet was consumed by XDP */ | 		return NULL; /* page/packet was consumed by XDP */ | ||||||
| 	} | 	} | ||||||
|  | @ -1716,21 +1748,17 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi | ||||||
| 	if (unlikely(!skb)) | 	if (unlikely(!skb)) | ||||||
| 		return NULL; | 		return NULL; | ||||||
| 
 | 
 | ||||||
| 	page_ref_inc(head_wi->au->page); | 	skb_mark_for_recycle(skb); | ||||||
|  | 	head_wi->frag_page->frags++; | ||||||
| 
 | 
 | ||||||
| 	if (xdp_buff_has_frags(&mxbuf.xdp)) { | 	if (xdp_buff_has_frags(&mxbuf.xdp)) { | ||||||
| 		int i; |  | ||||||
| 
 |  | ||||||
| 		/* sinfo->nr_frags is reset by build_skb, calculate again. */ | 		/* sinfo->nr_frags is reset by build_skb, calculate again. */ | ||||||
| 		xdp_update_skb_shared_info(skb, wi - head_wi - 1, | 		xdp_update_skb_shared_info(skb, wi - head_wi - 1, | ||||||
| 					   sinfo->xdp_frags_size, truesize, | 					   sinfo->xdp_frags_size, truesize, | ||||||
| 					   xdp_buff_is_frag_pfmemalloc(&mxbuf.xdp)); | 					   xdp_buff_is_frag_pfmemalloc(&mxbuf.xdp)); | ||||||
| 
 | 
 | ||||||
| 		for (i = 0; i < sinfo->nr_frags; i++) { | 		for (struct mlx5e_wqe_frag_info *pwi = head_wi + 1; pwi < wi; pwi++) | ||||||
| 			skb_frag_t *frag = &sinfo->frags[i]; | 			pwi->frag_page->frags++; | ||||||
| 
 |  | ||||||
| 			page_ref_inc(skb_frag_page(frag)); |  | ||||||
| 		} |  | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	return skb; | 	return skb; | ||||||
|  | @ -1768,7 +1796,7 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) | ||||||
| 
 | 
 | ||||||
| 	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { | 	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { | ||||||
| 		mlx5e_handle_rx_err_cqe(rq, cqe); | 		mlx5e_handle_rx_err_cqe(rq, cqe); | ||||||
| 		goto free_wqe; | 		goto wq_cyc_pop; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	skb = INDIRECT_CALL_3(rq->wqe.skb_from_cqe, | 	skb = INDIRECT_CALL_3(rq->wqe.skb_from_cqe, | ||||||
|  | @ -1782,9 +1810,9 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) | ||||||
| 			/* do not return page to cache, | 			/* do not return page to cache, | ||||||
| 			 * it will be returned on XDP_TX completion. | 			 * it will be returned on XDP_TX completion. | ||||||
| 			 */ | 			 */ | ||||||
| 			goto wq_cyc_pop; | 			wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); | ||||||
| 		} | 		} | ||||||
| 		goto free_wqe; | 		goto wq_cyc_pop; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); | 	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); | ||||||
|  | @ -1792,13 +1820,11 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) | ||||||
| 	if (mlx5e_cqe_regb_chain(cqe)) | 	if (mlx5e_cqe_regb_chain(cqe)) | ||||||
| 		if (!mlx5e_tc_update_skb_nic(cqe, skb)) { | 		if (!mlx5e_tc_update_skb_nic(cqe, skb)) { | ||||||
| 			dev_kfree_skb_any(skb); | 			dev_kfree_skb_any(skb); | ||||||
| 			goto free_wqe; | 			goto wq_cyc_pop; | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
| 	napi_gro_receive(rq->cq.napi, skb); | 	napi_gro_receive(rq->cq.napi, skb); | ||||||
| 
 | 
 | ||||||
| free_wqe: |  | ||||||
| 	mlx5e_free_rx_wqe(rq, wi, true); |  | ||||||
| wq_cyc_pop: | wq_cyc_pop: | ||||||
| 	mlx5_wq_cyc_pop(wq); | 	mlx5_wq_cyc_pop(wq); | ||||||
| } | } | ||||||
|  | @ -1822,7 +1848,7 @@ static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) | ||||||
| 
 | 
 | ||||||
| 	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { | 	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { | ||||||
| 		mlx5e_handle_rx_err_cqe(rq, cqe); | 		mlx5e_handle_rx_err_cqe(rq, cqe); | ||||||
| 		goto free_wqe; | 		goto wq_cyc_pop; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe, | 	skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe, | ||||||
|  | @ -1835,9 +1861,9 @@ static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) | ||||||
| 			/* do not return page to cache, | 			/* do not return page to cache, | ||||||
| 			 * it will be returned on XDP_TX completion. | 			 * it will be returned on XDP_TX completion. | ||||||
| 			 */ | 			 */ | ||||||
| 			goto wq_cyc_pop; | 			wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); | ||||||
| 		} | 		} | ||||||
| 		goto free_wqe; | 		goto wq_cyc_pop; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); | 	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); | ||||||
|  | @ -1847,8 +1873,6 @@ static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) | ||||||
| 
 | 
 | ||||||
| 	mlx5e_rep_tc_receive(cqe, rq, skb); | 	mlx5e_rep_tc_receive(cqe, rq, skb); | ||||||
| 
 | 
 | ||||||
| free_wqe: |  | ||||||
| 	mlx5e_free_rx_wqe(rq, wi, true); |  | ||||||
| wq_cyc_pop: | wq_cyc_pop: | ||||||
| 	mlx5_wq_cyc_pop(wq); | 	mlx5_wq_cyc_pop(wq); | ||||||
| } | } | ||||||
|  | @ -1901,7 +1925,6 @@ mpwrq_cqe_out: | ||||||
| 
 | 
 | ||||||
| 	wq  = &rq->mpwqe.wq; | 	wq  = &rq->mpwqe.wq; | ||||||
| 	wqe = mlx5_wq_ll_get_wqe(wq, wqe_id); | 	wqe = mlx5_wq_ll_get_wqe(wq, wqe_id); | ||||||
| 	mlx5e_free_rx_mpwqe(rq, wi, true); |  | ||||||
| 	mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index); | 	mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -1913,7 +1936,8 @@ const struct mlx5e_rx_handlers mlx5e_rx_handlers_rep = { | ||||||
| 
 | 
 | ||||||
| static void | static void | ||||||
| mlx5e_fill_skb_data(struct sk_buff *skb, struct mlx5e_rq *rq, | mlx5e_fill_skb_data(struct sk_buff *skb, struct mlx5e_rq *rq, | ||||||
| 		    union mlx5e_alloc_unit *au, u32 data_bcnt, u32 data_offset) | 		    struct mlx5e_frag_page *frag_page, | ||||||
|  | 		    u32 data_bcnt, u32 data_offset) | ||||||
| { | { | ||||||
| 	net_prefetchw(skb->data); | 	net_prefetchw(skb->data); | ||||||
| 
 | 
 | ||||||
|  | @ -1927,12 +1951,13 @@ mlx5e_fill_skb_data(struct sk_buff *skb, struct mlx5e_rq *rq, | ||||||
| 		else | 		else | ||||||
| 			truesize = ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz)); | 			truesize = ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz)); | ||||||
| 
 | 
 | ||||||
| 		mlx5e_add_skb_frag(rq, skb, au, data_offset, | 		frag_page->frags++; | ||||||
|  | 		mlx5e_add_skb_frag(rq, skb, frag_page->page, data_offset, | ||||||
| 				   pg_consumed_bytes, truesize); | 				   pg_consumed_bytes, truesize); | ||||||
| 
 | 
 | ||||||
| 		data_bcnt -= pg_consumed_bytes; | 		data_bcnt -= pg_consumed_bytes; | ||||||
| 		data_offset = 0; | 		data_offset = 0; | ||||||
| 		au++; | 		frag_page++; | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -1941,11 +1966,11 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w | ||||||
| 				   struct mlx5_cqe64 *cqe, u16 cqe_bcnt, u32 head_offset, | 				   struct mlx5_cqe64 *cqe, u16 cqe_bcnt, u32 head_offset, | ||||||
| 				   u32 page_idx) | 				   u32 page_idx) | ||||||
| { | { | ||||||
| 	union mlx5e_alloc_unit *au = &wi->alloc_units[page_idx]; | 	struct mlx5e_frag_page *frag_page = &wi->alloc_units.frag_pages[page_idx]; | ||||||
| 	u16 headlen = min_t(u16, MLX5E_RX_MAX_HEAD, cqe_bcnt); | 	u16 headlen = min_t(u16, MLX5E_RX_MAX_HEAD, cqe_bcnt); | ||||||
|  | 	struct mlx5e_frag_page *head_page = frag_page; | ||||||
| 	u32 frag_offset    = head_offset + headlen; | 	u32 frag_offset    = head_offset + headlen; | ||||||
| 	u32 byte_cnt       = cqe_bcnt - headlen; | 	u32 byte_cnt       = cqe_bcnt - headlen; | ||||||
| 	union mlx5e_alloc_unit *head_au = au; |  | ||||||
| 	struct sk_buff *skb; | 	struct sk_buff *skb; | ||||||
| 	dma_addr_t addr; | 	dma_addr_t addr; | ||||||
| 
 | 
 | ||||||
|  | @ -1960,14 +1985,15 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w | ||||||
| 
 | 
 | ||||||
| 	/* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */ | 	/* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */ | ||||||
| 	if (unlikely(frag_offset >= PAGE_SIZE)) { | 	if (unlikely(frag_offset >= PAGE_SIZE)) { | ||||||
| 		au++; | 		frag_page++; | ||||||
| 		frag_offset -= PAGE_SIZE; | 		frag_offset -= PAGE_SIZE; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	mlx5e_fill_skb_data(skb, rq, au, byte_cnt, frag_offset); | 	skb_mark_for_recycle(skb); | ||||||
|  | 	mlx5e_fill_skb_data(skb, rq, frag_page, byte_cnt, frag_offset); | ||||||
| 	/* copy header */ | 	/* copy header */ | ||||||
| 	addr = page_pool_get_dma_addr(head_au->page); | 	addr = page_pool_get_dma_addr(head_page->page); | ||||||
| 	mlx5e_copy_skb_header(rq, skb, head_au->page, addr, | 	mlx5e_copy_skb_header(rq, skb, head_page->page, addr, | ||||||
| 			      head_offset, head_offset, headlen); | 			      head_offset, head_offset, headlen); | ||||||
| 	/* skb linear part was allocated with headlen and aligned to long */ | 	/* skb linear part was allocated with headlen and aligned to long */ | ||||||
| 	skb->tail += headlen; | 	skb->tail += headlen; | ||||||
|  | @ -1981,7 +2007,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, | ||||||
| 				struct mlx5_cqe64 *cqe, u16 cqe_bcnt, u32 head_offset, | 				struct mlx5_cqe64 *cqe, u16 cqe_bcnt, u32 head_offset, | ||||||
| 				u32 page_idx) | 				u32 page_idx) | ||||||
| { | { | ||||||
| 	union mlx5e_alloc_unit *au = &wi->alloc_units[page_idx]; | 	struct mlx5e_frag_page *frag_page = &wi->alloc_units.frag_pages[page_idx]; | ||||||
| 	u16 rx_headroom = rq->buff.headroom; | 	u16 rx_headroom = rq->buff.headroom; | ||||||
| 	struct bpf_prog *prog; | 	struct bpf_prog *prog; | ||||||
| 	struct sk_buff *skb; | 	struct sk_buff *skb; | ||||||
|  | @ -1996,11 +2022,11 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, | ||||||
| 		return NULL; | 		return NULL; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	va             = page_address(au->page) + head_offset; | 	va             = page_address(frag_page->page) + head_offset; | ||||||
| 	data           = va + rx_headroom; | 	data           = va + rx_headroom; | ||||||
| 	frag_size      = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); | 	frag_size      = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); | ||||||
| 
 | 
 | ||||||
| 	addr = page_pool_get_dma_addr(au->page); | 	addr = page_pool_get_dma_addr(frag_page->page); | ||||||
| 	dma_sync_single_range_for_cpu(rq->pdev, addr, head_offset, | 	dma_sync_single_range_for_cpu(rq->pdev, addr, head_offset, | ||||||
| 				      frag_size, rq->buff.map_dir); | 				      frag_size, rq->buff.map_dir); | ||||||
| 	net_prefetch(data); | 	net_prefetch(data); | ||||||
|  | @ -2013,7 +2039,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, | ||||||
| 		mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, cqe_bcnt, &mxbuf); | 		mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, cqe_bcnt, &mxbuf); | ||||||
| 		if (mlx5e_xdp_handle(rq, prog, &mxbuf)) { | 		if (mlx5e_xdp_handle(rq, prog, &mxbuf)) { | ||||||
| 			if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) | 			if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) | ||||||
| 				__set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */ | 				__set_bit(page_idx, wi->skip_release_bitmap); /* non-atomic */ | ||||||
| 			return NULL; /* page/packet was consumed by XDP */ | 			return NULL; /* page/packet was consumed by XDP */ | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
|  | @ -2027,7 +2053,8 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, | ||||||
| 		return NULL; | 		return NULL; | ||||||
| 
 | 
 | ||||||
| 	/* queue up for recycling/reuse */ | 	/* queue up for recycling/reuse */ | ||||||
| 	page_ref_inc(au->page); | 	skb_mark_for_recycle(skb); | ||||||
|  | 	frag_page->frags++; | ||||||
| 
 | 
 | ||||||
| 	return skb; | 	return skb; | ||||||
| } | } | ||||||
|  | @ -2044,7 +2071,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, | ||||||
| 	void *hdr, *data; | 	void *hdr, *data; | ||||||
| 	u32 frag_size; | 	u32 frag_size; | ||||||
| 
 | 
 | ||||||
| 	hdr		= page_address(head->page) + head_offset; | 	hdr		= page_address(head->frag_page->page) + head_offset; | ||||||
| 	data		= hdr + rx_headroom; | 	data		= hdr + rx_headroom; | ||||||
| 	frag_size	= MLX5_SKB_FRAG_SZ(rx_headroom + head_size); | 	frag_size	= MLX5_SKB_FRAG_SZ(rx_headroom + head_size); | ||||||
| 
 | 
 | ||||||
|  | @ -2058,9 +2085,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, | ||||||
| 		if (unlikely(!skb)) | 		if (unlikely(!skb)) | ||||||
| 			return NULL; | 			return NULL; | ||||||
| 
 | 
 | ||||||
| 		/* queue up for recycling/reuse */ | 		head->frag_page->frags++; | ||||||
| 		page_ref_inc(head->page); |  | ||||||
| 
 |  | ||||||
| 	} else { | 	} else { | ||||||
| 		/* allocate SKB and copy header for large header */ | 		/* allocate SKB and copy header for large header */ | ||||||
| 		rq->stats->gro_large_hds++; | 		rq->stats->gro_large_hds++; | ||||||
|  | @ -2072,13 +2097,17 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
| 		prefetchw(skb->data); | 		prefetchw(skb->data); | ||||||
| 		mlx5e_copy_skb_header(rq, skb, head->page, head->addr, | 		mlx5e_copy_skb_header(rq, skb, head->frag_page->page, head->addr, | ||||||
| 				      head_offset + rx_headroom, | 				      head_offset + rx_headroom, | ||||||
| 				      rx_headroom, head_size); | 				      rx_headroom, head_size); | ||||||
| 		/* skb linear part was allocated with headlen and aligned to long */ | 		/* skb linear part was allocated with headlen and aligned to long */ | ||||||
| 		skb->tail += head_size; | 		skb->tail += head_size; | ||||||
| 		skb->len  += head_size; | 		skb->len  += head_size; | ||||||
| 	} | 	} | ||||||
|  | 
 | ||||||
|  | 	/* queue up for recycling/reuse */ | ||||||
|  | 	skb_mark_for_recycle(skb); | ||||||
|  | 
 | ||||||
| 	return skb; | 	return skb; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -2123,8 +2152,10 @@ mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index) | ||||||
| 	u64 addr = shampo->info[header_index].addr; | 	u64 addr = shampo->info[header_index].addr; | ||||||
| 
 | 
 | ||||||
| 	if (((header_index + 1) & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) == 0) { | 	if (((header_index + 1) & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) == 0) { | ||||||
| 		shampo->info[header_index].addr = ALIGN_DOWN(addr, PAGE_SIZE); | 		struct mlx5e_dma_info *dma_info = &shampo->info[header_index]; | ||||||
| 		mlx5e_page_release_dynamic(rq, shampo->info[header_index].page, true); | 
 | ||||||
|  | 		dma_info->addr = ALIGN_DOWN(addr, PAGE_SIZE); | ||||||
|  | 		mlx5e_page_release_fragmented(rq, dma_info->frag_page); | ||||||
| 	} | 	} | ||||||
| 	bitmap_clear(shampo->bitmap, header_index, 1); | 	bitmap_clear(shampo->bitmap, header_index, 1); | ||||||
| } | } | ||||||
|  | @ -2145,7 +2176,6 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cq | ||||||
| 	bool match		= cqe->shampo.match; | 	bool match		= cqe->shampo.match; | ||||||
| 	struct mlx5e_rq_stats *stats = rq->stats; | 	struct mlx5e_rq_stats *stats = rq->stats; | ||||||
| 	struct mlx5e_rx_wqe_ll *wqe; | 	struct mlx5e_rx_wqe_ll *wqe; | ||||||
| 	union mlx5e_alloc_unit *au; |  | ||||||
| 	struct mlx5e_mpw_info *wi; | 	struct mlx5e_mpw_info *wi; | ||||||
| 	struct mlx5_wq_ll *wq; | 	struct mlx5_wq_ll *wq; | ||||||
| 
 | 
 | ||||||
|  | @ -2195,8 +2225,10 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cq | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	if (likely(head_size)) { | 	if (likely(head_size)) { | ||||||
| 		au = &wi->alloc_units[page_idx]; | 		struct mlx5e_frag_page *frag_page; | ||||||
| 		mlx5e_fill_skb_data(*skb, rq, au, data_bcnt, data_offset); | 
 | ||||||
|  | 		frag_page = &wi->alloc_units.frag_pages[page_idx]; | ||||||
|  | 		mlx5e_fill_skb_data(*skb, rq, frag_page, data_bcnt, data_offset); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	mlx5e_shampo_complete_rx_cqe(rq, cqe, cqe_bcnt, *skb); | 	mlx5e_shampo_complete_rx_cqe(rq, cqe, cqe_bcnt, *skb); | ||||||
|  | @ -2210,7 +2242,6 @@ mpwrq_cqe_out: | ||||||
| 
 | 
 | ||||||
| 	wq  = &rq->mpwqe.wq; | 	wq  = &rq->mpwqe.wq; | ||||||
| 	wqe = mlx5_wq_ll_get_wqe(wq, wqe_id); | 	wqe = mlx5_wq_ll_get_wqe(wq, wqe_id); | ||||||
| 	mlx5e_free_rx_mpwqe(rq, wi, true); |  | ||||||
| 	mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index); | 	mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -2270,7 +2301,6 @@ mpwrq_cqe_out: | ||||||
| 
 | 
 | ||||||
| 	wq  = &rq->mpwqe.wq; | 	wq  = &rq->mpwqe.wq; | ||||||
| 	wqe = mlx5_wq_ll_get_wqe(wq, wqe_id); | 	wqe = mlx5_wq_ll_get_wqe(wq, wqe_id); | ||||||
| 	mlx5e_free_rx_mpwqe(rq, wi, true); |  | ||||||
| 	mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index); | 	mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -2489,7 +2519,7 @@ static void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) | ||||||
| 
 | 
 | ||||||
| 	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { | 	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { | ||||||
| 		rq->stats->wqe_err++; | 		rq->stats->wqe_err++; | ||||||
| 		goto wq_free_wqe; | 		goto wq_cyc_pop; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe, | 	skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe, | ||||||
|  | @ -2497,17 +2527,16 @@ static void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) | ||||||
| 			      mlx5e_skb_from_cqe_nonlinear, | 			      mlx5e_skb_from_cqe_nonlinear, | ||||||
| 			      rq, wi, cqe, cqe_bcnt); | 			      rq, wi, cqe, cqe_bcnt); | ||||||
| 	if (!skb) | 	if (!skb) | ||||||
| 		goto wq_free_wqe; | 		goto wq_cyc_pop; | ||||||
| 
 | 
 | ||||||
| 	mlx5i_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); | 	mlx5i_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); | ||||||
| 	if (unlikely(!skb->dev)) { | 	if (unlikely(!skb->dev)) { | ||||||
| 		dev_kfree_skb_any(skb); | 		dev_kfree_skb_any(skb); | ||||||
| 		goto wq_free_wqe; | 		goto wq_cyc_pop; | ||||||
| 	} | 	} | ||||||
| 	napi_gro_receive(rq->cq.napi, skb); | 	napi_gro_receive(rq->cq.napi, skb); | ||||||
| 
 | 
 | ||||||
| wq_free_wqe: | wq_cyc_pop: | ||||||
| 	mlx5e_free_rx_wqe(rq, wi, true); |  | ||||||
| 	mlx5_wq_cyc_pop(wq); | 	mlx5_wq_cyc_pop(wq); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -2582,12 +2611,12 @@ static void mlx5e_trap_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe | ||||||
| 
 | 
 | ||||||
| 	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { | 	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { | ||||||
| 		rq->stats->wqe_err++; | 		rq->stats->wqe_err++; | ||||||
| 		goto free_wqe; | 		goto wq_cyc_pop; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	skb = mlx5e_skb_from_cqe_nonlinear(rq, wi, cqe, cqe_bcnt); | 	skb = mlx5e_skb_from_cqe_nonlinear(rq, wi, cqe, cqe_bcnt); | ||||||
| 	if (!skb) | 	if (!skb) | ||||||
| 		goto free_wqe; | 		goto wq_cyc_pop; | ||||||
| 
 | 
 | ||||||
| 	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); | 	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); | ||||||
| 	skb_push(skb, ETH_HLEN); | 	skb_push(skb, ETH_HLEN); | ||||||
|  | @ -2596,8 +2625,7 @@ static void mlx5e_trap_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe | ||||||
| 				 rq->netdev->devlink_port); | 				 rq->netdev->devlink_port); | ||||||
| 	dev_kfree_skb_any(skb); | 	dev_kfree_skb_any(skb); | ||||||
| 
 | 
 | ||||||
| free_wqe: | wq_cyc_pop: | ||||||
| 	mlx5e_free_rx_wqe(rq, wi, false); |  | ||||||
| 	mlx5_wq_cyc_pop(wq); | 	mlx5_wq_cyc_pop(wq); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -179,11 +179,6 @@ static const struct counter_desc sw_stats_desc[] = { | ||||||
| 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) }, | 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) }, | ||||||
| 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) }, | 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) }, | ||||||
| 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) }, | 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) }, | ||||||
| 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_reuse) }, |  | ||||||
| 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_full) }, |  | ||||||
| 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_empty) }, |  | ||||||
| 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_busy) }, |  | ||||||
| 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_waive) }, |  | ||||||
| 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_congst_umr) }, | 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_congst_umr) }, | ||||||
| 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_arfs_err) }, | 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_arfs_err) }, | ||||||
| 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_recover) }, | 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_recover) }, | ||||||
|  | @ -358,11 +353,6 @@ static void mlx5e_stats_grp_sw_update_stats_rq_stats(struct mlx5e_sw_stats *s, | ||||||
| 	s->rx_buff_alloc_err          += rq_stats->buff_alloc_err; | 	s->rx_buff_alloc_err          += rq_stats->buff_alloc_err; | ||||||
| 	s->rx_cqe_compress_blks       += rq_stats->cqe_compress_blks; | 	s->rx_cqe_compress_blks       += rq_stats->cqe_compress_blks; | ||||||
| 	s->rx_cqe_compress_pkts       += rq_stats->cqe_compress_pkts; | 	s->rx_cqe_compress_pkts       += rq_stats->cqe_compress_pkts; | ||||||
| 	s->rx_cache_reuse             += rq_stats->cache_reuse; |  | ||||||
| 	s->rx_cache_full              += rq_stats->cache_full; |  | ||||||
| 	s->rx_cache_empty             += rq_stats->cache_empty; |  | ||||||
| 	s->rx_cache_busy              += rq_stats->cache_busy; |  | ||||||
| 	s->rx_cache_waive             += rq_stats->cache_waive; |  | ||||||
| 	s->rx_congst_umr              += rq_stats->congst_umr; | 	s->rx_congst_umr              += rq_stats->congst_umr; | ||||||
| 	s->rx_arfs_err                += rq_stats->arfs_err; | 	s->rx_arfs_err                += rq_stats->arfs_err; | ||||||
| 	s->rx_recover                 += rq_stats->recover; | 	s->rx_recover                 += rq_stats->recover; | ||||||
|  | @ -1978,11 +1968,6 @@ static const struct counter_desc rq_stats_desc[] = { | ||||||
| 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, | 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, | ||||||
| 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, | 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, | ||||||
| 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, | 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, | ||||||
| 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_reuse) }, |  | ||||||
| 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_full) }, |  | ||||||
| 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_empty) }, |  | ||||||
| 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_busy) }, |  | ||||||
| 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_waive) }, |  | ||||||
| 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, congst_umr) }, | 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, congst_umr) }, | ||||||
| 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, arfs_err) }, | 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, arfs_err) }, | ||||||
| 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, recover) }, | 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, recover) }, | ||||||
|  | @ -2163,11 +2148,6 @@ static const struct counter_desc ptp_rq_stats_desc[] = { | ||||||
| 	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, | 	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, | ||||||
| 	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, | 	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, | ||||||
| 	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, | 	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, | ||||||
| 	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_reuse) }, |  | ||||||
| 	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_full) }, |  | ||||||
| 	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_empty) }, |  | ||||||
| 	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_busy) }, |  | ||||||
| 	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_waive) }, |  | ||||||
| 	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, congst_umr) }, | 	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, congst_umr) }, | ||||||
| 	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, arfs_err) }, | 	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, arfs_err) }, | ||||||
| 	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, recover) }, | 	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, recover) }, | ||||||
|  |  | ||||||
|  | @ -193,11 +193,6 @@ struct mlx5e_sw_stats { | ||||||
| 	u64 rx_buff_alloc_err; | 	u64 rx_buff_alloc_err; | ||||||
| 	u64 rx_cqe_compress_blks; | 	u64 rx_cqe_compress_blks; | ||||||
| 	u64 rx_cqe_compress_pkts; | 	u64 rx_cqe_compress_pkts; | ||||||
| 	u64 rx_cache_reuse; |  | ||||||
| 	u64 rx_cache_full; |  | ||||||
| 	u64 rx_cache_empty; |  | ||||||
| 	u64 rx_cache_busy; |  | ||||||
| 	u64 rx_cache_waive; |  | ||||||
| 	u64 rx_congst_umr; | 	u64 rx_congst_umr; | ||||||
| 	u64 rx_arfs_err; | 	u64 rx_arfs_err; | ||||||
| 	u64 rx_recover; | 	u64 rx_recover; | ||||||
|  | @ -362,11 +357,6 @@ struct mlx5e_rq_stats { | ||||||
| 	u64 buff_alloc_err; | 	u64 buff_alloc_err; | ||||||
| 	u64 cqe_compress_blks; | 	u64 cqe_compress_blks; | ||||||
| 	u64 cqe_compress_pkts; | 	u64 cqe_compress_pkts; | ||||||
| 	u64 cache_reuse; |  | ||||||
| 	u64 cache_full; |  | ||||||
| 	u64 cache_empty; |  | ||||||
| 	u64 cache_busy; |  | ||||||
| 	u64 cache_waive; |  | ||||||
| 	u64 congst_umr; | 	u64 congst_umr; | ||||||
| 	u64 arfs_err; | 	u64 arfs_err; | ||||||
| 	u64 recover; | 	u64 recover; | ||||||
|  |  | ||||||