// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2024 Google LLC
 */

#include <linux/bitmap.h>
#include <linux/bpf.h>
#include <linux/cpumask.h>
#include <linux/etherdevice.h>
#include <linux/filter.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/utsname.h>
#include <linux/version.h>
#include <net/netdev_queues.h>
#include <net/sch_generic.h>
#include <net/xdp_sock_drv.h>
#include "gve.h"
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_register.h"
#include "gve_utils.h"

#define GVE_DEFAULT_RX_COPYBREAK	(256)

#define DEFAULT_MSG_LEVEL	(NETIF_MSG_DRV | NETIF_MSG_LINK)
#define GVE_VERSION		"1.0.0"
#define GVE_VERSION_PREFIX	"GVE-"

/* Minimum amount of time between queue kicks in msec (10 seconds) */
#define MIN_TX_TIMEOUT_GAP (1000 * 10)

char gve_driver_name[] = "gve";
const char gve_version_str[] = GVE_VERSION;
static const char gve_version_prefix[] = GVE_VERSION_PREFIX;

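/* Report driver and OS version information to the device over the admin
 * queue; older devices that do not support this command are not an error.
 */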
static int gve_verify_driver_compatibility(struct gve_priv *priv)
{
	int err;
	struct gve_driver_info *driver_info;
	dma_addr_t driver_info_bus;

	driver_info = dma_alloc_coherent(&priv->pdev->dev,
					 sizeof(struct gve_driver_info),
					 &driver_info_bus, GFP_KERNEL);
	if (!driver_info)
		return -ENOMEM;

	*driver_info = (struct gve_driver_info) {
		.os_type = 1, /* Linux */
		.os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR),
		.os_version_minor = cpu_to_be32(LINUX_VERSION_SUBLEVEL),
		.os_version_sub = cpu_to_be32(LINUX_VERSION_PATCHLEVEL),
		.driver_capability_flags = {
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1),
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2),
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3),
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4),
		},
	};
	strscpy(driver_info->os_version_str1, utsname()->release,
		sizeof(driver_info->os_version_str1));
	strscpy(driver_info->os_version_str2, utsname()->version,
		sizeof(driver_info->os_version_str2));

	err = gve_adminq_verify_driver_compatibility(priv,
						     sizeof(struct gve_driver_info),
						     driver_info_bus);

	/* It's ok if the device doesn't support this */
	if (err == -EOPNOTSUPP)
		err = 0;

	dma_free_coherent(&priv->pdev->dev,
			  sizeof(struct gve_driver_info),
			  driver_info, driver_info_bus);
	return err;
}

static netdev_features_t gve_features_check(struct sk_buff *skb,
					    struct net_device *dev,
					    netdev_features_t features)
{
	struct gve_priv *priv = netdev_priv(dev);

	if (!gve_is_gqi(priv))
		return gve_features_check_dqo(skb, dev, features);

	return features;
}

static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);

	if (gve_is_gqi(priv))
		return gve_tx(skb, dev);
	else
		return gve_tx_dqo(skb, dev);
}

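/* Aggregate per-ring packet and byte counters into the netdev stats, using
 * u64_stats fetch/retry loops so each snapshot is internally consistent.
 */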
static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
{
	struct gve_priv *priv = netdev_priv(dev);
	unsigned int start;
	u64 packets, bytes;
	int num_tx_queues;
	int ring;

	num_tx_queues = gve_num_tx_queues(priv);
	if (priv->rx) {
		for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
			do {
				start =
				  u64_stats_fetch_begin(&priv->rx[ring].statss);
				packets = priv->rx[ring].rpackets;
				bytes = priv->rx[ring].rbytes;
			} while (u64_stats_fetch_retry(&priv->rx[ring].statss,
						       start));
			s->rx_packets += packets;
			s->rx_bytes += bytes;
		}
	}
	if (priv->tx) {
		for (ring = 0; ring < num_tx_queues; ring++) {
			do {
				start =
				  u64_stats_fetch_begin(&priv->tx[ring].statss);
				packets = priv->tx[ring].pkt_done;
				bytes = priv->tx[ring].bytes_done;
			} while (u64_stats_fetch_retry(&priv->tx[ring].statss,
						       start));
			s->tx_packets += packets;
			s->tx_bytes += bytes;
		}
	}
}

static int gve_alloc_flow_rule_caches(struct gve_priv *priv)
{
	struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache;
	int err = 0;

	if (!priv->max_flow_rules)
		return 0;

	flow_rules_cache->rules_cache =
		kvcalloc(GVE_FLOW_RULES_CACHE_SIZE, sizeof(*flow_rules_cache->rules_cache),
			 GFP_KERNEL);
	if (!flow_rules_cache->rules_cache) {
		dev_err(&priv->pdev->dev, "Cannot alloc flow rules cache\n");
		return -ENOMEM;
	}

	flow_rules_cache->rule_ids_cache =
		kvcalloc(GVE_FLOW_RULE_IDS_CACHE_SIZE, sizeof(*flow_rules_cache->rule_ids_cache),
			 GFP_KERNEL);
	if (!flow_rules_cache->rule_ids_cache) {
		dev_err(&priv->pdev->dev, "Cannot alloc flow rule ids cache\n");
		err = -ENOMEM;
		goto free_rules_cache;
	}

	return 0;

free_rules_cache:
	kvfree(flow_rules_cache->rules_cache);
	flow_rules_cache->rules_cache = NULL;
	return err;
}

static void gve_free_flow_rule_caches(struct gve_priv *priv)
{
	struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache;

	kvfree(flow_rules_cache->rule_ids_cache);
	flow_rules_cache->rule_ids_cache = NULL;
	kvfree(flow_rules_cache->rules_cache);
	flow_rules_cache->rules_cache = NULL;
}

static int gve_alloc_rss_config_cache(struct gve_priv *priv)
{
	struct gve_rss_config *rss_config = &priv->rss_config;

	if (!priv->cache_rss_config)
		return 0;

	rss_config->hash_key = kcalloc(priv->rss_key_size,
				       sizeof(rss_config->hash_key[0]),
				       GFP_KERNEL);
	if (!rss_config->hash_key)
		return -ENOMEM;

	rss_config->hash_lut = kcalloc(priv->rss_lut_size,
				       sizeof(rss_config->hash_lut[0]),
				       GFP_KERNEL);
	if (!rss_config->hash_lut)
		goto free_rss_key_cache;

	return 0;

free_rss_key_cache:
	kfree(rss_config->hash_key);
	rss_config->hash_key = NULL;
	return -ENOMEM;
}

static void gve_free_rss_config_cache(struct gve_priv *priv)
{
	struct gve_rss_config *rss_config = &priv->rss_config;

	kfree(rss_config->hash_key);
	kfree(rss_config->hash_lut);
	memset(rss_config, 0, sizeof(*rss_config));
}

static int gve_alloc_counter_array(struct gve_priv *priv)
{
	priv->counter_array =
		dma_alloc_coherent(&priv->pdev->dev,
				   priv->num_event_counters *
				   sizeof(*priv->counter_array),
				   &priv->counter_array_bus, GFP_KERNEL);
	if (!priv->counter_array)
		return -ENOMEM;

	return 0;
}

static void gve_free_counter_array(struct gve_priv *priv)
{
	if (!priv->counter_array)
		return;

	dma_free_coherent(&priv->pdev->dev,
			  priv->num_event_counters *
			  sizeof(*priv->counter_array),
			  priv->counter_array, priv->counter_array_bus);
	priv->counter_array = NULL;
}

/* NIC requests to report stats */
static void gve_stats_report_task(struct work_struct *work)
{
	struct gve_priv *priv = container_of(work, struct gve_priv,
					     stats_report_task);
	if (gve_get_do_report_stats(priv)) {
		gve_handle_report_stats(priv);
		gve_clear_do_report_stats(priv);
	}
}

static void gve_stats_report_schedule(struct gve_priv *priv)
{
	if (!gve_get_probe_in_progress(priv) &&
	    !gve_get_reset_in_progress(priv)) {
		gve_set_do_report_stats(priv);
		queue_work(priv->gve_wq, &priv->stats_report_task);
	}
}

static void gve_stats_report_timer(struct timer_list *t)
{
	struct gve_priv *priv = timer_container_of(priv, t,
						   stats_report_timer);

	mod_timer(&priv->stats_report_timer,
		  round_jiffies(jiffies +
		  msecs_to_jiffies(priv->stats_report_timer_period)));
	gve_stats_report_schedule(priv);
}

static int gve_alloc_stats_report(struct gve_priv *priv)
{
	int tx_stats_num, rx_stats_num;

	tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) *
		       gve_num_tx_queues(priv);
	rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) *
		       priv->rx_cfg.num_queues;
	priv->stats_report_len = struct_size(priv->stats_report, stats,
					     size_add(tx_stats_num, rx_stats_num));
	priv->stats_report =
		dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len,
				   &priv->stats_report_bus, GFP_KERNEL);
	if (!priv->stats_report)
		return -ENOMEM;
	/* Set up timer for the report-stats task */
	timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0);
	priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD;
	return 0;
}

static void gve_free_stats_report(struct gve_priv *priv)
{
	if (!priv->stats_report)
		return;

	timer_delete_sync(&priv->stats_report_timer);
	dma_free_coherent(&priv->pdev->dev, priv->stats_report_len,
			  priv->stats_report, priv->stats_report_bus);
	priv->stats_report = NULL;
}

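/* Top-half interrupt handlers: the management vector kicks the service task;
 * per-queue vectors mask the IRQ (GQI only) and schedule NAPI.
 */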
static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
{
	struct gve_priv *priv = arg;

	queue_work(priv->gve_wq, &priv->service_task);
	return IRQ_HANDLED;
}

static irqreturn_t gve_intr(int irq, void *arg)
{
	struct gve_notify_block *block = arg;
	struct gve_priv *priv = block->priv;

	iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
	napi_schedule_irqoff(&block->napi);
	return IRQ_HANDLED;
}

static irqreturn_t gve_intr_dqo(int irq, void *arg)
{
	struct gve_notify_block *block = arg;

	/* Interrupts are automatically masked */
	napi_schedule_irqoff(&block->napi);
	return IRQ_HANDLED;
}

static int gve_is_napi_on_home_cpu(struct gve_priv *priv, u32 irq)
{
	int cpu_curr = smp_processor_id();
	const struct cpumask *aff_mask;

	aff_mask = irq_get_effective_affinity_mask(irq);
	if (unlikely(!aff_mask))
		return 1;

	return cpumask_test_cpu(cpu_curr, aff_mask);
}

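/* NAPI poll for GQI queues: clean TX (or XDP TX), poll RX up to budget, then
 * either re-arm the IRQ doorbell or keep polling if more work is pending.
 */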
int gve_napi_poll(struct napi_struct *napi, int budget)
{
	struct gve_notify_block *block;
	__be32 __iomem *irq_doorbell;
	bool reschedule = false;
	struct gve_priv *priv;
	int work_done = 0;

	block = container_of(napi, struct gve_notify_block, napi);
	priv = block->priv;

	if (block->tx) {
		if (block->tx->q_num < priv->tx_cfg.num_queues)
			reschedule |= gve_tx_poll(block, budget);
		else if (budget)
			reschedule |= gve_xdp_poll(block, budget);
	}

	if (!budget)
		return 0;

	if (block->rx) {
		work_done = gve_rx_poll(block, budget);

		/* Poll XSK TX as part of RX NAPI. Setup re-poll based on max of
		 * TX and RX work done.
		 */
		if (priv->xdp_prog)
			work_done = max_t(int, work_done,
					  gve_xsk_tx_poll(block, budget));
		reschedule |= work_done == budget;
	}

	if (reschedule)
		return budget;

	/* Complete processing - don't unmask irq if busy polling is enabled */
	if (likely(napi_complete_done(napi, work_done))) {
		irq_doorbell = gve_irq_doorbell(priv, block);
		iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);

		/* Ensure IRQ ACK is visible before we check pending work.
		 * If queue had issued updates, it would be truly visible.
		 */
		mb();

		if (block->tx)
			reschedule |= gve_tx_clean_pending(priv, block->tx);
		if (block->rx)
			reschedule |= gve_rx_work_pending(block->rx);

		if (reschedule && napi_schedule(napi))
			iowrite32be(GVE_IRQ_MASK, irq_doorbell);
	}
	return work_done;
}

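/* NAPI poll for DQO queues: same flow as GQI, but interrupts are re-armed
 * through the DQO ITR doorbell and budget is only returned when NAPI is
 * already running on the IRQ's home CPU.
 */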
int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
{
	struct gve_notify_block *block =
		container_of(napi, struct gve_notify_block, napi);
	struct gve_priv *priv = block->priv;
	bool reschedule = false;
	int work_done = 0;

	if (block->tx) {
		if (block->tx->q_num < priv->tx_cfg.num_queues)
			reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);
		else
			reschedule |= gve_xdp_poll_dqo(block);
	}

	if (!budget)
		return 0;

	if (block->rx) {
		work_done = gve_rx_poll_dqo(block, budget);

		/* Poll XSK TX as part of RX NAPI. Setup re-poll based on
		 * whether either datapath has more work to do.
		 */
		if (priv->xdp_prog)
			reschedule |= gve_xsk_tx_poll_dqo(block, budget);

		reschedule |= work_done == budget;
	}

	if (reschedule) {
		/* Reschedule by returning budget only if already on the correct
		 * cpu.
		 */
		if (likely(gve_is_napi_on_home_cpu(priv, block->irq)))
			return budget;

		/* If not on the cpu with which this queue's irq has affinity,
		 * we avoid rescheduling napi and arm the irq instead so that
		 * napi gets rescheduled back eventually onto the right cpu.
		 */
		if (work_done == budget)
			work_done--;
	}

	if (likely(napi_complete_done(napi, work_done))) {
		/* Enable interrupts again.
		 *
		 * We don't need to repoll afterwards because HW supports the
		 * PCI MSI-X PBA feature.
		 *
		 * Another interrupt would be triggered if a new event came in
		 * since the last one.
		 */
		gve_write_irq_doorbell_dqo(priv, block,
					   GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
	}

	return work_done;
}

static const struct cpumask *gve_get_node_mask(struct gve_priv *priv)
{
	if (priv->numa_node == NUMA_NO_NODE)
		return cpu_all_mask;
	else
		return cpumask_of_node(priv->numa_node);
}

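/* Allocate MSI-X vectors and notify blocks: one management vector plus one
 * vector per notify block, spreading per-queue IRQ affinity across the CPUs
 * of the device's NUMA node.
 */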
static int gve_alloc_notify_blocks(struct gve_priv *priv)
{
	int num_vecs_requested = priv->num_ntfy_blks + 1;
	const struct cpumask *node_mask;
	unsigned int cur_cpu;
	int vecs_enabled;
	int i, j;
	int err;

	priv->msix_vectors = kvcalloc(num_vecs_requested,
				      sizeof(*priv->msix_vectors), GFP_KERNEL);
	if (!priv->msix_vectors)
		return -ENOMEM;
	for (i = 0; i < num_vecs_requested; i++)
		priv->msix_vectors[i].entry = i;
	vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors,
					     GVE_MIN_MSIX, num_vecs_requested);
	if (vecs_enabled < 0) {
		dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
			GVE_MIN_MSIX, vecs_enabled);
		err = vecs_enabled;
		goto abort_with_msix_vectors;
	}
	if (vecs_enabled != num_vecs_requested) {
		int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
		int vecs_per_type = new_num_ntfy_blks / 2;
		int vecs_left = new_num_ntfy_blks % 2;

		priv->num_ntfy_blks = new_num_ntfy_blks;
		priv->mgmt_msix_idx = priv->num_ntfy_blks;
		priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
						vecs_per_type);
		priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
						vecs_per_type + vecs_left);
		dev_err(&priv->pdev->dev,
			"Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n",
			vecs_enabled, priv->tx_cfg.max_queues,
			priv->rx_cfg.max_queues);
		if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)
			priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
		if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
			priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
	}
	/* Setup Management Vector - the last vector */
	snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s",
		 pci_name(priv->pdev));
	err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
			  gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
	if (err) {
		dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
		goto abort_with_msix_enabled;
	}
	priv->irq_db_indices =
		dma_alloc_coherent(&priv->pdev->dev,
				   priv->num_ntfy_blks *
				   sizeof(*priv->irq_db_indices),
				   &priv->irq_db_indices_bus, GFP_KERNEL);
	if (!priv->irq_db_indices) {
		err = -ENOMEM;
		goto abort_with_mgmt_vector;
	}

	priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks *
				     sizeof(*priv->ntfy_blocks), GFP_KERNEL);
	if (!priv->ntfy_blocks) {
		err = -ENOMEM;
		goto abort_with_irq_db_indices;
	}

	/* Setup the other blocks - the first n-1 vectors */
	node_mask = gve_get_node_mask(priv);
	cur_cpu = cpumask_first(node_mask);
	for (i = 0; i < priv->num_ntfy_blks; i++) {
		struct gve_notify_block *block = &priv->ntfy_blocks[i];
		int msix_idx = i;

		snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s",
			 i, pci_name(priv->pdev));
		block->priv = priv;
		err = request_irq(priv->msix_vectors[msix_idx].vector,
				  gve_is_gqi(priv) ? gve_intr : gve_intr_dqo,
				  0, block->name, block);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Failed to receive msix vector %d\n", i);
			goto abort_with_some_ntfy_blocks;
		}
		block->irq = priv->msix_vectors[msix_idx].vector;
		irq_set_affinity_and_hint(block->irq,
					  cpumask_of(cur_cpu));
		block->irq_db_index = &priv->irq_db_indices[i].index;

		cur_cpu = cpumask_next(cur_cpu, node_mask);
		/* Wrap once CPUs in the node have been exhausted, or when
		 * starting RX queue affinities. TX and RX queues of the same
		 * index share affinity.
		 */
		if (cur_cpu >= nr_cpu_ids || (i + 1) == priv->tx_cfg.max_queues)
			cur_cpu = cpumask_first(node_mask);
	}
	return 0;
abort_with_some_ntfy_blocks:
	for (j = 0; j < i; j++) {
		struct gve_notify_block *block = &priv->ntfy_blocks[j];
		int msix_idx = j;

		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
				      NULL);
		free_irq(priv->msix_vectors[msix_idx].vector, block);
		block->irq = 0;
	}
	kvfree(priv->ntfy_blocks);
	priv->ntfy_blocks = NULL;
abort_with_irq_db_indices:
	dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
			  sizeof(*priv->irq_db_indices),
			  priv->irq_db_indices, priv->irq_db_indices_bus);
	priv->irq_db_indices = NULL;
abort_with_mgmt_vector:
	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
abort_with_msix_enabled:
	pci_disable_msix(priv->pdev);
abort_with_msix_vectors:
	kvfree(priv->msix_vectors);
	priv->msix_vectors = NULL;
	return err;
}

static void gve_free_notify_blocks(struct gve_priv *priv)
{
	int i;

	if (!priv->msix_vectors)
		return;

	/* Free the irqs */
	for (i = 0; i < priv->num_ntfy_blks; i++) {
		struct gve_notify_block *block = &priv->ntfy_blocks[i];
		int msix_idx = i;

		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
				      NULL);
		free_irq(priv->msix_vectors[msix_idx].vector, block);
		block->irq = 0;
	}
	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
	kvfree(priv->ntfy_blocks);
	priv->ntfy_blocks = NULL;
	dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
			  sizeof(*priv->irq_db_indices),
			  priv->irq_db_indices, priv->irq_db_indices_bus);
	priv->irq_db_indices = NULL;
	pci_disable_msix(priv->pdev);
	kvfree(priv->msix_vectors);
	priv->msix_vectors = NULL;
}

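/* Allocate host-side resources (caches, counters, clock, notify blocks,
 * stats report) and hand their addresses to the device via the admin queue.
 */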
static int gve_setup_device_resources(struct gve_priv *priv)
{
	int err;

	err = gve_alloc_flow_rule_caches(priv);
	if (err)
		return err;

	err = gve_alloc_rss_config_cache(priv);
	if (err)
		goto abort_with_flow_rule_caches;

	err = gve_alloc_counter_array(priv);
	if (err)
		goto abort_with_rss_config_cache;

	err = gve_init_clock(priv);
	if (err)
		goto abort_with_counter;

	err = gve_alloc_notify_blocks(priv);
	if (err)
		goto abort_with_clock;

	err = gve_alloc_stats_report(priv);
	if (err)
		goto abort_with_ntfy_blocks;

	err = gve_adminq_configure_device_resources(priv,
						    priv->counter_array_bus,
						    priv->num_event_counters,
						    priv->irq_db_indices_bus,
						    priv->num_ntfy_blks);
	if (unlikely(err)) {
		dev_err(&priv->pdev->dev,
			"could not setup device_resources: err=%d\n", err);
		err = -ENXIO;
		goto abort_with_stats_report;
	}

	if (!gve_is_gqi(priv)) {
		priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo),
					       GFP_KERNEL);
		if (!priv->ptype_lut_dqo) {
			err = -ENOMEM;
			goto abort_with_stats_report;
		}
		err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Failed to get ptype map: err=%d\n", err);
			goto abort_with_ptype_lut;
		}
	}

	err = gve_init_rss_config(priv, priv->rx_cfg.num_queues);
	if (err) {
		dev_err(&priv->pdev->dev, "Failed to init RSS config");
		goto abort_with_ptype_lut;
	}

	err = gve_adminq_report_stats(priv, priv->stats_report_len,
				      priv->stats_report_bus,
				      GVE_STATS_REPORT_TIMER_PERIOD);
	if (err)
		dev_err(&priv->pdev->dev,
			"Failed to report stats: err=%d\n", err);
	gve_set_device_resources_ok(priv);
	return 0;

abort_with_ptype_lut:
	kvfree(priv->ptype_lut_dqo);
	priv->ptype_lut_dqo = NULL;
abort_with_stats_report:
	gve_free_stats_report(priv);
abort_with_ntfy_blocks:
	gve_free_notify_blocks(priv);
abort_with_clock:
	gve_teardown_clock(priv);
abort_with_counter:
	gve_free_counter_array(priv);
abort_with_rss_config_cache:
	gve_free_rss_config_cache(priv);
abort_with_flow_rule_caches:
	gve_free_flow_rule_caches(priv);

	return err;
}

static void gve_trigger_reset(struct gve_priv *priv);

static void gve_teardown_device_resources(struct gve_priv *priv)
{
	int err;

	/* Tell device its resources are being freed */
	if (gve_get_device_resources_ok(priv)) {
		err = gve_flow_rules_reset(priv);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Failed to reset flow rules: err=%d\n", err);
			gve_trigger_reset(priv);
		}
		/* detach the stats report */
		err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Failed to detach stats report: err=%d\n", err);
			gve_trigger_reset(priv);
		}
		err = gve_adminq_deconfigure_device_resources(priv);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Could not deconfigure device resources: err=%d\n",
				err);
			gve_trigger_reset(priv);
		}
	}

	kvfree(priv->ptype_lut_dqo);
	priv->ptype_lut_dqo = NULL;

	gve_free_flow_rule_caches(priv);
	gve_free_rss_config_cache(priv);
	gve_free_counter_array(priv);
	gve_free_notify_blocks(priv);
	gve_free_stats_report(priv);
	gve_teardown_clock(priv);
	gve_clear_device_resources_ok(priv);
}

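/* Register/unregister a queue page list with the device, tracking the total
 * number of registered pages against the device-imposed limit.
 */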
static int gve_unregister_qpl(struct gve_priv *priv,
			      struct gve_queue_page_list *qpl)
{
	int err;

	if (!qpl)
		return 0;

	err = gve_adminq_unregister_page_list(priv, qpl->id);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "Failed to unregister queue page list %d\n",
			  qpl->id);
		return err;
	}

	priv->num_registered_pages -= qpl->num_entries;
	return 0;
}

static int gve_register_qpl(struct gve_priv *priv,
			    struct gve_queue_page_list *qpl)
{
	int pages;
	int err;

	if (!qpl)
		return 0;

	pages = qpl->num_entries;

	if (pages + priv->num_registered_pages > priv->max_registered_pages) {
		netif_err(priv, drv, priv->dev,
			  "Reached max number of registered pages %llu > %llu\n",
			  pages + priv->num_registered_pages,
			  priv->max_registered_pages);
		return -EINVAL;
	}

	err = gve_adminq_register_page_list(priv, qpl);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "failed to register queue page list %d\n",
			  qpl->id);
		return err;
	}

	priv->num_registered_pages += pages;
	return 0;
}

static struct gve_queue_page_list *gve_tx_get_qpl(struct gve_priv *priv, int idx)
{
	struct gve_tx_ring *tx = &priv->tx[idx];

	if (gve_is_gqi(priv))
		return tx->tx_fifo.qpl;
	else
		return tx->dqo.qpl;
}

static struct gve_queue_page_list *gve_rx_get_qpl(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];

	if (gve_is_gqi(priv))
		return rx->data.qpl;
	else
		return rx->dqo.qpl;
}

static int gve_register_qpls(struct gve_priv *priv)
{
	int num_tx_qpls, num_rx_qpls;
	int err;
	int i;

	num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_is_qpl(priv));
	num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv));

	for (i = 0; i < num_tx_qpls; i++) {
		err = gve_register_qpl(priv, gve_tx_get_qpl(priv, i));
		if (err)
			return err;
	}

	for (i = 0; i < num_rx_qpls; i++) {
		err = gve_register_qpl(priv, gve_rx_get_qpl(priv, i));
		if (err)
			return err;
	}

	return 0;
}

static int gve_unregister_qpls(struct gve_priv *priv)
{
	int num_tx_qpls, num_rx_qpls;
	int err;
	int i;

	num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_is_qpl(priv));
	num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv));

	for (i = 0; i < num_tx_qpls; i++) {
		err = gve_unregister_qpl(priv, gve_tx_get_qpl(priv, i));
		/* This failure will trigger a reset - no need to clean */
		if (err)
			return err;
	}

	for (i = 0; i < num_rx_qpls; i++) {
		err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, i));
		/* This failure will trigger a reset - no need to clean */
		if (err)
			return err;
	}
	return 0;
}

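/* Ask the device to create all TX and RX queues, then make the initial RX
 * buffers available to the NIC (doorbell write on GQI, buffer post on DQO).
 */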
static int gve_create_rings(struct gve_priv *priv)
{
	int num_tx_queues = gve_num_tx_queues(priv);
	int err;
	int i;

	err = gve_adminq_create_tx_queues(priv, 0, num_tx_queues);
	if (err) {
		netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n",
			  num_tx_queues);
		/* This failure will trigger a reset - no need to clean
		 * up
		 */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "created %d tx queues\n",
		  num_tx_queues);

	err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues);
	if (err) {
		netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n",
			  priv->rx_cfg.num_queues);
		/* This failure will trigger a reset - no need to clean
		 * up
		 */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "created %d rx queues\n",
		  priv->rx_cfg.num_queues);

	if (gve_is_gqi(priv)) {
		/* Rx data ring has been prefilled with packet buffers at queue
		 * allocation time.
		 *
		 * Write the doorbell to provide descriptor slots and packet
		 * buffers to the NIC.
		 */
		for (i = 0; i < priv->rx_cfg.num_queues; i++)
			gve_rx_write_doorbell(priv, &priv->rx[i]);
	} else {
		for (i = 0; i < priv->rx_cfg.num_queues; i++) {
			/* Post buffers and ring doorbell. */
			gve_rx_post_buffers_dqo(&priv->rx[i]);
		}
	}

	return 0;
}

static void init_xdp_sync_stats(struct gve_priv *priv)
{
	int start_id = gve_xdp_tx_start_queue_id(priv);
	int i;

	/* Init stats */
	for (i = start_id; i < start_id + priv->tx_cfg.num_xdp_queues; i++) {
		int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);

		u64_stats_init(&priv->tx[i].statss);
		priv->tx[i].ntfy_id = ntfy_idx;
	}
}

static void gve_init_sync_stats(struct gve_priv *priv)
{
	int i;

	for (i = 0; i < priv->tx_cfg.num_queues; i++)
		u64_stats_init(&priv->tx[i].statss);

	/* Init stats for XDP TX queues */
	init_xdp_sync_stats(priv);

	for (i = 0; i < priv->rx_cfg.num_queues; i++)
		u64_stats_init(&priv->rx[i].statss);
}

static void gve_tx_get_curr_alloc_cfg(struct gve_priv *priv,
				      struct gve_tx_alloc_rings_cfg *cfg)
{
	cfg->qcfg = &priv->tx_cfg;
	cfg->raw_addressing = !gve_is_qpl(priv);
	cfg->ring_size = priv->tx_desc_cnt;
	cfg->num_xdp_rings = cfg->qcfg->num_xdp_queues;
	cfg->tx = priv->tx;
}

static void gve_tx_stop_rings(struct gve_priv *priv, int num_rings)
{
	int i;

	if (!priv->tx)
		return;

	for (i = 0; i < num_rings; i++) {
		if (gve_is_gqi(priv))
			gve_tx_stop_ring_gqi(priv, i);
		else
			gve_tx_stop_ring_dqo(priv, i);
	}
}

static void gve_tx_start_rings(struct gve_priv *priv, int num_rings)
{
	int i;

	for (i = 0; i < num_rings; i++) {
		if (gve_is_gqi(priv))
			gve_tx_start_ring_gqi(priv, i);
		else
			gve_tx_start_ring_dqo(priv, i);
	}
}

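/* Allocate TX and RX ring memory for the given configs; on failure any TX
 * ring memory already allocated is freed before returning.
 */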
static int gve_queues_mem_alloc(struct gve_priv *priv,
				struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
				struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
{
	int err;

	if (gve_is_gqi(priv))
		err = gve_tx_alloc_rings_gqi(priv, tx_alloc_cfg);
	else
		err = gve_tx_alloc_rings_dqo(priv, tx_alloc_cfg);
	if (err)
		return err;

	if (gve_is_gqi(priv))
		err = gve_rx_alloc_rings_gqi(priv, rx_alloc_cfg);
	else
		err = gve_rx_alloc_rings_dqo(priv, rx_alloc_cfg);
	if (err)
		goto free_tx;

	return 0;

free_tx:
	if (gve_is_gqi(priv))
		gve_tx_free_rings_gqi(priv, tx_alloc_cfg);
	else
		gve_tx_free_rings_dqo(priv, tx_alloc_cfg);
	return err;
}

static int gve_destroy_rings(struct gve_priv *priv)
{
	int num_tx_queues = gve_num_tx_queues(priv);
	int err;

	err = gve_adminq_destroy_tx_queues(priv, 0, num_tx_queues);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "failed to destroy tx queues\n");
		/* This failure will trigger a reset - no need to clean up */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n");
	err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "failed to destroy rx queues\n");
		/* This failure will trigger a reset - no need to clean up */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n");
	return 0;
}

static void gve_queues_mem_free(struct gve_priv *priv,
				struct gve_tx_alloc_rings_cfg *tx_cfg,
				struct gve_rx_alloc_rings_cfg *rx_cfg)
{
	if (gve_is_gqi(priv)) {
		gve_tx_free_rings_gqi(priv, tx_cfg);
		gve_rx_free_rings_gqi(priv, rx_cfg);
	} else {
		gve_tx_free_rings_dqo(priv, tx_cfg);
		gve_rx_free_rings_dqo(priv, rx_cfg);
	}
}

int gve_alloc_page(struct gve_priv *priv, struct device *dev,
		   struct page **page, dma_addr_t *dma,
		   enum dma_data_direction dir, gfp_t gfp_flags)
{
	*page = alloc_pages_node(priv->numa_node, gfp_flags, 0);
	if (!*page) {
		priv->page_alloc_fail++;
		return -ENOMEM;
	}
	*dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
	if (dma_mapping_error(dev, *dma)) {
		priv->dma_mapping_error++;
		put_page(*page);
		return -ENOMEM;
	}
	return 0;
}

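/* Allocate a queue page list and DMA-map each of its pages; on any failure
 * the partially built QPL is freed and NULL is returned.
 */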
struct gve_queue_page_list *gve_alloc_queue_page_list(struct gve_priv *priv,
						      u32 id, int pages)
{
	struct gve_queue_page_list *qpl;
	int err;
	int i;

	qpl = kvzalloc(sizeof(*qpl), GFP_KERNEL);
	if (!qpl)
		return NULL;

	qpl->id = id;
	qpl->num_entries = 0;
	qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL);
	if (!qpl->pages)
		goto abort;

	qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL);
	if (!qpl->page_buses)
		goto abort;

	for (i = 0; i < pages; i++) {
		err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
				     &qpl->page_buses[i],
				     gve_qpl_dma_dir(priv, id), GFP_KERNEL);
		if (err)
			goto abort;
		qpl->num_entries++;
	}

	return qpl;

abort:
	gve_free_queue_page_list(priv, qpl, id);
	return NULL;
}

void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
		   enum dma_data_direction dir)
{
	if (!dma_mapping_error(dev, dma))
		dma_unmap_page(dev, dma, PAGE_SIZE, dir);
	if (page)
		put_page(page);
}

void gve_free_queue_page_list(struct gve_priv *priv,
			      struct gve_queue_page_list *qpl,
			      u32 id)
{
	int i;

	if (!qpl)
		return;
	if (!qpl->pages)
		goto free_qpl;
	if (!qpl->page_buses)
		goto free_pages;

	for (i = 0; i < qpl->num_entries; i++)
		gve_free_page(&priv->pdev->dev, qpl->pages[i],
			      qpl->page_buses[i], gve_qpl_dma_dir(priv, id));

	kvfree(qpl->page_buses);
	qpl->page_buses = NULL;
free_pages:
	kvfree(qpl->pages);
	qpl->pages = NULL;
free_qpl:
	kvfree(qpl);
}

/* Use this to schedule a reset when the device is capable of continuing
 * to handle other requests in its current state. If it is not, do a reset
 * in thread instead.
 */
void gve_schedule_reset(struct gve_priv *priv)
{
	gve_set_do_reset(priv);
	queue_work(priv->gve_wq, &priv->service_task);
}

static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up);
static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
static void gve_turndown(struct gve_priv *priv);
static void gve_turnup(struct gve_priv *priv);

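/* Helpers to attach/detach an AF_XDP buffer pool to an RX queue (and its
 * paired XDP TX queue) and to tear down per-queue XDP RX info.
 */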
static void gve_unreg_xsk_pool(struct gve_priv *priv, u16 qid)
{
	struct gve_rx_ring *rx;

	if (!priv->rx)
		return;

	rx = &priv->rx[qid];
	rx->xsk_pool = NULL;
	if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
		xdp_rxq_info_unreg_mem_model(&rx->xdp_rxq);

	if (!priv->tx)
		return;
	priv->tx[gve_xdp_tx_queue_id(priv, qid)].xsk_pool = NULL;
}

static int gve_reg_xsk_pool(struct gve_priv *priv, struct net_device *dev,
			    struct xsk_buff_pool *pool, u16 qid)
{
	struct gve_rx_ring *rx;
	u16 tx_qid;
	int err;

	rx = &priv->rx[qid];
	err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
					 MEM_TYPE_XSK_BUFF_POOL, pool);
	if (err) {
		gve_unreg_xsk_pool(priv, qid);
		return err;
	}

	rx->xsk_pool = pool;

	tx_qid = gve_xdp_tx_queue_id(priv, qid);
	priv->tx[tx_qid].xsk_pool = pool;

	return 0;
}

static void gve_unreg_xdp_info(struct gve_priv *priv)
{
	int i;

	if (!priv->tx_cfg.num_xdp_queues || !priv->rx)
		return;

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		struct gve_rx_ring *rx = &priv->rx[i];

		if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
			xdp_rxq_info_unreg(&rx->xdp_rxq);

		gve_unreg_xsk_pool(priv, i);
	}
}

static struct xsk_buff_pool *gve_get_xsk_pool(struct gve_priv *priv, int qid)
{
	if (!test_bit(qid, priv->xsk_pools))
		return NULL;

	return xsk_get_pool_from_qid(priv->dev, qid);
}

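/* Register XDP RX queue info and the appropriate memory model (XSK pool,
 * page-shared for QPL, or page pool for DQO) for every RX queue.
 */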
static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
{
	struct napi_struct *napi;
	struct gve_rx_ring *rx;
	int err = 0;
	int i;

	if (!priv->tx_cfg.num_xdp_queues)
		return 0;

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		struct xsk_buff_pool *xsk_pool;

		rx = &priv->rx[i];
		napi = &priv->ntfy_blocks[rx->ntfy_id].napi;

		err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i,
				       napi->napi_id);
		if (err)
			goto err;

		xsk_pool = gve_get_xsk_pool(priv, i);
		if (xsk_pool)
			err = gve_reg_xsk_pool(priv, dev, xsk_pool, i);
		else if (gve_is_qpl(priv))
			err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
							 MEM_TYPE_PAGE_SHARED,
							 NULL);
		else
			err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
							 MEM_TYPE_PAGE_POOL,
							 rx->dqo.page_pool);
		if (err)
			goto err;
	}
	return 0;

err:
	gve_unreg_xdp_info(priv);
	return err;
}

static void gve_drain_page_cache(struct gve_priv *priv)
{
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++)
		page_frag_cache_drain(&priv->rx[i].page_cache);
}

static void gve_rx_get_curr_alloc_cfg(struct gve_priv *priv,
				      struct gve_rx_alloc_rings_cfg *cfg)
{
	cfg->qcfg_rx = &priv->rx_cfg;
	cfg->qcfg_tx = &priv->tx_cfg;
	cfg->raw_addressing = !gve_is_qpl(priv);
	cfg->enable_header_split = priv->header_split_enabled;
	cfg->ring_size = priv->rx_desc_cnt;
	cfg->packet_buffer_size = priv->rx_cfg.packet_buffer_size;
	cfg->rx = priv->rx;
	cfg->xdp = !!cfg->qcfg_tx->num_xdp_queues;
}

void gve_get_curr_alloc_cfgs(struct gve_priv *priv,
			     struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
			     struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
{
	gve_tx_get_curr_alloc_cfg(priv, tx_alloc_cfg);
	gve_rx_get_curr_alloc_cfg(priv, rx_alloc_cfg);
}

static void gve_rx_start_ring(struct gve_priv *priv, int i)
{
	if (gve_is_gqi(priv))
		gve_rx_start_ring_gqi(priv, i);
	else
		gve_rx_start_ring_dqo(priv, i);
}

static void gve_rx_start_rings(struct gve_priv *priv, int num_rings)
{
	int i;

	for (i = 0; i < num_rings; i++)
		gve_rx_start_ring(priv, i);
}

static void gve_rx_stop_ring(struct gve_priv *priv, int i)
{
	if (gve_is_gqi(priv))
		gve_rx_stop_ring_gqi(priv, i);
	else
		gve_rx_stop_ring_dqo(priv, i);
}

static void gve_rx_stop_rings(struct gve_priv *priv, int num_rings)
{
	int i;

	if (!priv->rx)
		return;

	for (i = 0; i < num_rings; i++)
		gve_rx_stop_ring(priv, i);
}

static void gve_queues_mem_remove(struct gve_priv *priv)
{
	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};

	gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
	gve_queues_mem_free(priv, &tx_alloc_cfg, &rx_alloc_cfg);
	priv->tx = NULL;
	priv->rx = NULL;
}

/* The passed-in queue memory is stored into priv and the queues are made live.
 * No memory is allocated. Passed-in memory is freed on errors.
 */
static int gve_queues_start(struct gve_priv *priv,
			    struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
			    struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
{
	struct net_device *dev = priv->dev;
	int err;

	/* Record new resources into priv */
	priv->tx = tx_alloc_cfg->tx;
	priv->rx = rx_alloc_cfg->rx;

	/* Record new configs into priv */
	priv->tx_cfg = *tx_alloc_cfg->qcfg;
	priv->tx_cfg.num_xdp_queues = tx_alloc_cfg->num_xdp_rings;
	priv->rx_cfg = *rx_alloc_cfg->qcfg_rx;
	priv->tx_desc_cnt = tx_alloc_cfg->ring_size;
	priv->rx_desc_cnt = rx_alloc_cfg->ring_size;

	gve_tx_start_rings(priv, gve_num_tx_queues(priv));
	gve_rx_start_rings(priv, rx_alloc_cfg->qcfg_rx->num_queues);
	gve_init_sync_stats(priv);

	err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
	if (err)
		goto stop_and_free_rings;
	err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues);
	if (err)
		goto stop_and_free_rings;

	err = gve_reg_xdp_info(priv, dev);
	if (err)
		goto stop_and_free_rings;

	if (rx_alloc_cfg->reset_rss) {
		err = gve_init_rss_config(priv, priv->rx_cfg.num_queues);
		if (err)
			goto reset;
	}

	err = gve_register_qpls(priv);
	if (err)
		goto reset;

	priv->header_split_enabled = rx_alloc_cfg->enable_header_split;
	priv->rx_cfg.packet_buffer_size = rx_alloc_cfg->packet_buffer_size;

	err = gve_create_rings(priv);
	if (err)
		goto reset;

	gve_set_device_rings_ok(priv);

	if (gve_get_report_stats(priv))
		mod_timer(&priv->stats_report_timer,
			  round_jiffies(jiffies +
				msecs_to_jiffies(priv->stats_report_timer_period)));

	gve_turnup(priv);
	queue_work(priv->gve_wq, &priv->service_task);
	priv->interface_up_cnt++;
	return 0;

reset:
	if (gve_get_reset_in_progress(priv))
		goto stop_and_free_rings;
	gve_reset_and_teardown(priv, true);
	/* if this fails there is nothing we can do so just ignore the return */
	gve_reset_recovery(priv, false);
	/* return the original error */
	return err;
stop_and_free_rings:
	gve_tx_stop_rings(priv, gve_num_tx_queues(priv));
	gve_rx_stop_rings(priv, priv->rx_cfg.num_queues);
	gve_queues_mem_remove(priv);
	return err;
}

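/* ndo_open: allocate queue memory for the current configuration and hand it
 * to gve_queues_start, which owns (and frees) it from that point on.
 */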
static int gve_open(struct net_device *dev)
{
	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
	struct gve_priv *priv = netdev_priv(dev);
	int err;

	gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);

	err = gve_queues_mem_alloc(priv, &tx_alloc_cfg, &rx_alloc_cfg);
	if (err)
		return err;

	/* No need to free on error: ownership of resources is lost after
	 * calling gve_queues_start.
	 */
	err = gve_queues_start(priv, &tx_alloc_cfg, &rx_alloc_cfg);
	if (err)
		return err;

	return 0;
}

static int gve_queues_stop(struct gve_priv *priv)
{
	int err;

	netif_carrier_off(priv->dev);
	if (gve_get_device_rings_ok(priv)) {
		gve_turndown(priv);
		gve_drain_page_cache(priv);
		err = gve_destroy_rings(priv);
		if (err)
			goto err;
		err = gve_unregister_qpls(priv);
		if (err)
			goto err;
		gve_clear_device_rings_ok(priv);
	}
	timer_delete_sync(&priv->stats_report_timer);

	gve_unreg_xdp_info(priv);

	gve_tx_stop_rings(priv, gve_num_tx_queues(priv));
	gve_rx_stop_rings(priv, priv->rx_cfg.num_queues);

	priv->interface_down_cnt++;
	return 0;

err:
	/* This must have been called from a reset due to the rtnl lock
	 * so just return at this point.
	 */
	if (gve_get_reset_in_progress(priv))
		return err;
	/* Otherwise reset before returning */
	gve_reset_and_teardown(priv, true);
	return gve_reset_recovery(priv, false);
}

static int gve_close(struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);
	int err;

	err = gve_queues_stop(priv);
	if (err)
		return err;

	gve_queues_mem_remove(priv);
	return 0;
}
static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
{
	if (!gve_get_napi_enabled(priv))
		return;
	if (link_status == netif_carrier_ok(priv->dev))
		return;

	if (link_status) {
		netdev_info(priv->dev, "Device link is up.\n");
		netif_carrier_on(priv->dev);
	} else {
		netdev_info(priv->dev, "Device link is down.\n");
		netif_carrier_off(priv->dev);
	}
}
static int gve_configure_rings_xdp(struct gve_priv *priv,
				   u16 num_xdp_rings)
{
	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};

	gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
	tx_alloc_cfg.num_xdp_rings = num_xdp_rings;
	rx_alloc_cfg.xdp = !!num_xdp_rings;
	return gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
}
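/* Installing or removing an XDP program while the interface is down only
 * swaps the cached bpf_prog pointer and updates tx_cfg.num_xdp_queues; the
 * rings are reallocated on the next open. While the interface is up, the
 * rings are reconfigured immediately via gve_configure_rings_xdp().
 */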
static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog,
		       struct netlink_ext_ack *extack)
{
	struct bpf_prog *old_prog;
	int err = 0;
	u32 status;

	old_prog = READ_ONCE(priv->xdp_prog);
	if (!netif_running(priv->dev)) {
		WRITE_ONCE(priv->xdp_prog, prog);
		if (old_prog)
			bpf_prog_put(old_prog);

		/* Update priv XDP queue configuration */
		priv->tx_cfg.num_xdp_queues = priv->xdp_prog ?
			priv->rx_cfg.num_queues : 0;
		return 0;
	}

	if (!old_prog && prog)
		err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues);
	else if (old_prog && !prog)
		err = gve_configure_rings_xdp(priv, 0);
	if (err)
		goto out;

	WRITE_ONCE(priv->xdp_prog, prog);
	if (old_prog)
		bpf_prog_put(old_prog);

out:
	status = ioread32be(&priv->reg_bar0->device_status);
	gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
	return err;
}
static int gve_xdp_xmit(struct net_device *dev, int n,
			struct xdp_frame **frames, u32 flags)
{
	struct gve_priv *priv = netdev_priv(dev);

	if (priv->queue_format == GVE_GQI_QPL_FORMAT)
		return gve_xdp_xmit_gqi(dev, n, frames, flags);
	else if (priv->queue_format == GVE_DQO_RDA_FORMAT)
		return gve_xdp_xmit_dqo(dev, n, frames, flags);

	return -EOPNOTSUPP;
}
static int gve_xsk_pool_enable(struct net_device *dev,
			       struct xsk_buff_pool *pool,
			       u16 qid)
{
	struct gve_priv *priv = netdev_priv(dev);
	int err;

	if (qid >= priv->rx_cfg.num_queues) {
		dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid);
		return -EINVAL;
	}
	if (xsk_pool_get_rx_frame_size(pool) <
	    priv->dev->max_mtu + sizeof(struct ethhdr)) {
		dev_err(&priv->pdev->dev, "xsk pool frame_len too small");
		return -EINVAL;
	}

	err = xsk_pool_dma_map(pool, &priv->pdev->dev,
			       DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
	if (err)
		return err;

	set_bit(qid, priv->xsk_pools);

	/* If XDP prog is not installed or interface is down, return. */
	if (!priv->xdp_prog || !netif_running(dev))
		return 0;

	err = gve_reg_xsk_pool(priv, dev, pool, qid);
	if (err)
		goto err_xsk_pool_dma_mapped;

	/* Stop and start RDA queues to repost buffers. */
	if (!gve_is_qpl(priv)) {
		err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues);
		if (err)
			goto err_xsk_pool_registered;
	}
	return 0;

err_xsk_pool_registered:
	gve_unreg_xsk_pool(priv, qid);
err_xsk_pool_dma_mapped:
	clear_bit(qid, priv->xsk_pools);
	xsk_pool_dma_unmap(pool,
			   DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
	return err;
}
static int gve_xsk_pool_disable(struct net_device *dev,
				u16 qid)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct napi_struct *napi_rx;
	struct napi_struct *napi_tx;
	struct xsk_buff_pool *pool;
	int tx_qid;
	int err;

	if (qid >= priv->rx_cfg.num_queues)
		return -EINVAL;

	clear_bit(qid, priv->xsk_pools);

	pool = xsk_get_pool_from_qid(dev, qid);
	if (pool)
		xsk_pool_dma_unmap(pool,
				   DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);

	if (!netif_running(dev) || !priv->tx_cfg.num_xdp_queues)
		return 0;

	/* Stop and start RDA queues to repost buffers. */
	if (!gve_is_qpl(priv) && priv->xdp_prog) {
		err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues);
		if (err)
			return err;
	}

	napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi;
	napi_disable(napi_rx); /* make sure current rx poll is done */

	tx_qid = gve_xdp_tx_queue_id(priv, qid);
	napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi;
	napi_disable(napi_tx); /* make sure current tx poll is done */

	gve_unreg_xsk_pool(priv, qid);

	smp_mb(); /* Make sure it is visible to the workers on datapath */
	napi_enable(napi_rx);
	napi_enable(napi_tx);
	if (gve_is_gqi(priv)) {
		if (gve_rx_work_pending(&priv->rx[qid]))
			napi_schedule(napi_rx);

		if (gve_tx_clean_pending(priv, &priv->tx[tx_qid]))
			napi_schedule(napi_tx);
	}

	return 0;
}
static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct napi_struct *napi;

	if (!gve_get_napi_enabled(priv))
		return -ENETDOWN;

	if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog)
		return -EINVAL;

	napi = &priv->ntfy_blocks[gve_rx_idx_to_ntfy(priv, queue_id)].napi;
	if (!napi_if_scheduled_mark_missed(napi)) {
		/* Call local_bh_enable to trigger SoftIRQ processing */
		local_bh_disable();
		napi_schedule(napi);
		local_bh_enable();
	}

	return 0;
}
static int verify_xdp_configuration(struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);
	u16 max_xdp_mtu;

	if (dev->features & NETIF_F_LRO) {
		netdev_warn(dev, "XDP is not supported when LRO is on.\n");
		return -EOPNOTSUPP;
	}

	if (priv->header_split_enabled) {
		netdev_warn(dev, "XDP is not supported when header-data split is enabled.\n");
		return -EOPNOTSUPP;
	}
	max_xdp_mtu = priv->rx_cfg.packet_buffer_size - sizeof(struct ethhdr);
	if (priv->queue_format == GVE_GQI_QPL_FORMAT)
		max_xdp_mtu -= GVE_RX_PAD;

	if (dev->mtu > max_xdp_mtu) {
		netdev_warn(dev, "XDP is not supported for mtu %d.\n",
			    dev->mtu);
		return -EOPNOTSUPP;
	}

	if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues ||
	    (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) {
		netdev_warn(dev, "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d",
			    priv->rx_cfg.num_queues,
			    priv->tx_cfg.num_queues,
			    priv->tx_cfg.max_queues);
		return -EINVAL;
	}
	return 0;
}
static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
	struct gve_priv *priv = netdev_priv(dev);
	int err;

	err = verify_xdp_configuration(dev);
	if (err)
		return err;
	switch (xdp->command) {
	case XDP_SETUP_PROG:
		return gve_set_xdp(priv, xdp->prog, xdp->extack);
	case XDP_SETUP_XSK_POOL:
		if (xdp->xsk.pool)
			return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id);
		else
			return gve_xsk_pool_disable(dev, xdp->xsk.queue_id);
	default:
		return -EINVAL;
	}
}
int gve_init_rss_config(struct gve_priv *priv, u16 num_queues)
{
	struct gve_rss_config *rss_config = &priv->rss_config;
	struct ethtool_rxfh_param rxfh = {0};
	u16 i;

	if (!priv->cache_rss_config)
		return 0;

	for (i = 0; i < priv->rss_lut_size; i++)
		rss_config->hash_lut[i] =
			ethtool_rxfh_indir_default(i, num_queues);

	netdev_rss_key_fill(rss_config->hash_key, priv->rss_key_size);

	rxfh.hfunc = ETH_RSS_HASH_TOP;

	return gve_adminq_configure_rss(priv, &rxfh);
}
int gve_flow_rules_reset(struct gve_priv *priv)
{
	if (!priv->max_flow_rules)
		return 0;

	return gve_adminq_reset_flow_rules(priv);
}
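/* gve_adjust_config() allocates the resources for the new configuration
 * before tearing down the old one, so an allocation failure leaves the
 * currently running queues untouched.
 */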
int gve_adjust_config(struct gve_priv *priv,
		      struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
		      struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
{
	int err;

	/* Allocate resources for the new configuration */
	err = gve_queues_mem_alloc(priv, tx_alloc_cfg, rx_alloc_cfg);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "Adjust config failed to alloc new queues");
		return err;
	}

	/* Teardown the device and free existing resources */
	err = gve_close(priv->dev);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "Adjust config failed to close old queues");
		gve_queues_mem_free(priv, tx_alloc_cfg, rx_alloc_cfg);
		return err;
	}

	/* Bring the device back up again with the new resources. */
	err = gve_queues_start(priv, tx_alloc_cfg, rx_alloc_cfg);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "Adjust config failed to start new queues, !!! DISABLING ALL QUEUES !!!\n");
		/* No need to free on error: ownership of resources is lost after
		 * calling gve_queues_start.
		 */
		gve_turndown(priv);
		return err;
	}

	return 0;
}
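/* Queue-count changes from ethtool are applied live through
 * gve_adjust_config() when the interface is running; otherwise the new
 * configuration is only recorded in priv and takes effect on the next open.
 */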
int gve_adjust_queues(struct gve_priv *priv,
		      struct gve_rx_queue_config new_rx_config,
		      struct gve_tx_queue_config new_tx_config,
		      bool reset_rss)
{
	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
	int err;

	gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);

	/* Relay the new config from ethtool */
	tx_alloc_cfg.qcfg = &new_tx_config;
	rx_alloc_cfg.qcfg_tx = &new_tx_config;
	rx_alloc_cfg.qcfg_rx = &new_rx_config;
	rx_alloc_cfg.reset_rss = reset_rss;
	if (netif_running(priv->dev)) {
		err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
		return err;
	}
	/* Set the config for the next up. */
	if (reset_rss) {
		err = gve_init_rss_config(priv, new_rx_config.num_queues);
		if (err)
			return err;
	}
	priv->tx_cfg = new_tx_config;
	priv->rx_cfg = new_rx_config;

	return 0;
}
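/* Note: gve_turndown()/gve_turnup() use the _locked NAPI helpers, so they
 * are expected to run with the netdev instance lock held, as gve_handle_reset()
 * does via netdev_lock() before calling gve_reset().
 */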
static void gve_turndown(struct gve_priv *priv)
{
	int idx;

	if (netif_carrier_ok(priv->dev))
		netif_carrier_off(priv->dev);

	if (!gve_get_napi_enabled(priv))
		return;

	/* Disable napi to prevent more work from coming in */
	for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		if (!gve_tx_was_added_to_block(priv, idx))
			continue;
		if (idx < priv->tx_cfg.num_queues)
			netif_queue_set_napi(priv->dev, idx,
					     NETDEV_QUEUE_TYPE_TX, NULL);

		napi_disable_locked(&block->napi);
	}
	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		if (!gve_rx_was_added_to_block(priv, idx))
			continue;
		netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX,
				     NULL);
		napi_disable_locked(&block->napi);
	}

	/* Stop tx queues */
	netif_tx_disable(priv->dev);
	xdp_features_clear_redirect_target_locked(priv->dev);

	gve_clear_napi_enabled(priv);
	gve_clear_report_stats(priv);

	/* Make sure that all traffic is finished processing. */
	synchronize_net();
}
static void gve_turnup(struct gve_priv *priv)
{
	int idx;

	/* Start the tx queues */
	netif_tx_start_all_queues(priv->dev);

	/* Enable napi and unmask interrupts for all queues */
	for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		if (!gve_tx_was_added_to_block(priv, idx))
			continue;

		napi_enable_locked(&block->napi);
		if (idx < priv->tx_cfg.num_queues)
			netif_queue_set_napi(priv->dev, idx,
					     NETDEV_QUEUE_TYPE_TX,
					     &block->napi);

		if (gve_is_gqi(priv)) {
			iowrite32be(0, gve_irq_doorbell(priv, block));
		} else {
			gve_set_itr_coalesce_usecs_dqo(priv, block,
						       priv->tx_coalesce_usecs);
		}

		/* Any descs written by the NIC before this barrier will be
		 * handled by the one-off napi schedule below. Whereas any
		 * descs after the barrier will generate interrupts.
		 */
		mb();
		napi_schedule(&block->napi);
	}
	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		if (!gve_rx_was_added_to_block(priv, idx))
			continue;

		napi_enable_locked(&block->napi);
		netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX,
				     &block->napi);

		if (gve_is_gqi(priv)) {
			iowrite32be(0, gve_irq_doorbell(priv, block));
		} else {
			gve_set_itr_coalesce_usecs_dqo(priv, block,
						       priv->rx_coalesce_usecs);
		}

		/* Any descs written by the NIC before this barrier will be
		 * handled by the one-off napi schedule below. Whereas any
		 * descs after the barrier will generate interrupts.
		 */
		mb();
		napi_schedule(&block->napi);
	}

	if (priv->tx_cfg.num_xdp_queues && gve_supports_xdp_xmit(priv))
		xdp_features_set_redirect_target_locked(priv->dev, false);

	gve_set_napi_enabled(priv);
}
static void gve_turnup_and_check_status(struct gve_priv *priv)
{
	u32 status;

	gve_turnup(priv);
	status = ioread32be(&priv->reg_bar0->device_status);
	gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
}
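/* TX timeout handling first attempts a cheap recovery: if at least
 * MIN_TX_TIMEOUT_GAP has passed since the last kick of this queue, the
 * queue's NAPI is simply rescheduled. Only when that is not possible does
 * the handler schedule a full device reset.
 */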
static struct gve_notify_block *gve_get_tx_notify_block(struct gve_priv *priv,
							unsigned int txqueue)
{
	u32 ntfy_idx;

	if (txqueue > priv->tx_cfg.num_queues)
		return NULL;

	ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue);
	if (ntfy_idx >= priv->num_ntfy_blks)
		return NULL;

	return &priv->ntfy_blocks[ntfy_idx];
}

static bool gve_tx_timeout_try_q_kick(struct gve_priv *priv,
				      unsigned int txqueue)
{
	struct gve_notify_block *block;
	u32 current_time;

	block = gve_get_tx_notify_block(priv, txqueue);
	if (!block)
		return false;

	current_time = jiffies_to_msecs(jiffies);
	if (block->tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time)
		return false;

	netdev_info(priv->dev, "Kicking queue %d", txqueue);
	napi_schedule(&block->napi);
	block->tx->last_kick_msec = current_time;
	return true;
}

static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
	struct gve_notify_block *block;
	struct gve_priv *priv;

	netdev_info(dev, "Timeout on tx queue, %d", txqueue);
	priv = netdev_priv(dev);

	if (!gve_tx_timeout_try_q_kick(priv, txqueue))
		gve_schedule_reset(priv);

	block = gve_get_tx_notify_block(priv, txqueue);
	if (block)
		block->tx->queue_timeout++;
	priv->tx_timeo_cnt++;
}
u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hsplit)
{
	if (enable_hsplit && priv->max_rx_buffer_size >= GVE_MAX_RX_BUFFER_SIZE)
		return GVE_MAX_RX_BUFFER_SIZE;
	else
		return GVE_DEFAULT_RX_BUFFER_SIZE;
}
/* Header split is only supported on DQ RDA queue format. If XDP is enabled,
 * header split is not allowed.
 */
bool gve_header_split_supported(const struct gve_priv *priv)
{
	return priv->header_buf_size &&
		priv->queue_format == GVE_DQO_RDA_FORMAT && !priv->xdp_prog;
}

int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split)
{
	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
	bool enable_hdr_split;
	int err = 0;

	if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN)
		return 0;

	if (!gve_header_split_supported(priv)) {
		dev_err(&priv->pdev->dev, "Header-split not supported\n");
		return -EOPNOTSUPP;
	}

	if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED)
		enable_hdr_split = true;
	else
		enable_hdr_split = false;

	if (enable_hdr_split == priv->header_split_enabled)
		return 0;

	gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);

	rx_alloc_cfg.enable_header_split = enable_hdr_split;
	rx_alloc_cfg.packet_buffer_size = gve_get_pkt_buf_size(priv, enable_hdr_split);

	if (netif_running(priv->dev))
		err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
	return err;
}
static int gve_set_features(struct net_device *netdev,
			    netdev_features_t features)
{
	const netdev_features_t orig_features = netdev->features;
	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
	struct gve_priv *priv = netdev_priv(netdev);
	int err;

	gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);

	if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) {
		netdev->features ^= NETIF_F_LRO;
		if (priv->xdp_prog && (netdev->features & NETIF_F_LRO)) {
			netdev_warn(netdev,
				    "XDP is not supported when LRO is on.\n");
			err = -EOPNOTSUPP;
			goto revert_features;
		}
		if (netif_running(netdev)) {
			err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
			if (err)
				goto revert_features;
		}
	}
	if ((netdev->features & NETIF_F_NTUPLE) && !(features & NETIF_F_NTUPLE)) {
		err = gve_flow_rules_reset(priv);
		if (err)
			goto revert_features;
	}

	return 0;

revert_features:
	netdev->features = orig_features;
	return err;
}
static int gve_get_ts_config(struct net_device *dev,
			     struct kernel_hwtstamp_config *kernel_config)
{
	struct gve_priv *priv = netdev_priv(dev);

	*kernel_config = priv->ts_config;
	return 0;
}

static int gve_set_ts_config(struct net_device *dev,
			     struct kernel_hwtstamp_config *kernel_config,
			     struct netlink_ext_ack *extack)
{
	struct gve_priv *priv = netdev_priv(dev);

	if (kernel_config->tx_type != HWTSTAMP_TX_OFF) {
		NL_SET_ERR_MSG_MOD(extack, "TX timestamping is not supported");
		return -ERANGE;
	}

	if (kernel_config->rx_filter != HWTSTAMP_FILTER_NONE) {
		if (!priv->nic_ts_report) {
			NL_SET_ERR_MSG_MOD(extack,
					   "RX timestamping is not supported");
			kernel_config->rx_filter = HWTSTAMP_FILTER_NONE;
			return -EOPNOTSUPP;
		}

		kernel_config->rx_filter = HWTSTAMP_FILTER_ALL;
		gve_clock_nic_ts_read(priv);
		ptp_schedule_worker(priv->ptp->clock, 0);
	} else {
		ptp_cancel_worker_sync(priv->ptp->clock);
	}

	priv->ts_config.rx_filter = kernel_config->rx_filter;

	return 0;
}
static const struct net_device_ops gve_netdev_ops = {
	.ndo_start_xmit		= gve_start_xmit,
	.ndo_features_check	= gve_features_check,
	.ndo_open		= gve_open,
	.ndo_stop		= gve_close,
	.ndo_get_stats64	= gve_get_stats,
	.ndo_tx_timeout		= gve_tx_timeout,
	.ndo_set_features	= gve_set_features,
	.ndo_bpf		= gve_xdp,
	.ndo_xdp_xmit		= gve_xdp_xmit,
	.ndo_xsk_wakeup		= gve_xsk_wakeup,
	.ndo_hwtstamp_get	= gve_get_ts_config,
	.ndo_hwtstamp_set	= gve_set_ts_config,
};
static void gve_handle_status(struct gve_priv *priv, u32 status)
{
	if (GVE_DEVICE_STATUS_RESET_MASK & status) {
		dev_info(&priv->pdev->dev, "Device requested reset.\n");
		gve_set_do_reset(priv);
	}
	if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) {
		priv->stats_report_trigger_cnt++;
		gve_set_do_report_stats(priv);
	}
}

static void gve_handle_reset(struct gve_priv *priv)
{
	/* A service task will be scheduled at the end of probe to catch any
	 * resets that need to happen, and we don't want to reset until
	 * probe is done.
	 */
	if (gve_get_probe_in_progress(priv))
		return;

	if (gve_get_do_reset(priv)) {
		rtnl_lock();
		netdev_lock(priv->dev);
		gve_reset(priv, false);
		netdev_unlock(priv->dev);
		rtnl_unlock();
	}
}
void gve_handle_report_stats(struct gve_priv *priv)
{
	struct stats *stats = priv->stats_report->stats;
	int idx, stats_idx = 0;
	unsigned int start = 0;
	u64 tx_bytes;

	if (!gve_get_report_stats(priv))
		return;

	be64_add_cpu(&priv->stats_report->written_count, 1);
	/* tx stats */
	if (priv->tx) {
		for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
			u32 last_completion = 0;
			u32 tx_frames = 0;

			/* DQO doesn't currently support these metrics. */
			if (gve_is_gqi(priv)) {
				last_completion = priv->tx[idx].done;
				tx_frames = priv->tx[idx].req;
			}

			do {
				start = u64_stats_fetch_begin(&priv->tx[idx].statss);
				tx_bytes = priv->tx[idx].bytes_done;
			} while (u64_stats_fetch_retry(&priv->tx[idx].statss, start));
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_WAKE_CNT),
				.value = cpu_to_be64(priv->tx[idx].wake_queue),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_STOP_CNT),
				.value = cpu_to_be64(priv->tx[idx].stop_queue),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_FRAMES_SENT),
				.value = cpu_to_be64(tx_frames),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_BYTES_SENT),
				.value = cpu_to_be64(tx_bytes),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED),
				.value = cpu_to_be64(last_completion),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_TIMEOUT_CNT),
				.value = cpu_to_be64(priv->tx[idx].queue_timeout),
				.queue_id = cpu_to_be32(idx),
			};
		}
	}
	/* rx stats */
	if (priv->rx) {
		for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE),
				.value = cpu_to_be64(priv->rx[idx].desc.seqno),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(RX_BUFFERS_POSTED),
				.value = cpu_to_be64(priv->rx[idx].fill_cnt),
				.queue_id = cpu_to_be32(idx),
			};
		}
	}
}
/* Handle NIC status register changes, reset requests and report stats */
static void gve_service_task(struct work_struct *work)
{
	struct gve_priv *priv = container_of(work, struct gve_priv,
					     service_task);
	u32 status = ioread32be(&priv->reg_bar0->device_status);

	gve_handle_status(priv, status);

	gve_handle_reset(priv);
	gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
}
static void gve_set_netdev_xdp_features(struct gve_priv *priv)
{
	xdp_features_t xdp_features;

	if (priv->queue_format == GVE_GQI_QPL_FORMAT) {
		xdp_features = NETDEV_XDP_ACT_BASIC;
		xdp_features |= NETDEV_XDP_ACT_REDIRECT;
		xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
	} else if (priv->queue_format == GVE_DQO_RDA_FORMAT) {
		xdp_features = NETDEV_XDP_ACT_BASIC;
		xdp_features |= NETDEV_XDP_ACT_REDIRECT;
		xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
	} else {
		xdp_features = 0;
	}

	xdp_set_features_flag_locked(priv->dev, xdp_features);
}
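/* gve_init_priv() brings up the admin queue, verifies driver/device
 * compatibility, queries the device description, and then sizes the TX/RX
 * queue counts from the available MSI-X vectors: one notification block per
 * vector, minus the management vector, split evenly between TX and RX.
 */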
static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
{
	int num_ntfy;
	int err;

	/* Set up the adminq */
	err = gve_adminq_alloc(&priv->pdev->dev, priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Failed to alloc admin queue: err=%d\n", err);
		return err;
	}

	err = gve_verify_driver_compatibility(priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Could not verify driver compatibility: err=%d\n", err);
		goto err;
	}

	priv->num_registered_pages = 0;

	if (skip_describe_device)
		goto setup_device;

	priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED;
	/* Get the initial information we need from the device */
	err = gve_adminq_describe_device(priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Could not get device information: err=%d\n", err);
		goto err;
	}
	priv->dev->mtu = priv->dev->max_mtu;
	num_ntfy = pci_msix_vec_count(priv->pdev);
	if (num_ntfy <= 0) {
		dev_err(&priv->pdev->dev,
			"could not count MSI-x vectors: err=%d\n", num_ntfy);
		err = num_ntfy;
		goto err;
	} else if (num_ntfy < GVE_MIN_MSIX) {
		dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
			GVE_MIN_MSIX, num_ntfy);
		err = -EINVAL;
		goto err;
	}

	/* Big TCP is only supported on DQO */
	if (!gve_is_gqi(priv))
		netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX);

	priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
	/* gvnic has one Notification Block per MSI-x vector, except for the
	 * management vector
	 */
	priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
	priv->mgmt_msix_idx = priv->num_ntfy_blks;
	priv->numa_node = dev_to_node(&priv->pdev->dev);

	priv->tx_cfg.max_queues =
		min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
	priv->rx_cfg.max_queues =
		min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);

	priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
	priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
	if (priv->default_num_queues > 0) {
		priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->tx_cfg.num_queues);
		priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->rx_cfg.num_queues);
	}
	priv->tx_cfg.num_xdp_queues = 0;

	dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
		 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
	dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n",
		 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);

	if (!gve_is_gqi(priv)) {
		priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO;
		priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO;
	}

	priv->ts_config.tx_type = HWTSTAMP_TX_OFF;
	priv->ts_config.rx_filter = HWTSTAMP_FILTER_NONE;

setup_device:
	priv->xsk_pools = bitmap_zalloc(priv->rx_cfg.max_queues, GFP_KERNEL);
	if (!priv->xsk_pools) {
		err = -ENOMEM;
		goto err;
	}

	gve_set_netdev_xdp_features(priv);
	err = gve_setup_device_resources(priv);
	if (err)
		goto err_free_xsk_bitmap;
	return 0;

err_free_xsk_bitmap:
	bitmap_free(priv->xsk_pools);
	priv->xsk_pools = NULL;
err:
	gve_adminq_free(&priv->pdev->dev, priv);
	return err;
}

static void gve_teardown_priv_resources(struct gve_priv *priv)
{
	gve_teardown_device_resources(priv);
	gve_adminq_free(&priv->pdev->dev, priv);
	bitmap_free(priv->xsk_pools);
	priv->xsk_pools = NULL;
}
static void gve_trigger_reset(struct gve_priv *priv)
{
	/* Reset the device by releasing the AQ */
	gve_adminq_release(priv);
}

static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
{
	gve_trigger_reset(priv);
	/* With the reset having already happened, close cannot fail */
	if (was_up)
		gve_close(priv->dev);
	gve_teardown_priv_resources(priv);
}

static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
{
	int err;

	err = gve_init_priv(priv, true);
	if (err)
		goto err;
	if (was_up) {
		err = gve_open(priv->dev);
		if (err)
			goto err;
	}
	return 0;
err:
	dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
	gve_turndown(priv);
	return err;
}

int gve_reset(struct gve_priv *priv, bool attempt_teardown)
{
	bool was_up = netif_running(priv->dev);
	int err;

	dev_info(&priv->pdev->dev, "Performing reset\n");
	gve_clear_do_reset(priv);
	gve_set_reset_in_progress(priv);
	/* If we aren't attempting to teardown normally, just go turndown and
	 * reset right away.
	 */
	if (!attempt_teardown) {
		gve_turndown(priv);
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* Otherwise attempt to close normally */
		if (was_up) {
			err = gve_close(priv->dev);
			/* If that fails reset as we did above */
			if (err)
				gve_reset_and_teardown(priv, was_up);
		}
		/* Clean up any remaining resources */
		gve_teardown_priv_resources(priv);
	}

	/* Set it all back up */
	err = gve_reset_recovery(priv, was_up);
	gve_clear_reset_in_progress(priv);
	priv->reset_cnt++;
	priv->interface_up_cnt = 0;
	priv->interface_down_cnt = 0;
	priv->stats_report_trigger_cnt = 0;
	return err;
}
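
/* Write the driver version string (the "GVE-" prefix followed by the version
 * and a terminating newline) to the device's driver version register one
 * byte at a time.
 */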
static void gve_write_version(u8 __iomem *driver_version_register)
{
	const char *c = gve_version_prefix;

	while (*c) {
		writeb(*c, driver_version_register);
		c++;
	}

	c = gve_version_str;
	while (*c) {
		writeb(*c, driver_version_register);
		c++;
	}
	writeb('\n', driver_version_register);
}
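
/* Per-queue restart hooks exposed through the netdev queue management API.
 * Stopping a single RX queue quiesces all queues, destroys the queue via the
 * admin queue (unregistering its QPL where applicable), then snapshots the
 * ring state into caller-provided memory; starting a queue mirrors those
 * steps in reverse before turning the remaining queues back up.
 */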
static int gve_rx_queue_stop(struct net_device *dev, void *per_q_mem, int idx)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_rx_ring *gve_per_q_mem;
	int err;

	if (!priv->rx)
		return -EAGAIN;

	/* Destroying queue 0 while other queues exist is not supported in DQO */
	if (!gve_is_gqi(priv) && idx == 0)
		return -ERANGE;

	/* Single-queue destruction requires quiescence on all queues */
	gve_turndown(priv);

	/* This failure will trigger a reset - no need to clean up */
	err = gve_adminq_destroy_single_rx_queue(priv, idx);
	if (err)
		return err;

	if (gve_is_qpl(priv)) {
		/* This failure will trigger a reset - no need to clean up */
		err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, idx));
		if (err)
			return err;
	}

	gve_rx_stop_ring(priv, idx);

	/* Turn the unstopped queues back up */
	gve_turnup_and_check_status(priv);

	gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
	*gve_per_q_mem = priv->rx[idx];
	memset(&priv->rx[idx], 0, sizeof(priv->rx[idx]));
	return 0;
}

static void gve_rx_queue_mem_free(struct net_device *dev, void *per_q_mem)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_rx_alloc_rings_cfg cfg = {0};
	struct gve_rx_ring *gve_per_q_mem;

	gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
	gve_rx_get_curr_alloc_cfg(priv, &cfg);

	if (gve_is_gqi(priv))
		gve_rx_free_ring_gqi(priv, gve_per_q_mem, &cfg);
	else
		gve_rx_free_ring_dqo(priv, gve_per_q_mem, &cfg);
}

static int gve_rx_queue_mem_alloc(struct net_device *dev, void *per_q_mem,
				  int idx)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_rx_alloc_rings_cfg cfg = {0};
	struct gve_rx_ring *gve_per_q_mem;
	int err;

	if (!priv->rx)
		return -EAGAIN;

	gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
	gve_rx_get_curr_alloc_cfg(priv, &cfg);

	if (gve_is_gqi(priv))
		err = gve_rx_alloc_ring_gqi(priv, &cfg, gve_per_q_mem, idx);
	else
		err = gve_rx_alloc_ring_dqo(priv, &cfg, gve_per_q_mem, idx);

	return err;
}

static int gve_rx_queue_start(struct net_device *dev, void *per_q_mem, int idx)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_rx_ring *gve_per_q_mem;
	int err;

	if (!priv->rx)
		return -EAGAIN;

	gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
	priv->rx[idx] = *gve_per_q_mem;

	/* Single-queue creation requires quiescence on all queues */
	gve_turndown(priv);

	gve_rx_start_ring(priv, idx);

	if (gve_is_qpl(priv)) {
		/* This failure will trigger a reset - no need to clean up */
		err = gve_register_qpl(priv, gve_rx_get_qpl(priv, idx));
		if (err)
			goto abort;
	}

	/* This failure will trigger a reset - no need to clean up */
	err = gve_adminq_create_single_rx_queue(priv, idx);
	if (err)
		goto abort;

	if (gve_is_gqi(priv))
		gve_rx_write_doorbell(priv, &priv->rx[idx]);
	else
		gve_rx_post_buffers_dqo(&priv->rx[idx]);

	/* Turn the unstopped queues back up */
	gve_turnup_and_check_status(priv);
	return 0;

abort:
	gve_rx_stop_ring(priv, idx);

	/* All failures in this func result in a reset, by clearing the struct
	 * at idx, we prevent a double free when that reset runs. The reset,
	 * which needs the rtnl lock, will not run till this func returns and
	 * its caller gives up the lock.
	 */
	memset(&priv->rx[idx], 0, sizeof(priv->rx[idx]));
	return err;
}

static const struct netdev_queue_mgmt_ops gve_queue_mgmt_ops = {
	.ndo_queue_mem_size	= sizeof(struct gve_rx_ring),
	.ndo_queue_mem_alloc	= gve_rx_queue_mem_alloc,
	.ndo_queue_mem_free	= gve_rx_queue_mem_free,
	.ndo_queue_start	= gve_rx_queue_start,
	.ndo_queue_stop		= gve_rx_queue_stop,
};
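
/* Per-queue statistics callbacks. Ring counters are read inside u64_stats
 * retry loops so a concurrent writer on the ring cannot tear the values.
 */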
static void gve_get_rx_queue_stats(struct net_device *dev, int idx,
				   struct netdev_queue_stats_rx *rx_stats)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_rx_ring *rx = &priv->rx[idx];
	unsigned int start;

	do {
		start = u64_stats_fetch_begin(&rx->statss);
		rx_stats->packets = rx->rpackets;
		rx_stats->bytes = rx->rbytes;
		rx_stats->alloc_fail = rx->rx_skb_alloc_fail +
				       rx->rx_buf_alloc_fail;
	} while (u64_stats_fetch_retry(&rx->statss, start));
}

static void gve_get_tx_queue_stats(struct net_device *dev, int idx,
				   struct netdev_queue_stats_tx *tx_stats)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_tx_ring *tx = &priv->tx[idx];
	unsigned int start;

	do {
		start = u64_stats_fetch_begin(&tx->statss);
		tx_stats->packets = tx->pkt_done;
		tx_stats->bytes = tx->bytes_done;
	} while (u64_stats_fetch_retry(&tx->statss, start));
}

static void gve_get_base_stats(struct net_device *dev,
			       struct netdev_queue_stats_rx *rx,
			       struct netdev_queue_stats_tx *tx)
{
	rx->packets = 0;
	rx->bytes = 0;
	rx->alloc_fail = 0;

	tx->packets = 0;
	tx->bytes = 0;
}

static const struct netdev_stat_ops gve_stat_ops = {
	.get_queue_stats_rx	= gve_get_rx_queue_stats,
	.get_queue_stats_tx	= gve_get_tx_queue_stats,
	.get_base_stats		= gve_get_base_stats,
};
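
/* PCI probe: enable the device, map the register and doorbell BARs, write
 * the driver version, read the maximum queue counts, allocate and configure
 * the netdev, initialize priv resources, and finally register the netdev.
 */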
static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	int max_tx_queues, max_rx_queues;
	struct net_device *dev;
	__be32 __iomem *db_bar;
	struct gve_registers __iomem *reg_bar;
	struct gve_priv *priv;
	int err;

	err = pci_enable_device(pdev);
	if (err)
		return err;

	err = pci_request_regions(pdev, gve_driver_name);
	if (err)
		goto abort_with_enabled;

	pci_set_master(pdev);

	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
	if (err) {
		dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
		goto abort_with_pci_region;
	}

	reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
	if (!reg_bar) {
		dev_err(&pdev->dev, "Failed to map pci bar!\n");
		err = -ENOMEM;
		goto abort_with_pci_region;
	}

	db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
	if (!db_bar) {
		dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
		err = -ENOMEM;
		goto abort_with_reg_bar;
	}

	gve_write_version(&reg_bar->driver_version);
	/* Get max queues to alloc etherdev */
	max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
	max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
	/* Alloc and setup the netdev and priv */
	dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
	if (!dev) {
		dev_err(&pdev->dev, "could not allocate netdev\n");
		err = -ENOMEM;
		goto abort_with_db_bar;
	}
	SET_NETDEV_DEV(dev, &pdev->dev);
	pci_set_drvdata(pdev, dev);
	dev->ethtool_ops = &gve_ethtool_ops;
	dev->netdev_ops = &gve_netdev_ops;
	dev->queue_mgmt_ops = &gve_queue_mgmt_ops;
	dev->stat_ops = &gve_stat_ops;

	/* Set default and supported features.
	 *
	 * Features might be set in other locations as well (such as
	 * `gve_adminq_describe_device`).
	 */
	dev->hw_features = NETIF_F_HIGHDMA;
	dev->hw_features |= NETIF_F_SG;
	dev->hw_features |= NETIF_F_HW_CSUM;
	dev->hw_features |= NETIF_F_TSO;
	dev->hw_features |= NETIF_F_TSO6;
	dev->hw_features |= NETIF_F_TSO_ECN;
	dev->hw_features |= NETIF_F_RXCSUM;
	dev->hw_features |= NETIF_F_RXHASH;
	dev->features = dev->hw_features;
	dev->watchdog_timeo = 5 * HZ;
	dev->min_mtu = ETH_MIN_MTU;
	netif_carrier_off(dev);

	priv = netdev_priv(dev);
	priv->dev = dev;
	priv->pdev = pdev;
	priv->msg_enable = DEFAULT_MSG_LEVEL;
	priv->reg_bar0 = reg_bar;
	priv->db_bar2 = db_bar;
	priv->service_task_flags = 0x0;
	priv->state_flags = 0x0;
	priv->ethtool_flags = 0x0;
	priv->rx_cfg.packet_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE;
	priv->max_rx_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE;

	gve_set_probe_in_progress(priv);

	priv->gve_wq = alloc_ordered_workqueue("gve", 0);
	if (!priv->gve_wq) {
		dev_err(&pdev->dev, "Could not allocate workqueue");
		err = -ENOMEM;
		goto abort_with_netdev;
	}
	INIT_WORK(&priv->service_task, gve_service_task);
	INIT_WORK(&priv->stats_report_task, gve_stats_report_task);
	priv->tx_cfg.max_queues = max_tx_queues;
	priv->rx_cfg.max_queues = max_rx_queues;

	err = gve_init_priv(priv, false);
	if (err)
		goto abort_with_wq;

	if (!gve_is_gqi(priv) && !gve_is_qpl(priv))
		dev->netmem_tx = true;

	err = register_netdev(dev);
	if (err)
		goto abort_with_gve_init;

	dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
	dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format);
	gve_clear_probe_in_progress(priv);
	queue_work(priv->gve_wq, &priv->service_task);
	return 0;

abort_with_gve_init:
	gve_teardown_priv_resources(priv);

abort_with_wq:
	destroy_workqueue(priv->gve_wq);

abort_with_netdev:
	free_netdev(dev);

abort_with_db_bar:
	pci_iounmap(pdev, db_bar);

abort_with_reg_bar:
	pci_iounmap(pdev, reg_bar);

abort_with_pci_region:
	pci_release_regions(pdev);

abort_with_enabled:
	pci_disable_device(pdev);
	return err;
}
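
/* PCI remove: unregister the netdev and release resources in the reverse
 * order of probe.
 */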
static void gve_remove(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	__be32 __iomem *db_bar = priv->db_bar2;
	void __iomem *reg_bar = priv->reg_bar0;

	unregister_netdev(netdev);
	gve_teardown_priv_resources(priv);
	destroy_workqueue(priv->gve_wq);
	free_netdev(netdev);
	pci_iounmap(pdev, db_bar);
	pci_iounmap(pdev, reg_bar);
	pci_release_regions(pdev);
	pci_disable_device(pdev);
}
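
/* Shutdown and suspend close the interface (falling back to reset-and-
 * teardown if close fails) while holding the rtnl and netdev instance
 * locks; resume re-runs the reset recovery path and reopens the interface
 * if it was up before suspend.
 */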
static void gve_shutdown(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	bool was_up = netif_running(priv->dev);

	rtnl_lock();
	netdev_lock(netdev);
	if (was_up && gve_close(priv->dev)) {
		/* If the dev was up, attempt to close, if close fails, reset */
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* If the dev wasn't up or close worked, finish tearing down */
		gve_teardown_priv_resources(priv);
	}
	netdev_unlock(netdev);
	rtnl_unlock();
}

#ifdef CONFIG_PM
static int gve_suspend(struct pci_dev *pdev, pm_message_t state)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	bool was_up = netif_running(priv->dev);

	priv->suspend_cnt++;
	rtnl_lock();
	netdev_lock(netdev);
	if (was_up && gve_close(priv->dev)) {
		/* If the dev was up, attempt to close, if close fails, reset */
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* If the dev wasn't up or close worked, finish tearing down */
		gve_teardown_priv_resources(priv);
	}
	priv->up_before_suspend = was_up;
	netdev_unlock(netdev);
	rtnl_unlock();
	return 0;
}

static int gve_resume(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	int err;

	priv->resume_cnt++;
	rtnl_lock();
	netdev_lock(netdev);
	err = gve_reset_recovery(priv, priv->up_before_suspend);
	netdev_unlock(netdev);
	rtnl_unlock();
	return err;
}
#endif /* CONFIG_PM */

static const struct pci_device_id gve_id_table[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
	{ }
};

static struct pci_driver gve_driver = {
	.name		= gve_driver_name,
	.id_table	= gve_id_table,
	.probe		= gve_probe,
	.remove		= gve_remove,
	.shutdown	= gve_shutdown,
#ifdef CONFIG_PM
	.suspend	= gve_suspend,
	.resume		= gve_resume,
#endif
};

module_pci_driver(gve_driver);

MODULE_DEVICE_TABLE(pci, gve_id_table);
MODULE_AUTHOR("Google, Inc.");
MODULE_DESCRIPTION("Google Virtual NIC Driver");
MODULE_LICENSE("Dual MIT/GPL");
MODULE_VERSION(GVE_VERSION);