// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2019-2021, Intel Corporation. */

#include "ice.h"
#include "ice_lib.h"
#include "ice_eswitch.h"
#include "ice_eswitch_br.h"
#include "ice_fltr.h"
#include "ice_repr.h"
#include "devlink/devlink.h"
#include "ice_tc_lib.h"

/**
 * ice_eswitch_setup_env - configure eswitch HW filters
 * @pf: pointer to PF struct
 *
 * This function adds the HW filter configuration specific to switchdev
 * mode.
 */
static int ice_eswitch_setup_env(struct ice_pf *pf)
{
	struct ice_vsi *uplink_vsi = pf->eswitch.uplink_vsi;
	struct net_device *netdev = uplink_vsi->netdev;
	bool if_running = netif_running(netdev);
	struct ice_vsi_vlan_ops *vlan_ops;

	if (if_running && !test_and_set_bit(ICE_VSI_DOWN, uplink_vsi->state))
		if (ice_down(uplink_vsi))
			return -ENODEV;
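
	/* Remove the uplink VSI's existing filters and stop MAC address
	 * syncing: in switchdev mode the uplink is configured below as the
	 * default (catch-all) VSI instead of relying on per-address filters.
	 */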
	ice_remove_vsi_fltr(&pf->hw, uplink_vsi->idx);

	netif_addr_lock_bh(netdev);
	__dev_uc_unsync(netdev, NULL);
	__dev_mc_unsync(netdev, NULL);
	netif_addr_unlock_bh(netdev);

	if (ice_vsi_add_vlan_zero(uplink_vsi))
		goto err_vlan_zero;
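
	/* Make the uplink VSI the default VSI for both Rx and Tx so traffic
	 * that does not match any other switch rule is steered to it.
	 */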
	if (ice_cfg_dflt_vsi(uplink_vsi->port_info, uplink_vsi->idx, true,
			     ICE_FLTR_RX))
		goto err_def_rx;

	if (ice_cfg_dflt_vsi(uplink_vsi->port_info, uplink_vsi->idx, true,
			     ICE_FLTR_TX))
		goto err_def_tx;
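
	/* Relax the uplink VSI settings needed for switching: disable VLAN Rx
	 * filtering, allow the Tx destination to be overridden and enable
	 * local loopback so traffic can be hairpinned between VSIs.
	 */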
	vlan_ops = ice_get_compat_vsi_vlan_ops(uplink_vsi);
	if (vlan_ops->dis_rx_filtering(uplink_vsi))
		goto err_vlan_filtering;

	if (ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_set_allow_override))
		goto err_override_uplink;

	if (ice_vsi_update_local_lb(uplink_vsi, true))
		goto err_override_local_lb;

	if (if_running && ice_up(uplink_vsi))
		goto err_up;

	return 0;
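
	/* Error unwind: undo the configuration steps above in reverse order. */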
err_up:
	ice_vsi_update_local_lb(uplink_vsi, false);
err_override_local_lb:
	ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_clear_allow_override);
err_override_uplink:
	vlan_ops->ena_rx_filtering(uplink_vsi);
err_vlan_filtering:
	ice_cfg_dflt_vsi(uplink_vsi->port_info, uplink_vsi->idx, false,
			 ICE_FLTR_TX);
err_def_tx:
	ice_cfg_dflt_vsi(uplink_vsi->port_info, uplink_vsi->idx, false,
			 ICE_FLTR_RX);
err_def_rx:
	ice_vsi_del_vlan_zero(uplink_vsi);
err_vlan_zero:
	ice_fltr_add_mac_and_broadcast(uplink_vsi,
				       uplink_vsi->port_info->mac.perm_addr,
				       ICE_FWD_TO_VSI);

	if (if_running)
		ice_up(uplink_vsi);

	return -ENODEV;
}

/**
 * ice_eswitch_release_repr - clear PR VSI configuration
 * @pf: pointer to PF struct
 * @repr: pointer to PR
 */
static void
ice_eswitch_release_repr(struct ice_pf *pf, struct ice_repr *repr)
{
	struct ice_vsi *vsi = repr->src_vsi;

	/* Skip representors that aren't configured */
	if (!repr->dst)
		return;

	ice_vsi_update_security(vsi, ice_vsi_ctx_set_antispoof);
	metadata_dst_free(repr->dst);
	repr->dst = NULL;
	ice_fltr_add_mac_and_broadcast(vsi, repr->parent_mac,
				       ICE_FWD_TO_VSI);
}

/**
 * ice_eswitch_setup_repr - configure PR to run in switchdev mode
 * @pf: pointer to PF struct
 * @repr: pointer to PR struct
 */
static int ice_eswitch_setup_repr(struct ice_pf *pf, struct ice_repr *repr)
{
	struct ice_vsi *uplink_vsi = pf->eswitch.uplink_vsi;
	struct ice_vsi *vsi = repr->src_vsi;
	struct metadata_dst *dst;
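
	/* Remove the VF VSI's existing filters and allocate a HW port mux
	 * metadata dst; the dst is attached to skbs sent on the representor
	 * netdev so the Tx path can steer them to this VSI (see
	 * ice_eswitch_set_target_vsi()).
	 */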
	ice_remove_vsi_fltr(&pf->hw, vsi->idx);
	repr->dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX,
				       GFP_KERNEL);
	if (!repr->dst)
		goto err_add_mac_fltr;

	if (ice_vsi_update_security(vsi, ice_vsi_ctx_clear_antispoof))
		goto err_dst_free;

	if (ice_vsi_add_vlan_zero(vsi))
		goto err_update_security;

	netif_keep_dst(uplink_vsi->netdev);

	dst = repr->dst;
	dst->u.port_info.port_id = vsi->vsi_num;
	dst->u.port_info.lower_dev = uplink_vsi->netdev;

	return 0;

err_update_security:
	ice_vsi_update_security(vsi, ice_vsi_ctx_set_antispoof);
err_dst_free:
	metadata_dst_free(repr->dst);
	repr->dst = NULL;
err_add_mac_fltr:
	ice_fltr_add_mac_and_broadcast(vsi, repr->parent_mac, ICE_FWD_TO_VSI);

	return -ENODEV;
}

/**
 * ice_eswitch_update_repr - reconfigure port representor
 * @repr_id: representor ID
 * @vsi: VSI for which port representor is configured
 */
void ice_eswitch_update_repr(unsigned long repr_id, struct ice_vsi *vsi)
{
	struct ice_pf *pf = vsi->back;
	struct ice_repr *repr;
	int ret;

	if (!ice_is_switchdev_running(pf))
		return;

	repr = xa_load(&pf->eswitch.reprs, repr_id);
	if (!repr)
		return;

	repr->src_vsi = vsi;
	repr->dst->u.port_info.port_id = vsi->vsi_num;

	if (repr->br_port)
		repr->br_port->vsi = vsi;

	ret = ice_vsi_update_security(vsi, ice_vsi_ctx_clear_antispoof);
	if (ret) {
		ice_fltr_add_mac_and_broadcast(vsi, repr->parent_mac,
					       ICE_FWD_TO_VSI);
		dev_err(ice_pf_to_dev(pf), "Failed to update VSI of port representor %d",
			repr->id);
	}
}

/**
 * ice_eswitch_port_start_xmit - callback for packet transmit
 * @skb: send buffer
 * @netdev: network interface device structure
 *
 * Returns NETDEV_TX_OK if sent, else an error code
 */
netdev_tx_t
ice_eswitch_port_start_xmit(struct sk_buff *skb, struct net_device *netdev)
{
	struct ice_repr *repr = ice_netdev_to_repr(netdev);
	unsigned int len = skb->len;
	int ret;
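
	/* Attach the representor's metadata dst and hand the skb to the
	 * uplink netdev; the dst carries the target VSI used when building
	 * the Tx context descriptor (see ice_eswitch_set_target_vsi()).
	 */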
	skb_dst_drop(skb);
	dst_hold((struct dst_entry *)repr->dst);
	skb_dst_set(skb, (struct dst_entry *)repr->dst);
	skb->dev = repr->dst->u.port_info.lower_dev;

	ret = dev_queue_xmit(skb);
	ice_repr_inc_tx_stats(repr, len, ret);

	return ret;
}

/**
 * ice_eswitch_set_target_vsi - set eswitch context in Tx context descriptor
 * @skb: pointer to send buffer
 * @off: pointer to offload struct
 */
void
ice_eswitch_set_target_vsi(struct sk_buff *skb,
			   struct ice_tx_offload_params *off)
{
	struct metadata_dst *dst = skb_metadata_dst(skb);
	u64 cd_cmd, dst_vsi;
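
	/* Without a metadata dst the frame originates from the uplink netdev
	 * and is switched towards the wire; with a dst it is directed to the
	 * VSI stored in port_id by the representor Tx path.
	 */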
	if (!dst) {
		cd_cmd = ICE_TX_CTX_DESC_SWTCH_UPLINK << ICE_TXD_CTX_QW1_CMD_S;
		off->cd_qw1 |= (cd_cmd | ICE_TX_DESC_DTYPE_CTX);
	} else {
		cd_cmd = ICE_TX_CTX_DESC_SWTCH_VSI << ICE_TXD_CTX_QW1_CMD_S;
		dst_vsi = FIELD_PREP(ICE_TXD_CTX_QW1_VSI_M,
				     dst->u.port_info.port_id);
		off->cd_qw1 = cd_cmd | dst_vsi | ICE_TX_DESC_DTYPE_CTX;
	}
}

/**
 * ice_eswitch_release_env - clear eswitch HW filters
 * @pf: pointer to PF struct
 *
 * This function removes the HW filter configuration specific to switchdev
 * mode and restores the default legacy mode settings.
 */
static void ice_eswitch_release_env(struct ice_pf *pf)
{
	struct ice_vsi *uplink_vsi = pf->eswitch.uplink_vsi;
	struct ice_vsi_vlan_ops *vlan_ops;

	vlan_ops = ice_get_compat_vsi_vlan_ops(uplink_vsi);
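
	/* Roll back ice_eswitch_setup_env(): restore loopback, security and
	 * VLAN filtering defaults, clear the default VSI rules and re-add the
	 * uplink MAC/broadcast filter.
	 */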
	ice_vsi_update_local_lb(uplink_vsi, false);
	ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_clear_allow_override);
	vlan_ops->ena_rx_filtering(uplink_vsi);
	ice_cfg_dflt_vsi(uplink_vsi->port_info, uplink_vsi->idx, false,
			 ICE_FLTR_TX);
	ice_cfg_dflt_vsi(uplink_vsi->port_info, uplink_vsi->idx, false,
			 ICE_FLTR_RX);
	ice_fltr_add_mac_and_broadcast(uplink_vsi,
				       uplink_vsi->port_info->mac.perm_addr,
				       ICE_FWD_TO_VSI);
}

/**
 * ice_eswitch_enable_switchdev - configure eswitch in switchdev mode
 * @pf: pointer to PF structure
 */
static int ice_eswitch_enable_switchdev(struct ice_pf *pf)
{
	struct ice_vsi *uplink_vsi;

	uplink_vsi = ice_get_main_vsi(pf);
	if (!uplink_vsi)
		return -ENODEV;

	if (netif_is_any_bridge_port(uplink_vsi->netdev)) {
		dev_err(ice_pf_to_dev(pf),
			"Uplink port cannot be a bridge port\n");
		return -EINVAL;
	}

	pf->eswitch.uplink_vsi = uplink_vsi;

	if (ice_eswitch_setup_env(pf))
		return -ENODEV;

	if (ice_eswitch_br_offloads_init(pf))
		goto err_br_offloads;

	pf->eswitch.is_running = true;

	return 0;

err_br_offloads:
	ice_eswitch_release_env(pf);
	return -ENODEV;
}

/**
 * ice_eswitch_disable_switchdev - disable eswitch resources
 * @pf: pointer to PF structure
 */
static void ice_eswitch_disable_switchdev(struct ice_pf *pf)
{
	ice_eswitch_br_offloads_deinit(pf);
	ice_eswitch_release_env(pf);

	pf->eswitch.is_running = false;
}

/**
 * ice_eswitch_mode_set - set new eswitch mode
 * @devlink: pointer to devlink structure
 * @mode: eswitch mode to switch to
 * @extack: pointer to extack structure
 */
int
ice_eswitch_mode_set(struct devlink *devlink, u16 mode,
		     struct netlink_ext_ack *extack)
{
	struct ice_pf *pf = devlink_priv(devlink);

	if (pf->eswitch_mode == mode)
		return 0;

	if (ice_has_vfs(pf)) {
		dev_info(ice_pf_to_dev(pf), "Changing eswitch mode is allowed only if there are no VFs created");
		NL_SET_ERR_MSG_MOD(extack, "Changing eswitch mode is allowed only if there are no VFs created");
		return -EOPNOTSUPP;
	}
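
	/* Only the mode and the representor xarray change here; the switchdev
	 * environment itself is set up lazily in ice_eswitch_attach() and
	 * torn down in ice_eswitch_detach().
	 */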
	switch (mode) {
	case DEVLINK_ESWITCH_MODE_LEGACY:
		dev_info(ice_pf_to_dev(pf), "PF %d changed eswitch mode to legacy",
			 pf->hw.pf_id);
		xa_destroy(&pf->eswitch.reprs);
		NL_SET_ERR_MSG_MOD(extack, "Changed eswitch mode to legacy");
		break;
	case DEVLINK_ESWITCH_MODE_SWITCHDEV:
	{
		if (ice_is_adq_active(pf)) {
			dev_err(ice_pf_to_dev(pf), "Couldn't change eswitch mode to switchdev - ADQ is active. Delete ADQ configs and try again, e.g. tc qdisc del dev $PF root");
			NL_SET_ERR_MSG_MOD(extack, "Couldn't change eswitch mode to switchdev - ADQ is active. Delete ADQ configs and try again, e.g. tc qdisc del dev $PF root");
			return -EOPNOTSUPP;
		}

		dev_info(ice_pf_to_dev(pf), "PF %d changed eswitch mode to switchdev",
			 pf->hw.pf_id);
		xa_init(&pf->eswitch.reprs);
		NL_SET_ERR_MSG_MOD(extack, "Changed eswitch mode to switchdev");
		break;
	}
	default:
		NL_SET_ERR_MSG_MOD(extack, "Unknown eswitch mode");
		return -EINVAL;
	}

	pf->eswitch_mode = mode;
	return 0;
}

/**
 * ice_eswitch_mode_get - get current eswitch mode
 * @devlink: pointer to devlink structure
 * @mode: output parameter for current eswitch mode
 */
int ice_eswitch_mode_get(struct devlink *devlink, u16 *mode)
{
	struct ice_pf *pf = devlink_priv(devlink);

	*mode = pf->eswitch_mode;
	return 0;
}

/**
 * ice_is_eswitch_mode_switchdev - check if eswitch mode is set to switchdev
 * @pf: pointer to PF structure
 *
 * Returns true if eswitch mode is set to DEVLINK_ESWITCH_MODE_SWITCHDEV,
 * false otherwise.
 */
bool ice_is_eswitch_mode_switchdev(struct ice_pf *pf)
{
	return pf->eswitch_mode == DEVLINK_ESWITCH_MODE_SWITCHDEV;
}

/**
 * ice_eswitch_start_all_tx_queues - start Tx queues of all port representors
 * @pf: pointer to PF structure
 */
static void ice_eswitch_start_all_tx_queues(struct ice_pf *pf)
{
	struct ice_repr *repr;
	unsigned long id;

	if (test_bit(ICE_DOWN, pf->state))
		return;

	xa_for_each(&pf->eswitch.reprs, id, repr)
		ice_repr_start_tx_queues(repr);
}

/**
 * ice_eswitch_stop_all_tx_queues - stop Tx queues of all port representors
 * @pf: pointer to PF structure
 */
void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf)
{
	struct ice_repr *repr;
	unsigned long id;

	if (test_bit(ICE_DOWN, pf->state))
		return;

	xa_for_each(&pf->eswitch.reprs, id, repr)
		ice_repr_stop_tx_queues(repr);
}

static void ice_eswitch_stop_reprs(struct ice_pf *pf)
{
	ice_eswitch_stop_all_tx_queues(pf);
}

static void ice_eswitch_start_reprs(struct ice_pf *pf)
{
	ice_eswitch_start_all_tx_queues(pf);
}

int
ice_eswitch_attach(struct ice_pf *pf, struct ice_vf *vf)
{
	struct ice_repr *repr;
	int err;

	if (pf->eswitch_mode == DEVLINK_ESWITCH_MODE_LEGACY)
		return 0;
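
	/* The switchdev environment is created when the first representor is
	 * added and torn down again when the last one is removed (see
	 * ice_eswitch_detach()).
	 */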
	if (xa_empty(&pf->eswitch.reprs)) {
		err = ice_eswitch_enable_switchdev(pf);
		if (err)
			return err;
	}

	ice_eswitch_stop_reprs(pf);

	repr = ice_repr_add_vf(vf);
	if (IS_ERR(repr)) {
		err = PTR_ERR(repr);
		goto err_create_repr;
	}

	err = ice_eswitch_setup_repr(pf, repr);
	if (err)
		goto err_setup_repr;

	err = xa_insert(&pf->eswitch.reprs, repr->id, repr, GFP_KERNEL);
	if (err)
		goto err_xa_alloc;

	vf->repr_id = repr->id;

	ice_eswitch_start_reprs(pf);

	return 0;

err_xa_alloc:
	ice_eswitch_release_repr(pf, repr);
err_setup_repr:
	ice_repr_rem_vf(repr);
err_create_repr:
	if (xa_empty(&pf->eswitch.reprs))
		ice_eswitch_disable_switchdev(pf);
	ice_eswitch_start_reprs(pf);

	return err;
}

void ice_eswitch_detach(struct ice_pf *pf, struct ice_vf *vf)
{
	struct ice_repr *repr = xa_load(&pf->eswitch.reprs, vf->repr_id);
	struct devlink *devlink = priv_to_devlink(pf);

	if (!repr)
		return;

	ice_eswitch_stop_reprs(pf);
	xa_erase(&pf->eswitch.reprs, repr->id);
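
	/* Tear the switchdev environment down once the last representor has
	 * been removed.
	 */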
	if (xa_empty(&pf->eswitch.reprs))
		ice_eswitch_disable_switchdev(pf);

	ice_eswitch_release_repr(pf, repr);
	ice_repr_rem_vf(repr);

	if (xa_empty(&pf->eswitch.reprs)) {
		/* since all port representors are destroyed, there is
		 * no point in keeping the nodes
		 */
		ice_devlink_rate_clear_tx_topology(ice_get_main_vsi(pf));
		devl_lock(devlink);
		devl_rate_nodes_destroy(devlink);
		devl_unlock(devlink);
	} else {
		ice_eswitch_start_reprs(pf);
	}
}

/**
 * ice_eswitch_rebuild - rebuild eswitch
 * @pf: pointer to PF structure
 */
void ice_eswitch_rebuild(struct ice_pf *pf)
{
	struct ice_repr *repr;
	unsigned long id;

	if (!ice_is_switchdev_running(pf))
		return;

	xa_for_each(&pf->eswitch.reprs, id, repr)
		ice_eswitch_detach(pf, repr->vf);
}

/**
 * ice_eswitch_get_target - get netdev based on src_vsi from descriptor
 * @rx_ring: ring used to receive the packet
 * @rx_desc: descriptor used to get src_vsi value
 *
 * Get src_vsi value from descriptor and load correct representor. If it isn't
 * found return rx_ring->netdev.
 */
struct net_device *ice_eswitch_get_target(struct ice_rx_ring *rx_ring,
					  union ice_32b_rx_flex_desc *rx_desc)
{
	struct ice_eswitch *eswitch = &rx_ring->vsi->back->eswitch;
	struct ice_32b_rx_flex_desc_nic_2 *desc;
	struct ice_repr *repr;

	desc = (struct ice_32b_rx_flex_desc_nic_2 *)rx_desc;
	repr = xa_load(&eswitch->reprs, le16_to_cpu(desc->src_vsi));
	if (!repr)
		return rx_ring->netdev;

	return repr->netdev;
}