// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2019-2021, Intel Corporation. */
# include "ice.h"
2021-08-19 17:08:54 -07:00
# include "ice_lib.h"
2021-08-19 17:08:48 -07:00
# include "ice_eswitch.h"
2023-07-12 13:03:31 +02:00
# include "ice_eswitch_br.h"
2021-08-19 17:08:54 -07:00
# include "ice_fltr.h"
# include "ice_repr.h"
2021-08-19 17:08:48 -07:00
# include "ice_devlink.h"
2021-08-06 10:49:06 +02:00
# include "ice_tc_lib.h"
2021-08-19 17:08:48 -07:00
2023-10-24 13:09:27 +02:00

/**
 * ice_eswitch_del_sp_rules - delete adv rules added on PRs
 * @pf: pointer to the PF struct
 *
 * Delete all advanced rules that were used to forward packets with the
 * device's VSI index to the corresponding eswitch ctrl VSI queue.
 */
static void ice_eswitch_del_sp_rules(struct ice_pf *pf)
{
	struct ice_repr *repr;
	unsigned long id;

	xa_for_each(&pf->eswitch.reprs, id, repr) {
		if (repr->sp_rule.rid)
			ice_rem_adv_rule_by_id(&pf->hw, &repr->sp_rule);
	}
}

/**
 * ice_eswitch_add_sp_rule - add adv rule with device's VSI index
 * @pf: pointer to PF struct
 * @repr: pointer to the repr struct
 *
 * This function adds an advanced rule that forwards packets with
 * the device's VSI index to the corresponding eswitch ctrl VSI queue.
 */
static int ice_eswitch_add_sp_rule(struct ice_pf *pf, struct ice_repr *repr)
{
	struct ice_vsi *ctrl_vsi = pf->eswitch.control_vsi;
	struct ice_adv_rule_info rule_info = { 0 };
	struct ice_adv_lkup_elem *list;
	struct ice_hw *hw = &pf->hw;
	const u16 lkups_cnt = 1;
	int err;

	list = kcalloc(lkups_cnt, sizeof(*list), GFP_ATOMIC);
	if (!list)
		return -ENOMEM;

	ice_rule_add_src_vsi_metadata(list);

	rule_info.sw_act.flag = ICE_FLTR_TX;
	rule_info.sw_act.vsi_handle = ctrl_vsi->idx;
	rule_info.sw_act.fltr_act = ICE_FWD_TO_Q;
	rule_info.sw_act.fwd_id.q_id = hw->func_caps.common_cap.rxq_first_id +
				       ctrl_vsi->rxq_map[repr->q_id];
	rule_info.flags_info.act |= ICE_SINGLE_ACT_LB_ENABLE;
	rule_info.flags_info.act_valid = true;
	rule_info.tun_type = ICE_SW_TUN_AND_NON_TUN;
	rule_info.src_vsi = repr->src_vsi->idx;

	err = ice_add_adv_rule(hw, list, lkups_cnt, &rule_info,
			       &repr->sp_rule);
	if (err)
		dev_err(ice_pf_to_dev(pf), "Unable to add slow-path rule for eswitch for PR %d",
			repr->id);

	kfree(list);

	return err;
}
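
/**
 * ice_eswitch_add_sp_rules - add slow-path rules for all port representors
 * @pf: pointer to the PF struct
 *
 * Walk the representor xarray and add a slow-path advanced rule for each
 * entry. On the first failure, remove any rules that were already added
 * and return the error.
 */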
static int
ice_eswitch_add_sp_rules(struct ice_pf *pf)
{
	struct ice_repr *repr;
	unsigned long id;
	int err;

	xa_for_each(&pf->eswitch.reprs, id, repr) {
		err = ice_eswitch_add_sp_rule(pf, repr);
		if (err) {
			ice_eswitch_del_sp_rules(pf);
			return err;
		}
	}

	return 0;
}

/**
 * ice_eswitch_setup_env - configure eswitch HW filters
 * @pf: pointer to PF struct
 *
 * This function adds the HW filter configuration specific to switchdev
 * mode.
 */
static int ice_eswitch_setup_env(struct ice_pf *pf)
{
	struct ice_vsi *uplink_vsi = pf->eswitch.uplink_vsi;
	struct ice_vsi *ctrl_vsi = pf->eswitch.control_vsi;
	struct net_device *netdev = uplink_vsi->netdev;
	struct ice_vsi_vlan_ops *vlan_ops;
	bool rule_added = false;

	ice_remove_vsi_fltr(&pf->hw, uplink_vsi->idx);

	netif_addr_lock_bh(netdev);
	__dev_uc_unsync(netdev, NULL);
	__dev_mc_unsync(netdev, NULL);
	netif_addr_unlock_bh(netdev);

	if (ice_vsi_add_vlan_zero(uplink_vsi))
		goto err_def_rx;

	if (!ice_is_dflt_vsi_in_use(uplink_vsi->port_info)) {
		if (ice_set_dflt_vsi(uplink_vsi))
			goto err_def_rx;
		rule_added = true;
	}

	vlan_ops = ice_get_compat_vsi_vlan_ops(uplink_vsi);
	if (vlan_ops->dis_rx_filtering(uplink_vsi))
		goto err_dis_rx;

	if (ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_set_allow_override))
		goto err_override_uplink;

	if (ice_vsi_update_security(ctrl_vsi, ice_vsi_ctx_set_allow_override))
		goto err_override_control;

	if (ice_vsi_update_local_lb(uplink_vsi, true))
		goto err_override_local_lb;

	return 0;

err_override_local_lb:
	ice_vsi_update_security(ctrl_vsi, ice_vsi_ctx_clear_allow_override);
err_override_control:
	ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_clear_allow_override);
err_override_uplink:
	vlan_ops->ena_rx_filtering(uplink_vsi);
err_dis_rx:
	if (rule_added)
		ice_clear_dflt_vsi(uplink_vsi);
err_def_rx:
	ice_fltr_add_mac_and_broadcast(uplink_vsi,
				       uplink_vsi->port_info->mac.perm_addr,
				       ICE_FWD_TO_VSI);
	return -ENODEV;
}

/**
 * ice_eswitch_remap_rings_to_vectors - reconfigure rings of eswitch ctrl VSI
 * @eswitch: pointer to eswitch struct
 *
 * In the eswitch, the number of allocated Tx and Rx rings is equal.
 *
 * This function fills the q_vector structures associated with each
 * representor and moves each ring pair to its port representor netdev.
 * Each port representor gets one dedicated Tx/Rx ring pair, so the number
 * of ring pairs equals the number of VFs.
 */
static void ice_eswitch_remap_rings_to_vectors(struct ice_eswitch *eswitch)
{
	struct ice_vsi *vsi = eswitch->control_vsi;
	unsigned long repr_id = 0;
	int q_id;

	ice_for_each_txq(vsi, q_id) {
		struct ice_q_vector *q_vector;
		struct ice_tx_ring *tx_ring;
		struct ice_rx_ring *rx_ring;
		struct ice_repr *repr;

		repr = xa_find(&eswitch->reprs, &repr_id, U32_MAX,
			       XA_PRESENT);
		if (WARN_ON(!repr))
			break;

		repr_id += 1;
		repr->q_id = q_id;
		q_vector = repr->q_vector;
		tx_ring = vsi->tx_rings[q_id];
		rx_ring = vsi->rx_rings[q_id];

		q_vector->vsi = vsi;
		q_vector->reg_idx = vsi->q_vectors[0]->reg_idx;

		q_vector->num_ring_tx = 1;
		q_vector->tx.tx_ring = tx_ring;
		tx_ring->q_vector = q_vector;
		tx_ring->next = NULL;
		tx_ring->netdev = repr->netdev;
		/* In switchdev mode, from OS stack perspective, there is only
		 * one queue for given netdev, so it needs to be indexed as 0.
		 */
		tx_ring->q_index = 0;

		q_vector->num_ring_rx = 1;
		q_vector->rx.rx_ring = rx_ring;
		rx_ring->q_vector = q_vector;
		rx_ring->next = NULL;
		rx_ring->netdev = repr->netdev;
	}
}

/**
 * ice_eswitch_release_repr - clear PR VSI configuration
 * @pf: pointer to PF struct
 * @repr: pointer to PR
 */
static void
ice_eswitch_release_repr(struct ice_pf *pf, struct ice_repr *repr)
{
	struct ice_vsi *vsi = repr->src_vsi;

	/* Skip representors that aren't configured */
	if (!repr->dst)
		return;

	ice_vsi_update_security(vsi, ice_vsi_ctx_set_antispoof);
	metadata_dst_free(repr->dst);
	repr->dst = NULL;
	ice_fltr_add_mac_and_broadcast(vsi, repr->parent_mac,
				       ICE_FWD_TO_VSI);

	netif_napi_del(&repr->q_vector->napi);
}

/**
 * ice_eswitch_setup_repr - configure PR to run in switchdev mode
 * @pf: pointer to PF struct
 * @repr: pointer to PR struct
 */
static int ice_eswitch_setup_repr(struct ice_pf *pf, struct ice_repr *repr)
{
	struct ice_vsi *ctrl_vsi = pf->eswitch.control_vsi;
	struct ice_vsi *vsi = repr->src_vsi;
	struct metadata_dst *dst;

	ice_remove_vsi_fltr(&pf->hw, vsi->idx);
	repr->dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX,
				       GFP_KERNEL);
	if (!repr->dst)
		goto err_add_mac_fltr;

	if (ice_vsi_update_security(vsi, ice_vsi_ctx_clear_antispoof))
		goto err_dst_free;

	if (ice_vsi_add_vlan_zero(vsi))
		goto err_update_security;

	netif_napi_add(repr->netdev, &repr->q_vector->napi,
		       ice_napi_poll);

	netif_keep_dst(repr->netdev);

	dst = repr->dst;
	dst->u.port_info.port_id = vsi->vsi_num;
	dst->u.port_info.lower_dev = repr->netdev;
	ice_repr_set_traffic_vsi(repr, ctrl_vsi);

	return 0;

err_update_security:
	ice_vsi_update_security(vsi, ice_vsi_ctx_set_antispoof);
err_dst_free:
	metadata_dst_free(repr->dst);
	repr->dst = NULL;
err_add_mac_fltr:
	ice_fltr_add_mac_and_broadcast(vsi, repr->parent_mac, ICE_FWD_TO_VSI);

	return -ENODEV;
}

/**
 * ice_eswitch_update_repr - reconfigure port representor
 * @repr_id: representor ID
 * @vsi: VSI for which port representor is configured
 */
void ice_eswitch_update_repr(unsigned long repr_id, struct ice_vsi *vsi)
{
	struct ice_pf *pf = vsi->back;
	struct ice_repr *repr;
	int ret;

	if (!ice_is_switchdev_running(pf))
		return;

	repr = xa_load(&pf->eswitch.reprs, repr_id);
	if (!repr)
		return;

	repr->src_vsi = vsi;
	repr->dst->u.port_info.port_id = vsi->vsi_num;

	if (repr->br_port)
		repr->br_port->vsi = vsi;

	ret = ice_vsi_update_security(vsi, ice_vsi_ctx_clear_antispoof);
	if (ret) {
		ice_fltr_add_mac_and_broadcast(vsi, repr->parent_mac,
					       ICE_FWD_TO_VSI);
		dev_err(ice_pf_to_dev(pf), "Failed to update VSI of port representor %d",
			repr->id);
	}
}

/**
 * ice_eswitch_port_start_xmit - callback for packets transmit
 * @skb: send buffer
 * @netdev: network interface device structure
 *
 * Returns NETDEV_TX_OK if sent, else an error code
 */
netdev_tx_t
ice_eswitch_port_start_xmit(struct sk_buff *skb, struct net_device *netdev)
{
	struct ice_netdev_priv *np;
	struct ice_repr *repr;
	struct ice_vsi *vsi;

	np = netdev_priv(netdev);
	vsi = np->vsi;

	if (!vsi || !ice_is_switchdev_running(vsi->back))
		return NETDEV_TX_BUSY;

	if (ice_is_reset_in_progress(vsi->back->state) ||
	    test_bit(ICE_VF_DIS, vsi->back->state))
		return NETDEV_TX_BUSY;

	repr = ice_netdev_to_repr(netdev);
	skb_dst_drop(skb);
	dst_hold((struct dst_entry *)repr->dst);
	skb_dst_set(skb, (struct dst_entry *)repr->dst);
	skb->queue_mapping = repr->q_id;

	return ice_start_xmit(skb, netdev);
}

/**
 * ice_eswitch_set_target_vsi - set eswitch context in Tx context descriptor
 * @skb: pointer to send buffer
 * @off: pointer to offload struct
 */
void
ice_eswitch_set_target_vsi(struct sk_buff *skb,
			   struct ice_tx_offload_params *off)
{
	struct metadata_dst *dst = skb_metadata_dst(skb);
	u64 cd_cmd, dst_vsi;

	if (!dst) {
		cd_cmd = ICE_TX_CTX_DESC_SWTCH_UPLINK << ICE_TXD_CTX_QW1_CMD_S;
		off->cd_qw1 |= (cd_cmd | ICE_TX_DESC_DTYPE_CTX);
	} else {
		cd_cmd = ICE_TX_CTX_DESC_SWTCH_VSI << ICE_TXD_CTX_QW1_CMD_S;
		dst_vsi = ((u64)dst->u.port_info.port_id <<
			   ICE_TXD_CTX_QW1_VSI_S) & ICE_TXD_CTX_QW1_VSI_M;
		off->cd_qw1 = cd_cmd | dst_vsi | ICE_TX_DESC_DTYPE_CTX;
	}
}

/**
 * ice_eswitch_release_env - clear eswitch HW filters
 * @pf: pointer to PF struct
 *
 * This function removes the HW filter configuration specific to switchdev
 * mode and restores the default legacy mode settings.
 */
static void ice_eswitch_release_env(struct ice_pf *pf)
{
	struct ice_vsi *uplink_vsi = pf->eswitch.uplink_vsi;
	struct ice_vsi *ctrl_vsi = pf->eswitch.control_vsi;
	struct ice_vsi_vlan_ops *vlan_ops;

	vlan_ops = ice_get_compat_vsi_vlan_ops(uplink_vsi);

	ice_vsi_update_local_lb(uplink_vsi, false);
	ice_vsi_update_security(ctrl_vsi, ice_vsi_ctx_clear_allow_override);
	ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_clear_allow_override);
	vlan_ops->ena_rx_filtering(uplink_vsi);
	ice_clear_dflt_vsi(uplink_vsi);
	ice_fltr_add_mac_and_broadcast(uplink_vsi,
				       uplink_vsi->port_info->mac.perm_addr,
				       ICE_FWD_TO_VSI);
}

/**
 * ice_eswitch_vsi_setup - configure eswitch control VSI
 * @pf: pointer to PF structure
 * @pi: pointer to port_info structure
 */
static struct ice_vsi *
ice_eswitch_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
{
	struct ice_vsi_cfg_params params = {};

	params.type = ICE_VSI_SWITCHDEV_CTRL;
	params.pi = pi;
	params.flags = ICE_VSI_FLAG_INIT;

	return ice_vsi_setup(pf, &params);
}

/**
 * ice_eswitch_napi_enable - enable NAPI for all port representors
 * @reprs: xarray of reprs
 */
static void ice_eswitch_napi_enable(struct xarray *reprs)
{
	struct ice_repr *repr;
	unsigned long id;

	xa_for_each(reprs, id, repr)
		napi_enable(&repr->q_vector->napi);
}

/**
 * ice_eswitch_napi_disable - disable NAPI for all port representors
 * @reprs: xarray of reprs
 */
static void ice_eswitch_napi_disable(struct xarray *reprs)
{
	struct ice_repr *repr;
	unsigned long id;

	xa_for_each(reprs, id, repr)
		napi_disable(&repr->q_vector->napi);
}

/**
 * ice_eswitch_enable_switchdev - configure eswitch in switchdev mode
 * @pf: pointer to PF structure
 */
static int ice_eswitch_enable_switchdev(struct ice_pf *pf)
{
	struct ice_vsi *ctrl_vsi, *uplink_vsi;

	uplink_vsi = ice_get_main_vsi(pf);
	if (!uplink_vsi)
		return -ENODEV;

	if (netif_is_any_bridge_port(uplink_vsi->netdev)) {
		dev_err(ice_pf_to_dev(pf),
			"Uplink port cannot be a bridge port\n");
		return -EINVAL;
	}

	pf->eswitch.control_vsi = ice_eswitch_vsi_setup(pf, pf->hw.port_info);
	if (!pf->eswitch.control_vsi)
		return -ENODEV;

	ctrl_vsi = pf->eswitch.control_vsi;
	pf->eswitch.uplink_vsi = uplink_vsi;

	if (ice_eswitch_setup_env(pf))
		goto err_vsi;

	if (ice_eswitch_br_offloads_init(pf))
		goto err_br_offloads;

	pf->eswitch.is_running = true;

	return 0;

err_br_offloads:
	ice_eswitch_release_env(pf);
err_vsi:
	ice_vsi_release(ctrl_vsi);
	return -ENODEV;
}

/**
 * ice_eswitch_disable_switchdev - disable eswitch resources
 * @pf: pointer to PF structure
 */
static void ice_eswitch_disable_switchdev(struct ice_pf *pf)
{
	struct ice_vsi *ctrl_vsi = pf->eswitch.control_vsi;

	ice_eswitch_br_offloads_deinit(pf);
	ice_eswitch_release_env(pf);
	ice_vsi_release(ctrl_vsi);

	pf->eswitch.is_running = false;
}

/**
 * ice_eswitch_mode_set - set new eswitch mode
 * @devlink: pointer to devlink structure
 * @mode: eswitch mode to switch to
 * @extack: pointer to extack structure
 */
int
ice_eswitch_mode_set(struct devlink *devlink, u16 mode,
		     struct netlink_ext_ack *extack)
{
	struct ice_pf *pf = devlink_priv(devlink);

	if (pf->eswitch_mode == mode)
		return 0;

	if (ice_has_vfs(pf)) {
		dev_info(ice_pf_to_dev(pf), "Changing eswitch mode is allowed only if there are no VFs created");
		NL_SET_ERR_MSG_MOD(extack, "Changing eswitch mode is allowed only if there are no VFs created");
		return -EOPNOTSUPP;
	}

	switch (mode) {
	case DEVLINK_ESWITCH_MODE_LEGACY:
		dev_info(ice_pf_to_dev(pf), "PF %d changed eswitch mode to legacy",
			 pf->hw.pf_id);
		xa_destroy(&pf->eswitch.reprs);
		NL_SET_ERR_MSG_MOD(extack, "Changed eswitch mode to legacy");
		break;
	case DEVLINK_ESWITCH_MODE_SWITCHDEV:
	{
		if (ice_is_adq_active(pf)) {
			dev_err(ice_pf_to_dev(pf), "Couldn't change eswitch mode to switchdev - ADQ is active. Delete ADQ configs and try again, e.g. tc qdisc del dev $PF root");
			NL_SET_ERR_MSG_MOD(extack, "Couldn't change eswitch mode to switchdev - ADQ is active. Delete ADQ configs and try again, e.g. tc qdisc del dev $PF root");
			return -EOPNOTSUPP;
		}

		dev_info(ice_pf_to_dev(pf), "PF %d changed eswitch mode to switchdev",
			 pf->hw.pf_id);
		xa_init_flags(&pf->eswitch.reprs, XA_FLAGS_ALLOC);
		NL_SET_ERR_MSG_MOD(extack, "Changed eswitch mode to switchdev");
		break;
	}
	default:
		NL_SET_ERR_MSG_MOD(extack, "Unknown eswitch mode");
		return -EINVAL;
	}

	pf->eswitch_mode = mode;
	return 0;
}

/**
 * ice_eswitch_mode_get - get current eswitch mode
 * @devlink: pointer to devlink structure
 * @mode: output parameter for current eswitch mode
 */
int ice_eswitch_mode_get(struct devlink *devlink, u16 *mode)
{
	struct ice_pf *pf = devlink_priv(devlink);

	*mode = pf->eswitch_mode;
	return 0;
}

/**
 * ice_is_eswitch_mode_switchdev - check if eswitch mode is set to switchdev
 * @pf: pointer to PF structure
 *
 * Returns true if eswitch mode is set to DEVLINK_ESWITCH_MODE_SWITCHDEV,
 * false otherwise.
 */
bool ice_is_eswitch_mode_switchdev(struct ice_pf *pf)
{
	return pf->eswitch_mode == DEVLINK_ESWITCH_MODE_SWITCHDEV;
}

/**
 * ice_eswitch_start_all_tx_queues - start Tx queues of all port representors
 * @pf: pointer to PF structure
 */
static void ice_eswitch_start_all_tx_queues(struct ice_pf *pf)
{
	struct ice_repr *repr;
	unsigned long id;

	if (test_bit(ICE_DOWN, pf->state))
		return;

	xa_for_each(&pf->eswitch.reprs, id, repr)
		ice_repr_start_tx_queues(repr);
}

/**
 * ice_eswitch_stop_all_tx_queues - stop Tx queues of all port representors
 * @pf: pointer to PF structure
 */
void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf)
{
	struct ice_repr *repr;
	unsigned long id;

	if (test_bit(ICE_DOWN, pf->state))
		return;

	xa_for_each(&pf->eswitch.reprs, id, repr)
		ice_repr_stop_tx_queues(repr);
}
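
/**
 * ice_eswitch_stop_reprs - quiesce all port representors
 * @pf: pointer to the PF struct
 *
 * Remove the slow-path rules, stop the representor Tx queues and disable
 * NAPI before the control VSI or the set of representors is changed.
 */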
static void ice_eswitch_stop_reprs(struct ice_pf *pf)
{
	ice_eswitch_del_sp_rules(pf);
	ice_eswitch_stop_all_tx_queues(pf);
	ice_eswitch_napi_disable(&pf->eswitch.reprs);
}
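
/**
 * ice_eswitch_start_reprs - resume all port representors
 * @pf: pointer to the PF struct
 *
 * Re-enable NAPI, restart the representor Tx queues and re-add the
 * slow-path rules. Counterpart of ice_eswitch_stop_reprs().
 */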
static void ice_eswitch_start_reprs(struct ice_pf *pf)
{
	ice_eswitch_napi_enable(&pf->eswitch.reprs);
	ice_eswitch_start_all_tx_queues(pf);
	ice_eswitch_add_sp_rules(pf);
}
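
/**
 * ice_eswitch_cp_change_queues - resize the eswitch control VSI
 * @eswitch: pointer to eswitch struct
 * @change: number of queue pairs to add (positive) or remove (negative)
 *
 * Close the control VSI, adjust its requested Tx/Rx queue counts, rebuild
 * it without re-initialization, remap the rings to the representor vectors
 * and reopen the VSI.
 */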
static void
ice_eswitch_cp_change_queues(struct ice_eswitch *eswitch, int change)
{
	struct ice_vsi *cp = eswitch->control_vsi;

	ice_vsi_close(cp);

	cp->req_txq = cp->alloc_txq + change;
	cp->req_rxq = cp->alloc_rxq + change;
	ice_vsi_rebuild(cp, ICE_VSI_FLAG_NO_INIT);

	ice_eswitch_remap_rings_to_vectors(eswitch);

	ice_vsi_open(cp);
}
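
/**
 * ice_eswitch_attach - attach VF to the eswitch
 * @pf: pointer to PF structure
 * @vf: pointer to VF structure to be attached
 *
 * During attachment a port representor for the VF is created. Switchdev
 * resources are set up when the first representor is added, and the
 * control VSI is grown by one queue pair for each additional representor.
 *
 * Return: zero on success or an error code on failure.
 */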
int
ice_eswitch_attach(struct ice_pf *pf, struct ice_vf *vf)
{
	struct ice_repr *repr;
	int change = 1;
	int err;

	if (pf->eswitch_mode == DEVLINK_ESWITCH_MODE_LEGACY)
		return 0;

	if (xa_empty(&pf->eswitch.reprs)) {
		err = ice_eswitch_enable_switchdev(pf);
		if (err)
			return err;
		/* Control plane VSI is created with 1 queue as default */
		change = 0;
	}

	ice_eswitch_stop_reprs(pf);

	repr = ice_repr_add_vf(vf);
	if (IS_ERR(repr)) {
		err = PTR_ERR(repr);
		goto err_create_repr;
	}

	err = ice_eswitch_setup_repr(pf, repr);
	if (err)
		goto err_setup_repr;

	err = xa_alloc(&pf->eswitch.reprs, &repr->id, repr,
		       XA_LIMIT(1, INT_MAX), GFP_KERNEL);
	if (err)
		goto err_xa_alloc;

	vf->repr_id = repr->id;

	ice_eswitch_cp_change_queues(&pf->eswitch, change);
	ice_eswitch_start_reprs(pf);

	return 0;

err_xa_alloc:
	ice_eswitch_release_repr(pf, repr);
err_setup_repr:
	ice_repr_rem_vf(repr);
err_create_repr:
	if (xa_empty(&pf->eswitch.reprs))
		ice_eswitch_disable_switchdev(pf);
	ice_eswitch_start_reprs(pf);

	return err;
}
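
/**
 * ice_eswitch_detach - detach VF from the eswitch
 * @pf: pointer to PF structure
 * @vf: pointer to VF structure to be detached
 *
 * Remove the VF's port representor. When the last representor is removed,
 * switchdev resources are released and the devlink rate nodes are
 * destroyed; otherwise the control VSI is shrunk by one queue pair.
 */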
void ice_eswitch_detach(struct ice_pf *pf, struct ice_vf *vf)
{
	struct ice_repr *repr = xa_load(&pf->eswitch.reprs, vf->repr_id);
	struct devlink *devlink = priv_to_devlink(pf);

	if (!repr)
		return;

	ice_eswitch_stop_reprs(pf);
	xa_erase(&pf->eswitch.reprs, repr->id);

	if (xa_empty(&pf->eswitch.reprs))
		ice_eswitch_disable_switchdev(pf);
	else
		ice_eswitch_cp_change_queues(&pf->eswitch, -1);

	ice_eswitch_release_repr(pf, repr);
	ice_repr_rem_vf(repr);

	if (xa_empty(&pf->eswitch.reprs)) {
		/* since all port representors are destroyed, there is
		 * no point in keeping the nodes
		 */
		ice_devlink_rate_clear_tx_topology(ice_get_main_vsi(pf));
		devl_lock(devlink);
		devl_rate_nodes_destroy(devlink);
		devl_unlock(devlink);
	} else {
		ice_eswitch_start_reprs(pf);
	}
}

/**
 * ice_eswitch_rebuild - rebuild eswitch
 * @pf: pointer to PF structure
 */
int ice_eswitch_rebuild(struct ice_pf *pf)
{
	struct ice_repr *repr;
	unsigned long id;
	int err;

	if (!ice_is_switchdev_running(pf))
		return 0;

	err = ice_vsi_rebuild(pf->eswitch.control_vsi, ICE_VSI_FLAG_INIT);
	if (err)
		return err;

	xa_for_each(&pf->eswitch.reprs, id, repr)
		ice_eswitch_detach(pf, repr->vf);

	return 0;
}