mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-04-13 09:59:31 +00:00
i40e: add ability to reset VF for Tx and Rx MDD events
Implement "mdd-auto-reset-vf" priv-flag to handle Tx and Rx MDD events for VFs. This flag is also used in other network adapters like ICE. Usage: - "on" - The problematic VF will be automatically reset if a malformed descriptor is detected. - "off" - The problematic VF will be disabled. In cases where a VF sends malformed packets classified as malicious, it can cause the Tx queue to freeze, rendering it unusable for several minutes. When an MDD event occurs, this new implementation allows for a graceful VF reset to quickly restore operational state. Currently, VF queues are disabled if an MDD event occurs. This patch adds the ability to reset the VF if a Tx or Rx MDD event occurs. It also includes MDD event logging throttling to avoid dmesg pollution and unifies the format of Tx and Rx MDD messages. Note: Standard message rate limiting functions like dev_info_ratelimited() do not meet our requirements. Custom rate limiting is implemented, please see the code for details. Co-developed-by: Jan Sokolowski <jan.sokolowski@intel.com> Signed-off-by: Jan Sokolowski <jan.sokolowski@intel.com> Co-developed-by: Padraig J Connolly <padraig.j.connolly@intel.com> Signed-off-by: Padraig J Connolly <padraig.j.connolly@intel.com> Signed-off-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com> Reviewed-by: Michal Schmidt <mschmidt@redhat.com> Tested-by: Rafal Romanowski <rafal.romanowski@intel.com> Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com> Link: https://patch.msgid.link/20250106221929.956999-13-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
parent
35f715cb77
commit
07af482e64
7 changed files with 123 additions and 17 deletions
|
@ -299,6 +299,18 @@ Use ethtool to view and set link-down-on-close, as follows::
|
|||
ethtool --show-priv-flags ethX
|
||||
ethtool --set-priv-flags ethX link-down-on-close [on|off]
|
||||
|
||||
Setting the mdd-auto-reset-vf Private Flag
|
||||
------------------------------------------
|
||||
|
||||
When the mdd-auto-reset-vf private flag is set to "on", the problematic VF will
|
||||
be automatically reset if a malformed descriptor is detected. If the flag is
|
||||
set to "off", the problematic VF will be disabled.
|
||||
|
||||
Use ethtool to view and set mdd-auto-reset-vf, as follows::
|
||||
|
||||
ethtool --show-priv-flags ethX
|
||||
ethtool --set-priv-flags ethX mdd-auto-reset-vf [on|off]
|
||||
|
||||
Viewing Link Messages
|
||||
---------------------
|
||||
Link messages will not be displayed to the console if the distribution is
|
||||
|
|
|
@ -88,6 +88,7 @@ enum i40e_state {
|
|||
__I40E_SERVICE_SCHED,
|
||||
__I40E_ADMINQ_EVENT_PENDING,
|
||||
__I40E_MDD_EVENT_PENDING,
|
||||
__I40E_MDD_VF_PRINT_PENDING,
|
||||
__I40E_VFLR_EVENT_PENDING,
|
||||
__I40E_RESET_RECOVERY_PENDING,
|
||||
__I40E_TIMEOUT_RECOVERY_PENDING,
|
||||
|
@ -191,6 +192,7 @@ enum i40e_pf_flags {
|
|||
*/
|
||||
I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA,
|
||||
I40E_FLAG_VF_VLAN_PRUNING_ENA,
|
||||
I40E_FLAG_MDD_AUTO_RESET_VF,
|
||||
I40E_PF_FLAGS_NBITS, /* must be last */
|
||||
};
|
||||
|
||||
|
@ -572,7 +574,7 @@ struct i40e_pf {
|
|||
int num_alloc_vfs; /* actual number of VFs allocated */
|
||||
u32 vf_aq_requests;
|
||||
u32 arq_overflows; /* Not fatal, possibly indicative of problems */
|
||||
|
||||
struct ratelimit_state mdd_message_rate_limit;
|
||||
/* DCBx/DCBNL capability for PF that indicates
|
||||
* whether DCBx is managed by firmware or host
|
||||
* based agent (LLDPAD). Also, indicates what
|
||||
|
|
|
@ -722,7 +722,7 @@ static void i40e_dbg_dump_vf(struct i40e_pf *pf, int vf_id)
|
|||
dev_info(&pf->pdev->dev, "vf %2d: VSI id=%d, seid=%d, qps=%d\n",
|
||||
vf_id, vf->lan_vsi_id, vsi->seid, vf->num_queue_pairs);
|
||||
dev_info(&pf->pdev->dev, " num MDD=%lld\n",
|
||||
vf->num_mdd_events);
|
||||
vf->mdd_tx_events.count + vf->mdd_rx_events.count);
|
||||
} else {
|
||||
dev_info(&pf->pdev->dev, "invalid VF id %d\n", vf_id);
|
||||
}
|
||||
|
|
|
@ -459,6 +459,8 @@ static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = {
|
|||
I40E_PRIV_FLAG("base-r-fec", I40E_FLAG_BASE_R_FEC, 0),
|
||||
I40E_PRIV_FLAG("vf-vlan-pruning",
|
||||
I40E_FLAG_VF_VLAN_PRUNING_ENA, 0),
|
||||
I40E_PRIV_FLAG("mdd-auto-reset-vf",
|
||||
I40E_FLAG_MDD_AUTO_RESET_VF, 0),
|
||||
};
|
||||
|
||||
#define I40E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_gstrings_priv_flags)
|
||||
|
|
|
@ -11179,6 +11179,67 @@ static void i40e_handle_reset_warning(struct i40e_pf *pf, bool lock_acquired)
|
|||
i40e_reset_and_rebuild(pf, false, lock_acquired);
|
||||
}
|
||||
|
||||
/**
|
||||
* i40e_print_vf_mdd_event - print VF Tx/Rx malicious driver detect event
|
||||
* @pf: board private structure
|
||||
* @vf: pointer to the VF structure
|
||||
* @is_tx: true - for Tx event, false - for Rx
|
||||
*/
|
||||
static void i40e_print_vf_mdd_event(struct i40e_pf *pf, struct i40e_vf *vf,
|
||||
bool is_tx)
|
||||
{
|
||||
dev_err(&pf->pdev->dev, is_tx ?
|
||||
"%lld Tx Malicious Driver Detection events detected on PF %d VF %d MAC %pm. mdd-auto-reset-vfs=%s\n" :
|
||||
"%lld Rx Malicious Driver Detection events detected on PF %d VF %d MAC %pm. mdd-auto-reset-vfs=%s\n",
|
||||
is_tx ? vf->mdd_tx_events.count : vf->mdd_rx_events.count,
|
||||
pf->hw.pf_id,
|
||||
vf->vf_id,
|
||||
vf->default_lan_addr.addr,
|
||||
str_on_off(test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags)));
|
||||
}
|
||||
|
||||
/**
|
||||
* i40e_print_vfs_mdd_events - print VFs malicious driver detect event
|
||||
* @pf: pointer to the PF structure
|
||||
*
|
||||
* Called from i40e_handle_mdd_event to rate limit and print VFs MDD events.
|
||||
*/
|
||||
static void i40e_print_vfs_mdd_events(struct i40e_pf *pf)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
/* check that there are pending MDD events to print */
|
||||
if (!test_and_clear_bit(__I40E_MDD_VF_PRINT_PENDING, pf->state))
|
||||
return;
|
||||
|
||||
if (!__ratelimit(&pf->mdd_message_rate_limit))
|
||||
return;
|
||||
|
||||
for (i = 0; i < pf->num_alloc_vfs; i++) {
|
||||
struct i40e_vf *vf = &pf->vf[i];
|
||||
bool is_printed = false;
|
||||
|
||||
/* only print Rx MDD event message if there are new events */
|
||||
if (vf->mdd_rx_events.count != vf->mdd_rx_events.last_printed) {
|
||||
vf->mdd_rx_events.last_printed = vf->mdd_rx_events.count;
|
||||
i40e_print_vf_mdd_event(pf, vf, false);
|
||||
is_printed = true;
|
||||
}
|
||||
|
||||
/* only print Tx MDD event message if there are new events */
|
||||
if (vf->mdd_tx_events.count != vf->mdd_tx_events.last_printed) {
|
||||
vf->mdd_tx_events.last_printed = vf->mdd_tx_events.count;
|
||||
i40e_print_vf_mdd_event(pf, vf, true);
|
||||
is_printed = true;
|
||||
}
|
||||
|
||||
if (is_printed && !test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags))
|
||||
dev_info(&pf->pdev->dev,
|
||||
"Use PF Control I/F to re-enable the VF #%d\n",
|
||||
i);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* i40e_handle_mdd_event
|
||||
* @pf: pointer to the PF structure
|
||||
|
@ -11193,8 +11254,13 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf)
|
|||
u32 reg;
|
||||
int i;
|
||||
|
||||
if (!test_bit(__I40E_MDD_EVENT_PENDING, pf->state))
|
||||
if (!test_and_clear_bit(__I40E_MDD_EVENT_PENDING, pf->state)) {
|
||||
/* Since the VF MDD event logging is rate limited, check if
|
||||
* there are pending MDD events.
|
||||
*/
|
||||
i40e_print_vfs_mdd_events(pf);
|
||||
return;
|
||||
}
|
||||
|
||||
/* find what triggered the MDD event */
|
||||
reg = rd32(hw, I40E_GL_MDET_TX);
|
||||
|
@ -11238,36 +11304,48 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf)
|
|||
|
||||
/* see if one of the VFs needs its hand slapped */
|
||||
for (i = 0; i < pf->num_alloc_vfs && mdd_detected; i++) {
|
||||
bool is_mdd_on_tx = false;
|
||||
bool is_mdd_on_rx = false;
|
||||
|
||||
vf = &(pf->vf[i]);
|
||||
reg = rd32(hw, I40E_VP_MDET_TX(i));
|
||||
if (reg & I40E_VP_MDET_TX_VALID_MASK) {
|
||||
set_bit(__I40E_MDD_VF_PRINT_PENDING, pf->state);
|
||||
wr32(hw, I40E_VP_MDET_TX(i), 0xFFFF);
|
||||
vf->num_mdd_events++;
|
||||
dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n",
|
||||
i);
|
||||
dev_info(&pf->pdev->dev,
|
||||
"Use PF Control I/F to re-enable the VF\n");
|
||||
vf->mdd_tx_events.count++;
|
||||
set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states);
|
||||
is_mdd_on_tx = true;
|
||||
}
|
||||
|
||||
reg = rd32(hw, I40E_VP_MDET_RX(i));
|
||||
if (reg & I40E_VP_MDET_RX_VALID_MASK) {
|
||||
set_bit(__I40E_MDD_VF_PRINT_PENDING, pf->state);
|
||||
wr32(hw, I40E_VP_MDET_RX(i), 0xFFFF);
|
||||
vf->num_mdd_events++;
|
||||
dev_info(&pf->pdev->dev, "RX driver issue detected on VF %d\n",
|
||||
i);
|
||||
dev_info(&pf->pdev->dev,
|
||||
"Use PF Control I/F to re-enable the VF\n");
|
||||
vf->mdd_rx_events.count++;
|
||||
set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states);
|
||||
is_mdd_on_rx = true;
|
||||
}
|
||||
|
||||
if ((is_mdd_on_tx || is_mdd_on_rx) &&
|
||||
test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags)) {
|
||||
/* VF MDD event counters will be cleared by
|
||||
* reset, so print the event prior to reset.
|
||||
*/
|
||||
if (is_mdd_on_rx)
|
||||
i40e_print_vf_mdd_event(pf, vf, false);
|
||||
if (is_mdd_on_tx)
|
||||
i40e_print_vf_mdd_event(pf, vf, true);
|
||||
|
||||
i40e_vc_reset_vf(vf, true);
|
||||
}
|
||||
}
|
||||
|
||||
/* re-enable mdd interrupt cause */
|
||||
clear_bit(__I40E_MDD_EVENT_PENDING, pf->state);
|
||||
reg = rd32(hw, I40E_PFINT_ICR0_ENA);
|
||||
reg |= I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK;
|
||||
wr32(hw, I40E_PFINT_ICR0_ENA, reg);
|
||||
i40e_flush(hw);
|
||||
|
||||
i40e_print_vfs_mdd_events(pf);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -15878,6 +15956,9 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
|
|||
ERR_PTR(err),
|
||||
i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
|
||||
|
||||
/* VF MDD event logs are rate limited to one second intervals */
|
||||
ratelimit_state_init(&pf->mdd_message_rate_limit, 1 * HZ, 1);
|
||||
|
||||
/* Reconfigure hardware for allowing smaller MSS in the case
|
||||
* of TSO, so that we avoid the MDD being fired and causing
|
||||
* a reset in the case of small MSS+TSO.
|
||||
|
|
|
@ -216,7 +216,7 @@ void i40e_vc_notify_vf_reset(struct i40e_vf *vf)
|
|||
* @notify_vf: notify vf about reset or not
|
||||
* Reset VF handler.
|
||||
**/
|
||||
static void i40e_vc_reset_vf(struct i40e_vf *vf, bool notify_vf)
|
||||
void i40e_vc_reset_vf(struct i40e_vf *vf, bool notify_vf)
|
||||
{
|
||||
struct i40e_pf *pf = vf->pf;
|
||||
int i;
|
||||
|
|
|
@ -64,6 +64,12 @@ struct i40evf_channel {
|
|||
u64 max_tx_rate; /* bandwidth rate allocation for VSIs */
|
||||
};
|
||||
|
||||
struct i40e_mdd_vf_events {
|
||||
u64 count; /* total count of Rx|Tx events */
|
||||
/* count number of the last printed event */
|
||||
u64 last_printed;
|
||||
};
|
||||
|
||||
/* VF information structure */
|
||||
struct i40e_vf {
|
||||
struct i40e_pf *pf;
|
||||
|
@ -92,7 +98,9 @@ struct i40e_vf {
|
|||
|
||||
u8 num_queue_pairs; /* num of qps assigned to VF vsis */
|
||||
u8 num_req_queues; /* num of requested qps */
|
||||
u64 num_mdd_events; /* num of mdd events detected */
|
||||
/* num of mdd tx and rx events detected */
|
||||
struct i40e_mdd_vf_events mdd_rx_events;
|
||||
struct i40e_mdd_vf_events mdd_tx_events;
|
||||
|
||||
unsigned long vf_caps; /* vf's adv. capabilities */
|
||||
unsigned long vf_states; /* vf's runtime states */
|
||||
|
@ -120,6 +128,7 @@ int i40e_alloc_vfs(struct i40e_pf *pf, u16 num_alloc_vfs);
|
|||
int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode,
|
||||
u32 v_retval, u8 *msg, u16 msglen);
|
||||
int i40e_vc_process_vflr_event(struct i40e_pf *pf);
|
||||
void i40e_vc_reset_vf(struct i40e_vf *vf, bool notify_vf);
|
||||
bool i40e_reset_vf(struct i40e_vf *vf, bool flr);
|
||||
bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr);
|
||||
void i40e_vc_notify_vf_reset(struct i40e_vf *vf);
|
||||
|
|
Loading…
Add table
Reference in a new issue