bnxt_en: Change FW message timeout warning

The firmware advertises a "hwrm_cmd_max_timeout" value to the driver
for NVRAM and coredump related functions that can take tens of seconds
to complete.  The driver polls for the operation to complete under
mutex and may trigger hung task watchdog warning if the wait is too long.
To warn the user about this, the driver currently prints a warning if
this advertised value exceeds 40 seconds:

Device requests max timeout of %d seconds, may trigger hung task watchdog

Initially, we chose 40 seconds, well below the kernel's default
CONFIG_DEFAULT_HUNG_TASK_TIMEOUT (120 seconds) to avoid triggering
the hung task watchdog.  But 60 seconds is the timeout on most
production FW and cannot be reduced further.  Change the driver's warning
threshold to 60 seconds to avoid triggering this warning on all
production devices.  We also print the warning if the value exceeds
CONFIG_DEFAULT_HUNG_TASK_TIMEOUT which may be set to architecture
specific defaults as low as 10 seconds.

Reviewed-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Reviewed-by: Andy Gospodarek <andrew.gospodarek@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Link: https://patch.msgid.link/20250417172448.1206107-2-michael.chan@broadcom.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Michael Chan 2025-04-17 10:24:45 -07:00 committed by Jakub Kicinski
parent 50f257069a
commit 0fcad44a86
2 changed files with 8 additions and 5 deletions

View file

@ -10110,7 +10110,7 @@ static int bnxt_hwrm_ver_get(struct bnxt *bp)
struct hwrm_ver_get_input *req;
u16 fw_maj, fw_min, fw_bld, fw_rsv;
u32 dev_caps_cfg, hwrm_ver;
int rc, len;
int rc, len, max_tmo_secs;
rc = hwrm_req_init(bp, req, HWRM_VER_GET);
if (rc)
@ -10183,9 +10183,12 @@ static int bnxt_hwrm_ver_get(struct bnxt *bp)
bp->hwrm_cmd_max_timeout = le16_to_cpu(resp->max_req_timeout) * 1000;
if (!bp->hwrm_cmd_max_timeout)
bp->hwrm_cmd_max_timeout = HWRM_CMD_MAX_TIMEOUT;
else if (bp->hwrm_cmd_max_timeout > HWRM_CMD_MAX_TIMEOUT)
netdev_warn(bp->dev, "Device requests max timeout of %d seconds, may trigger hung task watchdog\n",
bp->hwrm_cmd_max_timeout / 1000);
max_tmo_secs = bp->hwrm_cmd_max_timeout / 1000;
if (bp->hwrm_cmd_max_timeout > HWRM_CMD_MAX_TIMEOUT ||
max_tmo_secs > CONFIG_DEFAULT_HUNG_TASK_TIMEOUT) {
netdev_warn(bp->dev, "Device requests max timeout of %d seconds, may trigger hung task watchdog (kernel default %ds)\n",
max_tmo_secs, CONFIG_DEFAULT_HUNG_TASK_TIMEOUT);
}
if (resp->hwrm_intf_maj_8b >= 1) {
bp->hwrm_max_req_len = le16_to_cpu(resp->max_req_win_len);

View file

@ -58,7 +58,7 @@ void hwrm_update_token(struct bnxt *bp, u16 seq, enum bnxt_hwrm_wait_state s);
#define BNXT_HWRM_MAX_REQ_LEN (bp->hwrm_max_req_len)
#define BNXT_HWRM_SHORT_REQ_LEN sizeof(struct hwrm_short_input)
#define HWRM_CMD_MAX_TIMEOUT 40000U
#define HWRM_CMD_MAX_TIMEOUT 60000U
#define SHORT_HWRM_CMD_TIMEOUT 20
#define HWRM_CMD_TIMEOUT (bp->hwrm_cmd_timeout)
#define HWRM_RESET_TIMEOUT ((HWRM_CMD_TIMEOUT) * 4)