s390 fixes for 6.16-rc5

- Fix PCI error recovery and bring it in line with AER/EEH
 -----BEGIN PGP SIGNATURE-----
 
 iI0EABYKADUWIQQrtrZiYVkVzKQcYivNdxKlNrRb8AUCaGU3EBccYWdvcmRlZXZA
 bGludXguaWJtLmNvbQAKCRDNdxKlNrRb8BoVAP4+t6zSnkV8nSDTX6yISaI9o8Dx
 v4feTwMOkizuFbQbxQEA/QiQsN4bOymNDWwNgkzLDEBjirVLsYVzlvS2ttFfjAE=
 =oStU
 -----END PGP SIGNATURE-----

Merge tag 's390-6.16-4' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux

Pull s390 fixes from Alexander Gordeev:

 - Fix PCI error recovery and bring it in line with AER/EEH

* tag 's390-6.16-4' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux:
  s390/pci: Allow automatic recovery with minimal driver support
  s390/pci: Do not try re-enabling load/store if device is disabled
  s390/pci: Fix stale function handles in error handling
This commit is contained in:
Linus Torvalds 2025-07-02 09:27:57 -07:00
commit ba6a2f25d3

View file

@ -54,6 +54,7 @@ static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res)
case PCI_ERS_RESULT_CAN_RECOVER:
case PCI_ERS_RESULT_RECOVERED:
case PCI_ERS_RESULT_NEED_RESET:
case PCI_ERS_RESULT_NONE:
return false;
default:
return true;
@ -78,10 +79,6 @@ static bool is_driver_supported(struct pci_driver *driver)
return false;
if (!driver->err_handler->error_detected)
return false;
if (!driver->err_handler->slot_reset)
return false;
if (!driver->err_handler->resume)
return false;
return true;
}
@ -106,6 +103,10 @@ static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev,
struct zpci_dev *zdev = to_zpci(pdev);
int rc;
/* The underlying device may have been disabled by the event */
if (!zdev_enabled(zdev))
return PCI_ERS_RESULT_NEED_RESET;
pr_info("%s: Unblocking device access for examination\n", pci_name(pdev));
rc = zpci_reset_load_store_blocked(zdev);
if (rc) {
@ -114,16 +115,18 @@ static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev,
return PCI_ERS_RESULT_NEED_RESET;
}
if (driver->err_handler->mmio_enabled) {
if (driver->err_handler->mmio_enabled)
ers_res = driver->err_handler->mmio_enabled(pdev);
if (ers_result_indicates_abort(ers_res)) {
pr_info("%s: Automatic recovery failed after MMIO re-enable\n",
pci_name(pdev));
return ers_res;
} else if (ers_res == PCI_ERS_RESULT_NEED_RESET) {
pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
return ers_res;
}
else
ers_res = PCI_ERS_RESULT_NONE;
if (ers_result_indicates_abort(ers_res)) {
pr_info("%s: Automatic recovery failed after MMIO re-enable\n",
pci_name(pdev));
return ers_res;
} else if (ers_res == PCI_ERS_RESULT_NEED_RESET) {
pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
return ers_res;
}
pr_debug("%s: Unblocking DMA\n", pci_name(pdev));
@ -150,7 +153,12 @@ static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev,
return ers_res;
}
pdev->error_state = pci_channel_io_normal;
ers_res = driver->err_handler->slot_reset(pdev);
if (driver->err_handler->slot_reset)
ers_res = driver->err_handler->slot_reset(pdev);
else
ers_res = PCI_ERS_RESULT_NONE;
if (ers_result_indicates_abort(ers_res)) {
pr_info("%s: Automatic recovery failed after slot reset\n", pci_name(pdev));
return ers_res;
@ -214,7 +222,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
goto out_unlock;
}
if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) {
if (ers_res != PCI_ERS_RESULT_NEED_RESET) {
ers_res = zpci_event_do_error_state_clear(pdev, driver);
if (ers_result_indicates_abort(ers_res)) {
status_str = "failed (abort on MMIO enable)";
@ -225,6 +233,16 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
if (ers_res == PCI_ERS_RESULT_NEED_RESET)
ers_res = zpci_event_do_reset(pdev, driver);
/*
* ers_res can be PCI_ERS_RESULT_NONE either because the driver
* decided to return it, indicating that it abstains from voting
* on how to recover, or because it didn't implement the callback.
* Both cases assume, that if there is nothing else causing a
* disconnect, we recovered successfully.
*/
if (ers_res == PCI_ERS_RESULT_NONE)
ers_res = PCI_ERS_RESULT_RECOVERED;
if (ers_res != PCI_ERS_RESULT_RECOVERED) {
pr_err("%s: Automatic recovery failed; operator intervention is required\n",
pci_name(pdev));
@ -273,6 +291,8 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
struct pci_dev *pdev = NULL;
pci_ers_result_t ers_res;
u32 fh = 0;
int rc;
zpci_dbg(3, "err fid:%x, fh:%x, pec:%x\n",
ccdf->fid, ccdf->fh, ccdf->pec);
@ -281,6 +301,15 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
if (zdev) {
mutex_lock(&zdev->state_lock);
rc = clp_refresh_fh(zdev->fid, &fh);
if (rc)
goto no_pdev;
if (!fh || ccdf->fh != fh) {
/* Ignore events with stale handles */
zpci_dbg(3, "err fid:%x, fh:%x (stale %x)\n",
ccdf->fid, fh, ccdf->fh);
goto no_pdev;
}
zpci_update_fh(zdev, ccdf->fh);
if (zdev->zbus->bus)
pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);