mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-11-01 09:13:37 +00:00 
			
		
		
		
	scsi: mpt3sas: Handle firmware faults during second half of IOC init
If a firmware fault occurs while scanning the devices during IOC initialization, then the driver issues a hard reset operation to recover the IOC. However, the driver is not issuing a Port enable request message as part of hard reset operation during IOC initialization. Due to this, the driver will not receive any device discovery-related events and hence devices will not be accessible. Teach the driver to gracefully handle firmware faults while scanning for target devices during IOC initialization. Make the driver issue a port enable request message as part of hard reset operation. This permits receiving device discovery-related events from the firmware after the hard reset operation completes. Link: https://lore.kernel.org/r/20210518051625.1596742-4-suganath-prabu.subramani@broadcom.com Signed-off-by: Suganath Prabu S <suganath-prabu.subramani@broadcom.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
This commit is contained in:
		
							parent
							
								
									19a622c39a
								
							
						
					
					
						commit
						a0815c45c8
					
				
					 3 changed files with 145 additions and 16 deletions
				
			
		| 
						 | 
				
			
			@ -7205,7 +7205,7 @@ mpt3sas_port_enable_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index,
 | 
			
		|||
	if (ioc_status != MPI2_IOCSTATUS_SUCCESS)
 | 
			
		||||
		ioc->port_enable_failed = 1;
 | 
			
		||||
 | 
			
		||||
	if (ioc->is_driver_loading) {
 | 
			
		||||
	if (ioc->port_enable_cmds.status & MPT3_CMD_COMPLETE_ASYNC) {
 | 
			
		||||
		if (ioc_status == MPI2_IOCSTATUS_SUCCESS) {
 | 
			
		||||
			mpt3sas_port_enable_complete(ioc);
 | 
			
		||||
			return 1;
 | 
			
		||||
| 
						 | 
				
			
			@ -7214,6 +7214,7 @@ mpt3sas_port_enable_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index,
 | 
			
		|||
			ioc->start_scan = 0;
 | 
			
		||||
			return 1;
 | 
			
		||||
		}
 | 
			
		||||
		ioc->port_enable_cmds.status &= ~MPT3_CMD_COMPLETE_ASYNC;
 | 
			
		||||
	}
 | 
			
		||||
	complete(&ioc->port_enable_cmds.done);
 | 
			
		||||
	return 1;
 | 
			
		||||
| 
						 | 
				
			
			@ -7308,6 +7309,7 @@ mpt3sas_port_enable(struct MPT3SAS_ADAPTER *ioc)
 | 
			
		|||
	}
 | 
			
		||||
	ioc->drv_internal_flags |= MPT_DRV_INTERNAL_FIRST_PE_ISSUED;
 | 
			
		||||
	ioc->port_enable_cmds.status = MPT3_CMD_PENDING;
 | 
			
		||||
	ioc->port_enable_cmds.status |= MPT3_CMD_COMPLETE_ASYNC;
 | 
			
		||||
	mpi_request = mpt3sas_base_get_msg_frame(ioc, smid);
 | 
			
		||||
	ioc->port_enable_cmds.smid = smid;
 | 
			
		||||
	memset(mpi_request, 0, sizeof(Mpi2PortEnableRequest_t));
 | 
			
		||||
| 
						 | 
				
			
			@ -7856,7 +7858,7 @@ _base_make_ioc_operational(struct MPT3SAS_ADAPTER *ioc)
 | 
			
		|||
	if (r)
 | 
			
		||||
		return r;
 | 
			
		||||
 | 
			
		||||
	if (ioc->is_driver_loading) {
 | 
			
		||||
	if (!ioc->shost_recovery) {
 | 
			
		||||
 | 
			
		||||
		if (ioc->is_warpdrive && ioc->manu_pg10.OEMIdentifier
 | 
			
		||||
		    == 0x80) {
 | 
			
		||||
| 
						 | 
				
			
			@ -8276,8 +8278,6 @@ _base_clear_outstanding_mpt_commands(struct MPT3SAS_ADAPTER *ioc)
 | 
			
		|||
			ioc->start_scan_failed =
 | 
			
		||||
				MPI2_IOCSTATUS_INTERNAL_ERROR;
 | 
			
		||||
			ioc->start_scan = 0;
 | 
			
		||||
			ioc->port_enable_cmds.status =
 | 
			
		||||
				MPT3_CMD_NOT_USED;
 | 
			
		||||
		} else {
 | 
			
		||||
			complete(&ioc->port_enable_cmds.done);
 | 
			
		||||
		}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -500,6 +500,7 @@ struct MPT3SAS_DEVICE {
 | 
			
		|||
#define MPT3_CMD_PENDING	0x0002	/* pending */
 | 
			
		||||
#define MPT3_CMD_REPLY_VALID	0x0004	/* reply is valid */
 | 
			
		||||
#define MPT3_CMD_RESET		0x0008	/* host reset dropped the command */
 | 
			
		||||
#define MPT3_CMD_COMPLETE_ASYNC 0x0010  /* tells whether cmd completes in same thread or not */
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * struct _internal_cmd - internal commands struct
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -78,6 +78,7 @@ static void _scsih_pcie_device_remove_from_sml(struct MPT3SAS_ADAPTER *ioc,
 | 
			
		|||
static void
 | 
			
		||||
_scsih_pcie_check_device(struct MPT3SAS_ADAPTER *ioc, u16 handle);
 | 
			
		||||
static u8 _scsih_check_for_pending_tm(struct MPT3SAS_ADAPTER *ioc, u16 smid);
 | 
			
		||||
static void _scsih_complete_devices_scanning(struct MPT3SAS_ADAPTER *ioc);
 | 
			
		||||
 | 
			
		||||
/* global parameters */
 | 
			
		||||
LIST_HEAD(mpt3sas_ioc_list);
 | 
			
		||||
| 
						 | 
				
			
			@ -3631,8 +3632,6 @@ _scsih_error_recovery_delete_devices(struct MPT3SAS_ADAPTER *ioc)
 | 
			
		|||
{
 | 
			
		||||
	struct fw_event_work *fw_event;
 | 
			
		||||
 | 
			
		||||
	if (ioc->is_driver_loading)
 | 
			
		||||
		return;
 | 
			
		||||
	fw_event = alloc_fw_event_work(0);
 | 
			
		||||
	if (!fw_event)
 | 
			
		||||
		return;
 | 
			
		||||
| 
						 | 
				
			
			@ -3693,6 +3692,14 @@ _scsih_fw_event_cleanup_queue(struct MPT3SAS_ADAPTER *ioc)
 | 
			
		|||
	if ((list_empty(&ioc->fw_event_list) && !ioc->current_event) ||
 | 
			
		||||
	    !ioc->firmware_event_thread)
 | 
			
		||||
		return;
 | 
			
		||||
	/*
 | 
			
		||||
	 * Set current running event as ignore, so that
 | 
			
		||||
	 * current running event will exit quickly.
 | 
			
		||||
	 * As diag reset has occurred it is of no use
 | 
			
		||||
	 * to process remaining stale event data entries.
 | 
			
		||||
	 */
 | 
			
		||||
	if (ioc->shost_recovery && ioc->current_event)
 | 
			
		||||
		ioc->current_event->ignore = 1;
 | 
			
		||||
 | 
			
		||||
	ioc->fw_events_cleanup = 1;
 | 
			
		||||
	while ((fw_event = dequeue_next_fw_event(ioc)) ||
 | 
			
		||||
| 
						 | 
				
			
			@ -3719,6 +3726,19 @@ _scsih_fw_event_cleanup_queue(struct MPT3SAS_ADAPTER *ioc)
 | 
			
		|||
			continue;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		/*
 | 
			
		||||
		 * Driver has to clear ioc->start_scan flag when
 | 
			
		||||
		 * it is cleaning up MPT3SAS_PORT_ENABLE_COMPLETE,
 | 
			
		||||
		 * otherwise scsi_scan_host() API waits for the
 | 
			
		||||
		 * 5 minute timer to expire. If we exit from
 | 
			
		||||
		 * scsi_scan_host() early then we can issue the
 | 
			
		||||
		 * new port enable request as part of current diag reset.
 | 
			
		||||
		 */
 | 
			
		||||
		if (fw_event->event == MPT3SAS_PORT_ENABLE_COMPLETE) {
 | 
			
		||||
			ioc->port_enable_cmds.status |= MPT3_CMD_RESET;
 | 
			
		||||
			ioc->start_scan = 0;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		/*
 | 
			
		||||
		 * Wait on the fw_event to complete. If this returns 1, then
 | 
			
		||||
		 * the event was never executed, and we need a put for the
 | 
			
		||||
| 
						 | 
				
			
			@ -10140,6 +10160,17 @@ _scsih_remove_unresponding_devices(struct MPT3SAS_ADAPTER *ioc)
 | 
			
		|||
	 * owner for the reference the list had on any object we prune.
 | 
			
		||||
	 */
 | 
			
		||||
	spin_lock_irqsave(&ioc->sas_device_lock, flags);
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Clean up the sas_device_init_list list as
 | 
			
		||||
	 * driver goes for fresh scan as part of diag reset.
 | 
			
		||||
	 */
 | 
			
		||||
	list_for_each_entry_safe(sas_device, sas_device_next,
 | 
			
		||||
	    &ioc->sas_device_init_list, list) {
 | 
			
		||||
		list_del_init(&sas_device->list);
 | 
			
		||||
		sas_device_put(sas_device);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	list_for_each_entry_safe(sas_device, sas_device_next,
 | 
			
		||||
	    &ioc->sas_device_list, list) {
 | 
			
		||||
		if (!sas_device->responding)
 | 
			
		||||
| 
						 | 
				
			
			@ -10161,6 +10192,16 @@ _scsih_remove_unresponding_devices(struct MPT3SAS_ADAPTER *ioc)
 | 
			
		|||
	ioc_info(ioc, "Removing unresponding devices: pcie end-devices\n");
 | 
			
		||||
	INIT_LIST_HEAD(&head);
 | 
			
		||||
	spin_lock_irqsave(&ioc->pcie_device_lock, flags);
 | 
			
		||||
	/*
 | 
			
		||||
	 * Clean up the pcie_device_init_list list as
 | 
			
		||||
	 * driver goes for fresh scan as part of diag reset.
 | 
			
		||||
	 */
 | 
			
		||||
	list_for_each_entry_safe(pcie_device, pcie_device_next,
 | 
			
		||||
	    &ioc->pcie_device_init_list, list) {
 | 
			
		||||
		list_del_init(&pcie_device->list);
 | 
			
		||||
		pcie_device_put(pcie_device);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	list_for_each_entry_safe(pcie_device, pcie_device_next,
 | 
			
		||||
	    &ioc->pcie_device_list, list) {
 | 
			
		||||
		if (!pcie_device->responding)
 | 
			
		||||
| 
						 | 
				
			
			@ -10563,8 +10604,7 @@ void
 | 
			
		|||
mpt3sas_scsih_reset_done_handler(struct MPT3SAS_ADAPTER *ioc)
 | 
			
		||||
{
 | 
			
		||||
	dtmprintk(ioc, ioc_info(ioc, "%s: MPT3_IOC_DONE_RESET\n", __func__));
 | 
			
		||||
	if ((!ioc->is_driver_loading) && !(disable_discovery > 0 &&
 | 
			
		||||
					   !ioc->sas_hba.num_phys)) {
 | 
			
		||||
	if (!(disable_discovery > 0 && !ioc->sas_hba.num_phys)) {
 | 
			
		||||
		if (ioc->multipath_on_hba) {
 | 
			
		||||
			_scsih_sas_port_refresh(ioc);
 | 
			
		||||
			_scsih_update_vphys_after_reset(ioc);
 | 
			
		||||
| 
						 | 
				
			
			@ -10619,6 +10659,18 @@ _mpt3sas_fw_work(struct MPT3SAS_ADAPTER *ioc, struct fw_event_work *fw_event)
 | 
			
		|||
		_scsih_del_dirty_vphy(ioc);
 | 
			
		||||
		_scsih_del_dirty_port_entries(ioc);
 | 
			
		||||
		_scsih_scan_for_devices_after_reset(ioc);
 | 
			
		||||
		/*
 | 
			
		||||
		 * If diag reset has occurred during the driver load
 | 
			
		||||
		 * then driver has to complete the driver load operation
 | 
			
		||||
		 * by executing the following items:
 | 
			
		||||
		 *- Register the devices from sas_device_init_list to SML
 | 
			
		||||
		 *- clear is_driver_loading flag,
 | 
			
		||||
		 *- start the watchdog thread.
 | 
			
		||||
		 * In happy driver load path, above things are taken care of when
 | 
			
		||||
		 * driver executes scsih_scan_finished().
 | 
			
		||||
		 */
 | 
			
		||||
		if (ioc->is_driver_loading)
 | 
			
		||||
			_scsih_complete_devices_scanning(ioc);
 | 
			
		||||
		_scsih_set_nvme_max_shutdown_latency(ioc);
 | 
			
		||||
		break;
 | 
			
		||||
	case MPT3SAS_PORT_ENABLE_COMPLETE:
 | 
			
		||||
| 
						 | 
				
			
			@ -10764,11 +10816,23 @@ mpt3sas_scsih_event_callback(struct MPT3SAS_ADAPTER *ioc, u8 msix_index,
 | 
			
		|||
		_scsih_check_topo_delete_events(ioc,
 | 
			
		||||
		    (Mpi2EventDataSasTopologyChangeList_t *)
 | 
			
		||||
		    mpi_reply->EventData);
 | 
			
		||||
		/*
 | 
			
		||||
		 * No need to add the topology change list
 | 
			
		||||
		 * event to fw event work queue when
 | 
			
		||||
		 * diag reset is going on. Since during diag
 | 
			
		||||
		 * reset driver scan the devices by reading
 | 
			
		||||
		 * sas device page0's not by processing the
 | 
			
		||||
		 * events.
 | 
			
		||||
		 */
 | 
			
		||||
		if (ioc->shost_recovery)
 | 
			
		||||
			return 1;
 | 
			
		||||
		break;
 | 
			
		||||
	case MPI2_EVENT_PCIE_TOPOLOGY_CHANGE_LIST:
 | 
			
		||||
	_scsih_check_pcie_topo_remove_events(ioc,
 | 
			
		||||
		    (Mpi26EventDataPCIeTopologyChangeList_t *)
 | 
			
		||||
		    mpi_reply->EventData);
 | 
			
		||||
		if (ioc->shost_recovery)
 | 
			
		||||
			return 1;
 | 
			
		||||
		break;
 | 
			
		||||
	case MPI2_EVENT_IR_CONFIGURATION_CHANGE_LIST:
 | 
			
		||||
		_scsih_check_ir_config_unhide_events(ioc,
 | 
			
		||||
| 
						 | 
				
			
			@ -11284,13 +11348,27 @@ _scsih_probe_boot_devices(struct MPT3SAS_ADAPTER *ioc)
 | 
			
		|||
 | 
			
		||||
	if (channel == RAID_CHANNEL) {
 | 
			
		||||
		raid_device = device;
 | 
			
		||||
		/*
 | 
			
		||||
		 * If this boot vd is already registered with SML then
 | 
			
		||||
		 * no need to register it again as part of device scanning
 | 
			
		||||
		 * after diag reset during driver load operation.
 | 
			
		||||
		 */
 | 
			
		||||
		if (raid_device->starget)
 | 
			
		||||
			return;
 | 
			
		||||
		rc = scsi_add_device(ioc->shost, RAID_CHANNEL,
 | 
			
		||||
		    raid_device->id, 0);
 | 
			
		||||
		if (rc)
 | 
			
		||||
			_scsih_raid_device_remove(ioc, raid_device);
 | 
			
		||||
	} else if (channel == PCIE_CHANNEL) {
 | 
			
		||||
		spin_lock_irqsave(&ioc->pcie_device_lock, flags);
 | 
			
		||||
		pcie_device = device;
 | 
			
		||||
		/*
 | 
			
		||||
		 * If this boot NVMe device is already registered with SML then
 | 
			
		||||
		 * no need to register it again as part of device scanning
 | 
			
		||||
		 * after diag reset during driver load operation.
 | 
			
		||||
		 */
 | 
			
		||||
		if (pcie_device->starget)
 | 
			
		||||
			return;
 | 
			
		||||
		spin_lock_irqsave(&ioc->pcie_device_lock, flags);
 | 
			
		||||
		tid = pcie_device->id;
 | 
			
		||||
		list_move_tail(&pcie_device->list, &ioc->pcie_device_list);
 | 
			
		||||
		spin_unlock_irqrestore(&ioc->pcie_device_lock, flags);
 | 
			
		||||
| 
						 | 
				
			
			@ -11298,8 +11376,15 @@ _scsih_probe_boot_devices(struct MPT3SAS_ADAPTER *ioc)
 | 
			
		|||
		if (rc)
 | 
			
		||||
			_scsih_pcie_device_remove(ioc, pcie_device);
 | 
			
		||||
	} else {
 | 
			
		||||
		spin_lock_irqsave(&ioc->sas_device_lock, flags);
 | 
			
		||||
		sas_device = device;
 | 
			
		||||
		/*
 | 
			
		||||
		 * If this boot sas/sata device is already registered with SML
 | 
			
		||||
		 * then no need to register it again as part of device scanning
 | 
			
		||||
		 * after diag reset during driver load operation.
 | 
			
		||||
		 */
 | 
			
		||||
		if (sas_device->starget)
 | 
			
		||||
			return;
 | 
			
		||||
		spin_lock_irqsave(&ioc->sas_device_lock, flags);
 | 
			
		||||
		handle = sas_device->handle;
 | 
			
		||||
		sas_address_parent = sas_device->sas_address_parent;
 | 
			
		||||
		sas_address = sas_device->sas_address;
 | 
			
		||||
| 
						 | 
				
			
			@ -11597,6 +11682,25 @@ scsih_scan_start(struct Scsi_Host *shost)
 | 
			
		|||
		ioc_info(ioc, "port enable: FAILED\n");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * _scsih_complete_devices_scanning - add the devices to sml and
 | 
			
		||||
 * complete ioc initialization.
 | 
			
		||||
 * @ioc: per adapter object
 | 
			
		||||
 *
 | 
			
		||||
 * Return nothing.
 | 
			
		||||
 */
 | 
			
		||||
static void _scsih_complete_devices_scanning(struct MPT3SAS_ADAPTER *ioc)
 | 
			
		||||
{
 | 
			
		||||
 | 
			
		||||
	if (ioc->wait_for_discovery_to_complete) {
 | 
			
		||||
		ioc->wait_for_discovery_to_complete = 0;
 | 
			
		||||
		_scsih_probe_devices(ioc);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	mpt3sas_base_start_watchdog(ioc);
 | 
			
		||||
	ioc->is_driver_loading = 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * scsih_scan_finished - scsi lld callback for .scan_finished
 | 
			
		||||
 * @shost: SCSI host pointer
 | 
			
		||||
| 
						 | 
				
			
			@ -11610,6 +11714,8 @@ static int
 | 
			
		|||
scsih_scan_finished(struct Scsi_Host *shost, unsigned long time)
 | 
			
		||||
{
 | 
			
		||||
	struct MPT3SAS_ADAPTER *ioc = shost_priv(shost);
 | 
			
		||||
	u32 ioc_state;
 | 
			
		||||
	int issue_hard_reset = 0;
 | 
			
		||||
 | 
			
		||||
	if (disable_discovery > 0) {
 | 
			
		||||
		ioc->is_driver_loading = 0;
 | 
			
		||||
| 
						 | 
				
			
			@ -11624,9 +11730,30 @@ scsih_scan_finished(struct Scsi_Host *shost, unsigned long time)
 | 
			
		|||
		return 1;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (ioc->start_scan)
 | 
			
		||||
	if (ioc->start_scan) {
 | 
			
		||||
		ioc_state = mpt3sas_base_get_iocstate(ioc, 0);
 | 
			
		||||
		if ((ioc_state & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) {
 | 
			
		||||
			mpt3sas_print_fault_code(ioc, ioc_state &
 | 
			
		||||
			    MPI2_DOORBELL_DATA_MASK);
 | 
			
		||||
			issue_hard_reset = 1;
 | 
			
		||||
			goto out;
 | 
			
		||||
		} else if ((ioc_state & MPI2_IOC_STATE_MASK) ==
 | 
			
		||||
				MPI2_IOC_STATE_COREDUMP) {
 | 
			
		||||
			mpt3sas_base_coredump_info(ioc, ioc_state &
 | 
			
		||||
			    MPI2_DOORBELL_DATA_MASK);
 | 
			
		||||
			mpt3sas_base_wait_for_coredump_completion(ioc, __func__);
 | 
			
		||||
			issue_hard_reset = 1;
 | 
			
		||||
			goto out;
 | 
			
		||||
		}
 | 
			
		||||
		return 0;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (ioc->port_enable_cmds.status & MPT3_CMD_RESET) {
 | 
			
		||||
		ioc_info(ioc,
 | 
			
		||||
		    "port enable: aborted due to diag reset\n");
 | 
			
		||||
		ioc->port_enable_cmds.status = MPT3_CMD_NOT_USED;
 | 
			
		||||
		goto out;
 | 
			
		||||
	}
 | 
			
		||||
	if (ioc->start_scan_failed) {
 | 
			
		||||
		ioc_info(ioc, "port enable: FAILED with (ioc_status=0x%08x)\n",
 | 
			
		||||
			 ioc->start_scan_failed);
 | 
			
		||||
| 
						 | 
				
			
			@ -11638,13 +11765,14 @@ scsih_scan_finished(struct Scsi_Host *shost, unsigned long time)
 | 
			
		|||
 | 
			
		||||
	ioc_info(ioc, "port enable: SUCCESS\n");
 | 
			
		||||
	ioc->port_enable_cmds.status = MPT3_CMD_NOT_USED;
 | 
			
		||||
	_scsih_complete_devices_scanning(ioc);
 | 
			
		||||
 | 
			
		||||
	if (ioc->wait_for_discovery_to_complete) {
 | 
			
		||||
		ioc->wait_for_discovery_to_complete = 0;
 | 
			
		||||
		_scsih_probe_devices(ioc);
 | 
			
		||||
out:
 | 
			
		||||
	if (issue_hard_reset) {
 | 
			
		||||
		ioc->port_enable_cmds.status = MPT3_CMD_NOT_USED;
 | 
			
		||||
		if (mpt3sas_base_hard_reset_handler(ioc, SOFT_RESET))
 | 
			
		||||
			ioc->is_driver_loading = 0;
 | 
			
		||||
	}
 | 
			
		||||
	mpt3sas_base_start_watchdog(ioc);
 | 
			
		||||
	ioc->is_driver_loading = 0;
 | 
			
		||||
	return 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		
		Reference in a new issue