mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-09-18 22:14:16 +00:00 
			
		
		
		
	block-5.9-2020-09-04
-----BEGIN PGP SIGNATURE-----
 
 iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAl9SWMMQHGF4Ym9lQGtl
 cm5lbC5kawAKCRD301j7KXHgphIcD/488Q7rXb2eABp1fGs4gu+VFOCLogeHL8xh
 5xHNiOPnZG2SGr8DQJY/7EX2kE65rbZi8/g+2N6anovI2nduRu0tzSra7fRgzbys
 ZQC1CUel0MbCd7e8OaEfg108PSHNxBf1PqDcE7zCeyZ0DIs3s4vK/bQtmzzxZHgU
 wNw4OIP9gOdqgjowb6GGHo9SLN4GT8rZ0jZVPLa7GwFsvxCTwv/7lHO8rqeSeuCu
 5H6i3M/rSbtTXPLHf4Fy97x9WmBmdgu4epTXiwbOxaagpx3lm/7n1P3CpavR+Gcq
 O5VGIIzazxPwnZl9y/6rZFLGYqcj38RxUvC8KtK6tDXxEu/BDJa1d6hXI03SyXAO
 ZAiEpQTKOkJE3R8ewUDrXLvl3p6FvwZVZ5SIFwUb+0JFrVQYwrgfoRJtzb5SIUan
 T9/bSYge7lFRI92FZRIqhvk8rsEBRdu7N/rQCyGf6GuZ0vRXWRAqN7T02iDn3czX
 pdGAepU5ymw8CwyUiNNnkY0DUaQLBIO9tCA9epxLwdroQ95vJtMPRBX1STQ65GVk
 XvMFAJqDAehQ/nP5xO60cWGZHyL7L/ccpofZlA/ytgAIZRa85GvhrdVy7yc6DKto
 wu6h2tkX9+ldoUjVbn/60T+Ft3QUTlfAuDfherkNoFNB/G5i1pzOHbwvL7B3czr3
 ZMjoNiOIqA==
 =8fvz
 -----END PGP SIGNATURE-----
Merge tag 'block-5.9-2020-09-04' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe:
 "A bit larger than usual this week, mostly due to the NVMe fixes
  arriving late for -rc3 and hence missing last week's pull request.
   - NVMe:
        - instance leak and io boundary fixes from Keith
        - fc locking fix from Christophe
        - various tcp/rdma reset during traffic fixes from Sagi
        - pci use-after-free fix from Tong
        - tcp target null deref fix from Ziye
   - Locking fix for partition removal (Christoph)
   - Ensure bdi->io_pages is always set (me)
   - Fixup for hd struct reference (Ming)
   - Fix for zero length bvecs (Ming)
   - Two small blk-iocost fixes (Tejun)"
* tag 'block-5.9-2020-09-04' of git://git.kernel.dk/linux-block:
  block: allow for_each_bvec to support zero len bvec
  blk-stat: make q->stats->lock irqsafe
  blk-iocost: ioc_pd_free() shouldn't assume irq disabled
  block: fix locking in bdev_del_partition
  block: release disk reference in hd_struct_free_work
  block: ensure bdi->io_pages is always initialized
  nvme-pci: cancel nvme device request before disabling
  nvme: only use power of two io boundaries
  nvme: fix controller instance leak
  nvmet-fc: Fix a missed _irqsave version of spin_lock in 'nvmet_fc_fod_op_done()'
  nvme: Fix NULL dereference for pci nvme controllers
  nvme-rdma: fix reset hang if controller died in the middle of a reset
  nvme-rdma: fix timeout handler
  nvme-rdma: serialize controller teardown sequences
  nvme-tcp: fix reset hang if controller died in the middle of a reset
  nvme-tcp: fix timeout handler
  nvme-tcp: serialize controller teardown sequences
  nvme: have nvme_wait_freeze_timeout return if it timed out
  nvme-fabrics: don't check state NVME_CTRL_NEW for request acceptance
  nvmet-tcp: Fix NULL dereference when a connect data comes in h2cdata pdu
			
			
This commit is contained in:
		
						commit
						8075fc3b11
					
				
					 13 changed files with 212 additions and 82 deletions
				
			
		|  | @ -539,6 +539,7 @@ struct request_queue *blk_alloc_queue(int node_id) | |||
| 		goto fail_stats; | ||||
| 
 | ||||
| 	q->backing_dev_info->ra_pages = VM_READAHEAD_PAGES; | ||||
| 	q->backing_dev_info->io_pages = VM_READAHEAD_PAGES; | ||||
| 	q->backing_dev_info->capabilities = BDI_CAP_CGROUP_WRITEBACK; | ||||
| 	q->node = node_id; | ||||
| 
 | ||||
|  |  | |||
|  | @ -2092,14 +2092,15 @@ static void ioc_pd_free(struct blkg_policy_data *pd) | |||
| { | ||||
| 	struct ioc_gq *iocg = pd_to_iocg(pd); | ||||
| 	struct ioc *ioc = iocg->ioc; | ||||
| 	unsigned long flags; | ||||
| 
 | ||||
| 	if (ioc) { | ||||
| 		spin_lock(&ioc->lock); | ||||
| 		spin_lock_irqsave(&ioc->lock, flags); | ||||
| 		if (!list_empty(&iocg->active_list)) { | ||||
| 			propagate_active_weight(iocg, 0, 0); | ||||
| 			list_del_init(&iocg->active_list); | ||||
| 		} | ||||
| 		spin_unlock(&ioc->lock); | ||||
| 		spin_unlock_irqrestore(&ioc->lock, flags); | ||||
| 
 | ||||
| 		hrtimer_cancel(&iocg->waitq_timer); | ||||
| 		hrtimer_cancel(&iocg->delay_timer); | ||||
|  |  | |||
|  | @ -137,6 +137,7 @@ void blk_stat_add_callback(struct request_queue *q, | |||
| 			   struct blk_stat_callback *cb) | ||||
| { | ||||
| 	unsigned int bucket; | ||||
| 	unsigned long flags; | ||||
| 	int cpu; | ||||
| 
 | ||||
| 	for_each_possible_cpu(cpu) { | ||||
|  | @ -147,20 +148,22 @@ void blk_stat_add_callback(struct request_queue *q, | |||
| 			blk_rq_stat_init(&cpu_stat[bucket]); | ||||
| 	} | ||||
| 
 | ||||
| 	spin_lock(&q->stats->lock); | ||||
| 	spin_lock_irqsave(&q->stats->lock, flags); | ||||
| 	list_add_tail_rcu(&cb->list, &q->stats->callbacks); | ||||
| 	blk_queue_flag_set(QUEUE_FLAG_STATS, q); | ||||
| 	spin_unlock(&q->stats->lock); | ||||
| 	spin_unlock_irqrestore(&q->stats->lock, flags); | ||||
| } | ||||
| 
 | ||||
| void blk_stat_remove_callback(struct request_queue *q, | ||||
| 			      struct blk_stat_callback *cb) | ||||
| { | ||||
| 	spin_lock(&q->stats->lock); | ||||
| 	unsigned long flags; | ||||
| 
 | ||||
| 	spin_lock_irqsave(&q->stats->lock, flags); | ||||
| 	list_del_rcu(&cb->list); | ||||
| 	if (list_empty(&q->stats->callbacks) && !q->stats->enable_accounting) | ||||
| 		blk_queue_flag_clear(QUEUE_FLAG_STATS, q); | ||||
| 	spin_unlock(&q->stats->lock); | ||||
| 	spin_unlock_irqrestore(&q->stats->lock, flags); | ||||
| 
 | ||||
| 	del_timer_sync(&cb->timer); | ||||
| } | ||||
|  | @ -183,10 +186,12 @@ void blk_stat_free_callback(struct blk_stat_callback *cb) | |||
| 
 | ||||
| void blk_stat_enable_accounting(struct request_queue *q) | ||||
| { | ||||
| 	spin_lock(&q->stats->lock); | ||||
| 	unsigned long flags; | ||||
| 
 | ||||
| 	spin_lock_irqsave(&q->stats->lock, flags); | ||||
| 	q->stats->enable_accounting = true; | ||||
| 	blk_queue_flag_set(QUEUE_FLAG_STATS, q); | ||||
| 	spin_unlock(&q->stats->lock); | ||||
| 	spin_unlock_irqrestore(&q->stats->lock, flags); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(blk_stat_enable_accounting); | ||||
| 
 | ||||
|  |  | |||
|  | @ -278,6 +278,15 @@ static void hd_struct_free_work(struct work_struct *work) | |||
| { | ||||
| 	struct hd_struct *part = | ||||
| 		container_of(to_rcu_work(work), struct hd_struct, rcu_work); | ||||
| 	struct gendisk *disk = part_to_disk(part); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Release the disk reference acquired in delete_partition here. | ||||
| 	 * We can't release it in hd_struct_free because the final put_device | ||||
| 	 * needs process context and thus can't be run directly from a | ||||
| 	 * percpu_ref ->release handler. | ||||
| 	 */ | ||||
| 	put_device(disk_to_dev(disk)); | ||||
| 
 | ||||
| 	part->start_sect = 0; | ||||
| 	part->nr_sects = 0; | ||||
|  | @ -293,7 +302,6 @@ static void hd_struct_free(struct percpu_ref *ref) | |||
| 		rcu_dereference_protected(disk->part_tbl, 1); | ||||
| 
 | ||||
| 	rcu_assign_pointer(ptbl->last_lookup, NULL); | ||||
| 	put_device(disk_to_dev(disk)); | ||||
| 
 | ||||
| 	INIT_RCU_WORK(&part->rcu_work, hd_struct_free_work); | ||||
| 	queue_rcu_work(system_wq, &part->rcu_work); | ||||
|  | @ -524,19 +532,20 @@ int bdev_add_partition(struct block_device *bdev, int partno, | |||
| int bdev_del_partition(struct block_device *bdev, int partno) | ||||
| { | ||||
| 	struct block_device *bdevp; | ||||
| 	struct hd_struct *part; | ||||
| 	int ret = 0; | ||||
| 	struct hd_struct *part = NULL; | ||||
| 	int ret; | ||||
| 
 | ||||
| 	part = disk_get_part(bdev->bd_disk, partno); | ||||
| 	if (!part) | ||||
| 		return -ENXIO; | ||||
| 
 | ||||
| 	ret = -ENOMEM; | ||||
| 	bdevp = bdget(part_devt(part)); | ||||
| 	bdevp = bdget_disk(bdev->bd_disk, partno); | ||||
| 	if (!bdevp) | ||||
| 		goto out_put_part; | ||||
| 		return -ENOMEM; | ||||
| 
 | ||||
| 	mutex_lock(&bdevp->bd_mutex); | ||||
| 	mutex_lock_nested(&bdev->bd_mutex, 1); | ||||
| 
 | ||||
| 	ret = -ENXIO; | ||||
| 	part = disk_get_part(bdev->bd_disk, partno); | ||||
| 	if (!part) | ||||
| 		goto out_unlock; | ||||
| 
 | ||||
| 	ret = -EBUSY; | ||||
| 	if (bdevp->bd_openers) | ||||
|  | @ -545,16 +554,14 @@ int bdev_del_partition(struct block_device *bdev, int partno) | |||
| 	sync_blockdev(bdevp); | ||||
| 	invalidate_bdev(bdevp); | ||||
| 
 | ||||
| 	mutex_lock_nested(&bdev->bd_mutex, 1); | ||||
| 	delete_partition(bdev->bd_disk, part); | ||||
| 	mutex_unlock(&bdev->bd_mutex); | ||||
| 
 | ||||
| 	ret = 0; | ||||
| out_unlock: | ||||
| 	mutex_unlock(&bdev->bd_mutex); | ||||
| 	mutex_unlock(&bdevp->bd_mutex); | ||||
| 	bdput(bdevp); | ||||
| out_put_part: | ||||
| 	disk_put_part(part); | ||||
| 	if (part) | ||||
| 		disk_put_part(part); | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -2026,13 +2026,49 @@ static void nvme_update_disk_info(struct gendisk *disk, | |||
| 	blk_mq_unfreeze_queue(disk->queue); | ||||
| } | ||||
| 
 | ||||
| static inline bool nvme_first_scan(struct gendisk *disk) | ||||
| { | ||||
| 	/* nvme_alloc_ns() scans the disk prior to adding it */ | ||||
| 	return !(disk->flags & GENHD_FL_UP); | ||||
| } | ||||
| 
 | ||||
| static void nvme_set_chunk_sectors(struct nvme_ns *ns, struct nvme_id_ns *id) | ||||
| { | ||||
| 	struct nvme_ctrl *ctrl = ns->ctrl; | ||||
| 	u32 iob; | ||||
| 
 | ||||
| 	if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && | ||||
| 	    is_power_of_2(ctrl->max_hw_sectors)) | ||||
| 		iob = ctrl->max_hw_sectors; | ||||
| 	else | ||||
| 		iob = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob)); | ||||
| 
 | ||||
| 	if (!iob) | ||||
| 		return; | ||||
| 
 | ||||
| 	if (!is_power_of_2(iob)) { | ||||
| 		if (nvme_first_scan(ns->disk)) | ||||
| 			pr_warn("%s: ignoring unaligned IO boundary:%u\n", | ||||
| 				ns->disk->disk_name, iob); | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| 	if (blk_queue_is_zoned(ns->disk->queue)) { | ||||
| 		if (nvme_first_scan(ns->disk)) | ||||
| 			pr_warn("%s: ignoring zoned namespace IO boundary\n", | ||||
| 				ns->disk->disk_name); | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| 	blk_queue_chunk_sectors(ns->queue, iob); | ||||
| } | ||||
| 
 | ||||
| static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) | ||||
| { | ||||
| 	unsigned lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK; | ||||
| 	struct nvme_ns *ns = disk->private_data; | ||||
| 	struct nvme_ctrl *ctrl = ns->ctrl; | ||||
| 	int ret; | ||||
| 	u32 iob; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * If identify namespace failed, use default 512 byte block size so | ||||
|  | @ -2060,12 +2096,6 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) | |||
| 		return -ENODEV; | ||||
| 	} | ||||
| 
 | ||||
| 	if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && | ||||
| 	    is_power_of_2(ctrl->max_hw_sectors)) | ||||
| 		iob = ctrl->max_hw_sectors; | ||||
| 	else | ||||
| 		iob = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob)); | ||||
| 
 | ||||
| 	ns->features = 0; | ||||
| 	ns->ms = le16_to_cpu(id->lbaf[lbaf].ms); | ||||
| 	/* the PI implementation requires metadata equal t10 pi tuple size */ | ||||
|  | @ -2097,8 +2127,7 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) | |||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	if (iob && !blk_queue_is_zoned(ns->queue)) | ||||
| 		blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(iob)); | ||||
| 	nvme_set_chunk_sectors(ns, id); | ||||
| 	nvme_update_disk_info(disk, ns, id); | ||||
| #ifdef CONFIG_NVME_MULTIPATH | ||||
| 	if (ns->head->disk) { | ||||
|  | @ -3676,6 +3705,10 @@ static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj, | |||
| 		return 0; | ||||
| 	if (a == &dev_attr_hostid.attr && !ctrl->opts) | ||||
| 		return 0; | ||||
| 	if (a == &dev_attr_ctrl_loss_tmo.attr && !ctrl->opts) | ||||
| 		return 0; | ||||
| 	if (a == &dev_attr_reconnect_delay.attr && !ctrl->opts) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	return a->mode; | ||||
| } | ||||
|  | @ -4390,7 +4423,7 @@ static void nvme_free_ctrl(struct device *dev) | |||
| 	struct nvme_subsystem *subsys = ctrl->subsys; | ||||
| 	struct nvme_cel *cel, *next; | ||||
| 
 | ||||
| 	if (subsys && ctrl->instance != subsys->instance) | ||||
| 	if (!subsys || ctrl->instance != subsys->instance) | ||||
| 		ida_simple_remove(&nvme_instance_ida, ctrl->instance); | ||||
| 
 | ||||
| 	list_for_each_entry_safe(cel, next, &ctrl->cels, entry) { | ||||
|  | @ -4534,7 +4567,7 @@ void nvme_unfreeze(struct nvme_ctrl *ctrl) | |||
| } | ||||
| EXPORT_SYMBOL_GPL(nvme_unfreeze); | ||||
| 
 | ||||
| void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout) | ||||
| int nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout) | ||||
| { | ||||
| 	struct nvme_ns *ns; | ||||
| 
 | ||||
|  | @ -4545,6 +4578,7 @@ void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout) | |||
| 			break; | ||||
| 	} | ||||
| 	up_read(&ctrl->namespaces_rwsem); | ||||
| 	return timeout; | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(nvme_wait_freeze_timeout); | ||||
| 
 | ||||
|  |  | |||
|  | @ -576,7 +576,6 @@ bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq, | |||
| 	 * which is require to set the queue live in the appropinquate states. | ||||
| 	 */ | ||||
| 	switch (ctrl->state) { | ||||
| 	case NVME_CTRL_NEW: | ||||
| 	case NVME_CTRL_CONNECTING: | ||||
| 		if (nvme_is_fabrics(req->cmd) && | ||||
| 		    req->cmd->fabrics.fctype == nvme_fabrics_type_connect) | ||||
|  |  | |||
|  | @ -605,7 +605,7 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl); | |||
| void nvme_sync_queues(struct nvme_ctrl *ctrl); | ||||
| void nvme_unfreeze(struct nvme_ctrl *ctrl); | ||||
| void nvme_wait_freeze(struct nvme_ctrl *ctrl); | ||||
| void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout); | ||||
| int nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout); | ||||
| void nvme_start_freeze(struct nvme_ctrl *ctrl); | ||||
| 
 | ||||
| #define NVME_QID_ANY -1 | ||||
|  |  | |||
|  | @ -1249,8 +1249,8 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) | |||
| 		dev_warn_ratelimited(dev->ctrl.device, | ||||
| 			 "I/O %d QID %d timeout, disable controller\n", | ||||
| 			 req->tag, nvmeq->qid); | ||||
| 		nvme_dev_disable(dev, true); | ||||
| 		nvme_req(req)->flags |= NVME_REQ_CANCELLED; | ||||
| 		nvme_dev_disable(dev, true); | ||||
| 		return BLK_EH_DONE; | ||||
| 	case NVME_CTRL_RESETTING: | ||||
| 		return BLK_EH_RESET_TIMER; | ||||
|  | @ -1267,10 +1267,10 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) | |||
| 		dev_warn(dev->ctrl.device, | ||||
| 			 "I/O %d QID %d timeout, reset controller\n", | ||||
| 			 req->tag, nvmeq->qid); | ||||
| 		nvme_req(req)->flags |= NVME_REQ_CANCELLED; | ||||
| 		nvme_dev_disable(dev, false); | ||||
| 		nvme_reset_ctrl(&dev->ctrl); | ||||
| 
 | ||||
| 		nvme_req(req)->flags |= NVME_REQ_CANCELLED; | ||||
| 		return BLK_EH_DONE; | ||||
| 	} | ||||
| 
 | ||||
|  |  | |||
|  | @ -122,6 +122,7 @@ struct nvme_rdma_ctrl { | |||
| 	struct sockaddr_storage src_addr; | ||||
| 
 | ||||
| 	struct nvme_ctrl	ctrl; | ||||
| 	struct mutex		teardown_lock; | ||||
| 	bool			use_inline_data; | ||||
| 	u32			io_queues[HCTX_MAX_TYPES]; | ||||
| }; | ||||
|  | @ -975,7 +976,15 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) | |||
| 
 | ||||
| 	if (!new) { | ||||
| 		nvme_start_queues(&ctrl->ctrl); | ||||
| 		nvme_wait_freeze(&ctrl->ctrl); | ||||
| 		if (!nvme_wait_freeze_timeout(&ctrl->ctrl, NVME_IO_TIMEOUT)) { | ||||
| 			/*
 | ||||
| 			 * If we timed out waiting for freeze we are likely to | ||||
| 			 * be stuck.  Fail the controller initialization just | ||||
| 			 * to be safe. | ||||
| 			 */ | ||||
| 			ret = -ENODEV; | ||||
| 			goto out_wait_freeze_timed_out; | ||||
| 		} | ||||
| 		blk_mq_update_nr_hw_queues(ctrl->ctrl.tagset, | ||||
| 			ctrl->ctrl.queue_count - 1); | ||||
| 		nvme_unfreeze(&ctrl->ctrl); | ||||
|  | @ -983,6 +992,9 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) | |||
| 
 | ||||
| 	return 0; | ||||
| 
 | ||||
| out_wait_freeze_timed_out: | ||||
| 	nvme_stop_queues(&ctrl->ctrl); | ||||
| 	nvme_rdma_stop_io_queues(ctrl); | ||||
| out_cleanup_connect_q: | ||||
| 	if (new) | ||||
| 		blk_cleanup_queue(ctrl->ctrl.connect_q); | ||||
|  | @ -997,6 +1009,7 @@ out_free_io_queues: | |||
| static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl, | ||||
| 		bool remove) | ||||
| { | ||||
| 	mutex_lock(&ctrl->teardown_lock); | ||||
| 	blk_mq_quiesce_queue(ctrl->ctrl.admin_q); | ||||
| 	nvme_rdma_stop_queue(&ctrl->queues[0]); | ||||
| 	if (ctrl->ctrl.admin_tagset) { | ||||
|  | @ -1007,11 +1020,13 @@ static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl, | |||
| 	if (remove) | ||||
| 		blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); | ||||
| 	nvme_rdma_destroy_admin_queue(ctrl, remove); | ||||
| 	mutex_unlock(&ctrl->teardown_lock); | ||||
| } | ||||
| 
 | ||||
| static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl, | ||||
| 		bool remove) | ||||
| { | ||||
| 	mutex_lock(&ctrl->teardown_lock); | ||||
| 	if (ctrl->ctrl.queue_count > 1) { | ||||
| 		nvme_start_freeze(&ctrl->ctrl); | ||||
| 		nvme_stop_queues(&ctrl->ctrl); | ||||
|  | @ -1025,6 +1040,7 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl, | |||
| 			nvme_start_queues(&ctrl->ctrl); | ||||
| 		nvme_rdma_destroy_io_queues(ctrl, remove); | ||||
| 	} | ||||
| 	mutex_unlock(&ctrl->teardown_lock); | ||||
| } | ||||
| 
 | ||||
| static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl) | ||||
|  | @ -1180,6 +1196,7 @@ static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl) | |||
| 	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING)) | ||||
| 		return; | ||||
| 
 | ||||
| 	dev_warn(ctrl->ctrl.device, "starting error recovery\n"); | ||||
| 	queue_work(nvme_reset_wq, &ctrl->err_work); | ||||
| } | ||||
| 
 | ||||
|  | @ -1946,6 +1963,22 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id, | |||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static void nvme_rdma_complete_timed_out(struct request *rq) | ||||
| { | ||||
| 	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); | ||||
| 	struct nvme_rdma_queue *queue = req->queue; | ||||
| 	struct nvme_rdma_ctrl *ctrl = queue->ctrl; | ||||
| 
 | ||||
| 	/* fence other contexts that may complete the command */ | ||||
| 	mutex_lock(&ctrl->teardown_lock); | ||||
| 	nvme_rdma_stop_queue(queue); | ||||
| 	if (!blk_mq_request_completed(rq)) { | ||||
| 		nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD; | ||||
| 		blk_mq_complete_request(rq); | ||||
| 	} | ||||
| 	mutex_unlock(&ctrl->teardown_lock); | ||||
| } | ||||
| 
 | ||||
| static enum blk_eh_timer_return | ||||
| nvme_rdma_timeout(struct request *rq, bool reserved) | ||||
| { | ||||
|  | @ -1956,29 +1989,29 @@ nvme_rdma_timeout(struct request *rq, bool reserved) | |||
| 	dev_warn(ctrl->ctrl.device, "I/O %d QID %d timeout\n", | ||||
| 		 rq->tag, nvme_rdma_queue_idx(queue)); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Restart the timer if a controller reset is already scheduled. Any | ||||
| 	 * timed out commands would be handled before entering the connecting | ||||
| 	 * state. | ||||
| 	 */ | ||||
| 	if (ctrl->ctrl.state == NVME_CTRL_RESETTING) | ||||
| 		return BLK_EH_RESET_TIMER; | ||||
| 
 | ||||
| 	if (ctrl->ctrl.state != NVME_CTRL_LIVE) { | ||||
| 		/*
 | ||||
| 		 * Teardown immediately if controller times out while starting | ||||
| 		 * or we are already started error recovery. all outstanding | ||||
| 		 * requests are completed on shutdown, so we return BLK_EH_DONE. | ||||
| 		 * If we are resetting, connecting or deleting we should | ||||
| 		 * complete immediately because we may block controller | ||||
| 		 * teardown or setup sequence | ||||
| 		 * - ctrl disable/shutdown fabrics requests | ||||
| 		 * - connect requests | ||||
| 		 * - initialization admin requests | ||||
| 		 * - I/O requests that entered after unquiescing and | ||||
| 		 *   the controller stopped responding | ||||
| 		 * | ||||
| 		 * All other requests should be cancelled by the error | ||||
| 		 * recovery work, so it's fine that we fail it here. | ||||
| 		 */ | ||||
| 		flush_work(&ctrl->err_work); | ||||
| 		nvme_rdma_teardown_io_queues(ctrl, false); | ||||
| 		nvme_rdma_teardown_admin_queue(ctrl, false); | ||||
| 		nvme_rdma_complete_timed_out(rq); | ||||
| 		return BLK_EH_DONE; | ||||
| 	} | ||||
| 
 | ||||
| 	dev_warn(ctrl->ctrl.device, "starting error recovery\n"); | ||||
| 	/*
 | ||||
| 	 * LIVE state should trigger the normal error recovery which will | ||||
| 	 * handle completing this request. | ||||
| 	 */ | ||||
| 	nvme_rdma_error_recovery(ctrl); | ||||
| 
 | ||||
| 	return BLK_EH_RESET_TIMER; | ||||
| } | ||||
| 
 | ||||
|  | @ -2278,6 +2311,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, | |||
| 		return ERR_PTR(-ENOMEM); | ||||
| 	ctrl->ctrl.opts = opts; | ||||
| 	INIT_LIST_HEAD(&ctrl->list); | ||||
| 	mutex_init(&ctrl->teardown_lock); | ||||
| 
 | ||||
| 	if (!(opts->mask & NVMF_OPT_TRSVCID)) { | ||||
| 		opts->trsvcid = | ||||
|  |  | |||
|  | @ -124,6 +124,7 @@ struct nvme_tcp_ctrl { | |||
| 	struct sockaddr_storage src_addr; | ||||
| 	struct nvme_ctrl	ctrl; | ||||
| 
 | ||||
| 	struct mutex		teardown_lock; | ||||
| 	struct work_struct	err_work; | ||||
| 	struct delayed_work	connect_work; | ||||
| 	struct nvme_tcp_request async_req; | ||||
|  | @ -464,6 +465,7 @@ static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl) | |||
| 	if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) | ||||
| 		return; | ||||
| 
 | ||||
| 	dev_warn(ctrl->device, "starting error recovery\n"); | ||||
| 	queue_work(nvme_reset_wq, &to_tcp_ctrl(ctrl)->err_work); | ||||
| } | ||||
| 
 | ||||
|  | @ -1526,7 +1528,6 @@ static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) | |||
| 
 | ||||
| 	if (!test_and_clear_bit(NVME_TCP_Q_LIVE, &queue->flags)) | ||||
| 		return; | ||||
| 
 | ||||
| 	__nvme_tcp_stop_queue(queue); | ||||
| } | ||||
| 
 | ||||
|  | @ -1781,7 +1782,15 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) | |||
| 
 | ||||
| 	if (!new) { | ||||
| 		nvme_start_queues(ctrl); | ||||
| 		nvme_wait_freeze(ctrl); | ||||
| 		if (!nvme_wait_freeze_timeout(ctrl, NVME_IO_TIMEOUT)) { | ||||
| 			/*
 | ||||
| 			 * If we timed out waiting for freeze we are likely to | ||||
| 			 * be stuck.  Fail the controller initialization just | ||||
| 			 * to be safe. | ||||
| 			 */ | ||||
| 			ret = -ENODEV; | ||||
| 			goto out_wait_freeze_timed_out; | ||||
| 		} | ||||
| 		blk_mq_update_nr_hw_queues(ctrl->tagset, | ||||
| 			ctrl->queue_count - 1); | ||||
| 		nvme_unfreeze(ctrl); | ||||
|  | @ -1789,6 +1798,9 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) | |||
| 
 | ||||
| 	return 0; | ||||
| 
 | ||||
| out_wait_freeze_timed_out: | ||||
| 	nvme_stop_queues(ctrl); | ||||
| 	nvme_tcp_stop_io_queues(ctrl); | ||||
| out_cleanup_connect_q: | ||||
| 	if (new) | ||||
| 		blk_cleanup_queue(ctrl->connect_q); | ||||
|  | @ -1874,6 +1886,7 @@ out_free_queue: | |||
| static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl, | ||||
| 		bool remove) | ||||
| { | ||||
| 	mutex_lock(&to_tcp_ctrl(ctrl)->teardown_lock); | ||||
| 	blk_mq_quiesce_queue(ctrl->admin_q); | ||||
| 	nvme_tcp_stop_queue(ctrl, 0); | ||||
| 	if (ctrl->admin_tagset) { | ||||
|  | @ -1884,13 +1897,16 @@ static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl, | |||
| 	if (remove) | ||||
| 		blk_mq_unquiesce_queue(ctrl->admin_q); | ||||
| 	nvme_tcp_destroy_admin_queue(ctrl, remove); | ||||
| 	mutex_unlock(&to_tcp_ctrl(ctrl)->teardown_lock); | ||||
| } | ||||
| 
 | ||||
| static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl, | ||||
| 		bool remove) | ||||
| { | ||||
| 	mutex_lock(&to_tcp_ctrl(ctrl)->teardown_lock); | ||||
| 	if (ctrl->queue_count <= 1) | ||||
| 		return; | ||||
| 		goto out; | ||||
| 	blk_mq_quiesce_queue(ctrl->admin_q); | ||||
| 	nvme_start_freeze(ctrl); | ||||
| 	nvme_stop_queues(ctrl); | ||||
| 	nvme_tcp_stop_io_queues(ctrl); | ||||
|  | @ -1902,6 +1918,8 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl, | |||
| 	if (remove) | ||||
| 		nvme_start_queues(ctrl); | ||||
| 	nvme_tcp_destroy_io_queues(ctrl, remove); | ||||
| out: | ||||
| 	mutex_unlock(&to_tcp_ctrl(ctrl)->teardown_lock); | ||||
| } | ||||
| 
 | ||||
| static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl) | ||||
|  | @ -2148,40 +2166,55 @@ static void nvme_tcp_submit_async_event(struct nvme_ctrl *arg) | |||
| 	nvme_tcp_queue_request(&ctrl->async_req, true, true); | ||||
| } | ||||
| 
 | ||||
| static void nvme_tcp_complete_timed_out(struct request *rq) | ||||
| { | ||||
| 	struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); | ||||
| 	struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl; | ||||
| 
 | ||||
| 	/* fence other contexts that may complete the command */ | ||||
| 	mutex_lock(&to_tcp_ctrl(ctrl)->teardown_lock); | ||||
| 	nvme_tcp_stop_queue(ctrl, nvme_tcp_queue_id(req->queue)); | ||||
| 	if (!blk_mq_request_completed(rq)) { | ||||
| 		nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD; | ||||
| 		blk_mq_complete_request(rq); | ||||
| 	} | ||||
| 	mutex_unlock(&to_tcp_ctrl(ctrl)->teardown_lock); | ||||
| } | ||||
| 
 | ||||
| static enum blk_eh_timer_return | ||||
| nvme_tcp_timeout(struct request *rq, bool reserved) | ||||
| { | ||||
| 	struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); | ||||
| 	struct nvme_tcp_ctrl *ctrl = req->queue->ctrl; | ||||
| 	struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl; | ||||
| 	struct nvme_tcp_cmd_pdu *pdu = req->pdu; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Restart the timer if a controller reset is already scheduled. Any | ||||
| 	 * timed out commands would be handled before entering the connecting | ||||
| 	 * state. | ||||
| 	 */ | ||||
| 	if (ctrl->ctrl.state == NVME_CTRL_RESETTING) | ||||
| 		return BLK_EH_RESET_TIMER; | ||||
| 
 | ||||
| 	dev_warn(ctrl->ctrl.device, | ||||
| 	dev_warn(ctrl->device, | ||||
| 		"queue %d: timeout request %#x type %d\n", | ||||
| 		nvme_tcp_queue_id(req->queue), rq->tag, pdu->hdr.type); | ||||
| 
 | ||||
| 	if (ctrl->ctrl.state != NVME_CTRL_LIVE) { | ||||
| 	if (ctrl->state != NVME_CTRL_LIVE) { | ||||
| 		/*
 | ||||
| 		 * Teardown immediately if controller times out while starting | ||||
| 		 * or we are already started error recovery. all outstanding | ||||
| 		 * requests are completed on shutdown, so we return BLK_EH_DONE. | ||||
| 		 * If we are resetting, connecting or deleting we should | ||||
| 		 * complete immediately because we may block controller | ||||
| 		 * teardown or setup sequence | ||||
| 		 * - ctrl disable/shutdown fabrics requests | ||||
| 		 * - connect requests | ||||
| 		 * - initialization admin requests | ||||
| 		 * - I/O requests that entered after unquiescing and | ||||
| 		 *   the controller stopped responding | ||||
| 		 * | ||||
| 		 * All other requests should be cancelled by the error | ||||
| 		 * recovery work, so it's fine that we fail it here. | ||||
| 		 */ | ||||
| 		flush_work(&ctrl->err_work); | ||||
| 		nvme_tcp_teardown_io_queues(&ctrl->ctrl, false); | ||||
| 		nvme_tcp_teardown_admin_queue(&ctrl->ctrl, false); | ||||
| 		nvme_tcp_complete_timed_out(rq); | ||||
| 		return BLK_EH_DONE; | ||||
| 	} | ||||
| 
 | ||||
| 	dev_warn(ctrl->ctrl.device, "starting error recovery\n"); | ||||
| 	nvme_tcp_error_recovery(&ctrl->ctrl); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * LIVE state should trigger the normal error recovery which will | ||||
| 	 * handle completing this request. | ||||
| 	 */ | ||||
| 	nvme_tcp_error_recovery(ctrl); | ||||
| 	return BLK_EH_RESET_TIMER; | ||||
| } | ||||
| 
 | ||||
|  | @ -2422,6 +2455,7 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev, | |||
| 			nvme_tcp_reconnect_ctrl_work); | ||||
| 	INIT_WORK(&ctrl->err_work, nvme_tcp_error_recovery_work); | ||||
| 	INIT_WORK(&ctrl->ctrl.reset_work, nvme_reset_ctrl_work); | ||||
| 	mutex_init(&ctrl->teardown_lock); | ||||
| 
 | ||||
| 	if (!(opts->mask & NVMF_OPT_TRSVCID)) { | ||||
| 		opts->trsvcid = | ||||
|  |  | |||
|  | @ -2342,9 +2342,9 @@ nvmet_fc_fod_op_done(struct nvmet_fc_fcp_iod *fod) | |||
| 			return; | ||||
| 		if (fcpreq->fcp_error || | ||||
| 		    fcpreq->transferred_length != fcpreq->transfer_length) { | ||||
| 			spin_lock(&fod->flock); | ||||
| 			spin_lock_irqsave(&fod->flock, flags); | ||||
| 			fod->abort = true; | ||||
| 			spin_unlock(&fod->flock); | ||||
| 			spin_unlock_irqrestore(&fod->flock, flags); | ||||
| 
 | ||||
| 			nvmet_req_complete(&fod->req, NVME_SC_INTERNAL); | ||||
| 			return; | ||||
|  |  | |||
|  | @ -160,6 +160,11 @@ static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd); | |||
| static inline u16 nvmet_tcp_cmd_tag(struct nvmet_tcp_queue *queue, | ||||
| 		struct nvmet_tcp_cmd *cmd) | ||||
| { | ||||
| 	if (unlikely(!queue->nr_cmds)) { | ||||
| 		/* We didn't allocate cmds yet, send 0xffff */ | ||||
| 		return USHRT_MAX; | ||||
| 	} | ||||
| 
 | ||||
| 	return cmd - queue->cmds; | ||||
| } | ||||
| 
 | ||||
|  | @ -866,7 +871,10 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue) | |||
| 	struct nvme_tcp_data_pdu *data = &queue->pdu.data; | ||||
| 	struct nvmet_tcp_cmd *cmd; | ||||
| 
 | ||||
| 	cmd = &queue->cmds[data->ttag]; | ||||
| 	if (likely(queue->nr_cmds)) | ||||
| 		cmd = &queue->cmds[data->ttag]; | ||||
| 	else | ||||
| 		cmd = &queue->connect; | ||||
| 
 | ||||
| 	if (le32_to_cpu(data->data_offset) != cmd->rbytes_done) { | ||||
| 		pr_err("ttag %u unexpected data offset %u (expected %u)\n", | ||||
|  |  | |||
|  | @ -117,11 +117,18 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv, | |||
| 	return true; | ||||
| } | ||||
| 
 | ||||
| static inline void bvec_iter_skip_zero_bvec(struct bvec_iter *iter) | ||||
| { | ||||
| 	iter->bi_bvec_done = 0; | ||||
| 	iter->bi_idx++; | ||||
| } | ||||
| 
 | ||||
| #define for_each_bvec(bvl, bio_vec, iter, start)			\ | ||||
| 	for (iter = (start);						\ | ||||
| 	     (iter).bi_size &&						\ | ||||
| 		((bvl = bvec_iter_bvec((bio_vec), (iter))), 1);	\ | ||||
| 	     bvec_iter_advance((bio_vec), &(iter), (bvl).bv_len)) | ||||
| 	     (bvl).bv_len ? (void)bvec_iter_advance((bio_vec), &(iter),	\ | ||||
| 		     (bvl).bv_len) : bvec_iter_skip_zero_bvec(&(iter))) | ||||
| 
 | ||||
| /* for iterating one bio from start to end */ | ||||
| #define BVEC_ITER_ALL_INIT (struct bvec_iter)				\ | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Linus Torvalds
						Linus Torvalds