mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-09-18 22:14:16 +00:00 
			
		
		
		
	. various fixes and cleanups for request-based DM core
	. add support for delaying the requeue of requests; used by DM multipath when all paths have failed and 'queue_if_no_path' is enabled
	. DM cache improvements to speedup the loading metadata and the writing of the hint array
	. fix potential for a dm-crypt crash on device teardown
	. remove dm_bufio_cond_resched() and just using cond_resched()
	. change DM multipath to return a reservation conflict error immediately; rather than failing the path and retrying (potentially indefinitely)

	-----BEGIN PGP SIGNATURE-----
	Version: GnuPG v1

	iQEcBAABAgAGBQJX7n9KAAoJEMUj8QotnQNab74IANm+rW2uYdpLNCxWUmcaih0d
	BK8dLS/Mz35S0TRSekvynuBcPx18VP2Zueulc+aHTWcT4sj79l6KnVYT9g6c98rL
	zzcv10QTteqhiiWwFmPHsZgv5dW8Y5wiRdt+SqcQ5sAHMFci6C05gzp9caNu7VTs
	fbcLUdyYm40y3j84Lx/+ABXgnBhq+40OTtdnYSkEmLtdscPLzwpHgPmMctkrEl7e
	7mqGC1KbDDzartqOZOeGP2P2qOCNN21qA+8ctMw9Xyze33uwvj7Vx6cro6e28wMm
	ZClY9XNGlfuW9dCNtFR9o6NXS6NIK30UJbKqyZPPsK+70JrOgzh6GzQnwSXdyNs=
	=7SkG
	-----END PGP SIGNATURE-----

	Merge tag 'dm-4.9-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

	Pull device mapper updates from Mike Snitzer:
	- various fixes and cleanups for request-based DM core
	- add support for delaying the requeue of requests; used by DM multipath when all paths have failed and 'queue_if_no_path' is enabled
	- DM cache improvements to speedup the loading metadata and the writing of the hint array
	- fix potential for a dm-crypt crash on device teardown
	- remove dm_bufio_cond_resched() and just using cond_resched()
	- change DM multipath to return a reservation conflict error immediately; rather than failing the path and retrying (potentially indefinitely)

	* tag 'dm-4.9-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (24 commits)
	  dm mpath: always return reservation conflict without failing over
	  dm bufio: remove dm_bufio_cond_resched()
	  dm crypt: fix crash on exit
	  dm cache metadata: switch to using the new cursor api for loading metadata
	  dm array: introduce cursor api
	  dm btree: introduce cursor api
	  dm cache policy smq: distribute entries to random levels when switching to smq
	  dm cache: speed up writing of the hint array
	  dm array: add dm_array_new()
	  dm mpath: delay the requeue of blk-mq requests while all paths down
	  dm mpath: use dm_mq_kick_requeue_list()
	  dm rq: introduce dm_mq_kick_requeue_list()
	  dm rq: reduce arguments passed to map_request() and dm_requeue_original_request()
	  dm rq: add DM_MAPIO_DELAY_REQUEUE to delay requeue of blk-mq requests
	  dm: convert wait loops to use autoremove_wake_function()
	  dm: use signal_pending_state() in dm_wait_for_completion()
	  dm: rename task state function arguments
	  dm: add two lockdep_assert_held() statements
	  dm rq: simplify dm_old_stop_queue()
	  dm mpath: check if path's request_queue is dying in activate_path()
	  ...
This commit is contained in:
		
						commit
						48915c2cbc
					
				
					 16 changed files with 716 additions and 265 deletions
				
			
		|  | @ -191,19 +191,6 @@ static void dm_bufio_unlock(struct dm_bufio_client *c) | |||
| 	mutex_unlock(&c->lock); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * FIXME Move to sched.h? | ||||
|  */ | ||||
| #ifdef CONFIG_PREEMPT_VOLUNTARY | ||||
| #  define dm_bufio_cond_resched()		\ | ||||
| do {						\ | ||||
| 	if (unlikely(need_resched()))		\ | ||||
| 		_cond_resched();		\ | ||||
| } while (0) | ||||
| #else | ||||
| #  define dm_bufio_cond_resched()                do { } while (0) | ||||
| #endif | ||||
| 
 | ||||
| /*----------------------------------------------------------------*/ | ||||
| 
 | ||||
| /*
 | ||||
|  | @ -741,7 +728,7 @@ static void __flush_write_list(struct list_head *write_list) | |||
| 			list_entry(write_list->next, struct dm_buffer, write_list); | ||||
| 		list_del(&b->write_list); | ||||
| 		submit_io(b, WRITE, b->block, write_endio); | ||||
| 		dm_bufio_cond_resched(); | ||||
| 		cond_resched(); | ||||
| 	} | ||||
| 	blk_finish_plug(&plug); | ||||
| } | ||||
|  | @ -780,7 +767,7 @@ static struct dm_buffer *__get_unclaimed_buffer(struct dm_bufio_client *c) | |||
| 			__unlink_buffer(b); | ||||
| 			return b; | ||||
| 		} | ||||
| 		dm_bufio_cond_resched(); | ||||
| 		cond_resched(); | ||||
| 	} | ||||
| 
 | ||||
| 	list_for_each_entry_reverse(b, &c->lru[LIST_DIRTY], lru_list) { | ||||
|  | @ -791,7 +778,7 @@ static struct dm_buffer *__get_unclaimed_buffer(struct dm_bufio_client *c) | |||
| 			__unlink_buffer(b); | ||||
| 			return b; | ||||
| 		} | ||||
| 		dm_bufio_cond_resched(); | ||||
| 		cond_resched(); | ||||
| 	} | ||||
| 
 | ||||
| 	return NULL; | ||||
|  | @ -923,7 +910,7 @@ static void __write_dirty_buffers_async(struct dm_bufio_client *c, int no_wait, | |||
| 			return; | ||||
| 
 | ||||
| 		__write_dirty_buffer(b, write_list); | ||||
| 		dm_bufio_cond_resched(); | ||||
| 		cond_resched(); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
|  | @ -973,7 +960,7 @@ static void __check_watermark(struct dm_bufio_client *c, | |||
| 			return; | ||||
| 
 | ||||
| 		__free_buffer_wake(b); | ||||
| 		dm_bufio_cond_resched(); | ||||
| 		cond_resched(); | ||||
| 	} | ||||
| 
 | ||||
| 	if (c->n_buffers[LIST_DIRTY] > threshold_buffers) | ||||
|  | @ -1170,7 +1157,7 @@ void dm_bufio_prefetch(struct dm_bufio_client *c, | |||
| 				submit_io(b, READ, b->block, read_endio); | ||||
| 			dm_bufio_release(b); | ||||
| 
 | ||||
| 			dm_bufio_cond_resched(); | ||||
| 			cond_resched(); | ||||
| 
 | ||||
| 			if (!n_blocks) | ||||
| 				goto flush_plug; | ||||
|  | @ -1291,7 +1278,7 @@ again: | |||
| 		    !test_bit(B_WRITING, &b->state)) | ||||
| 			__relink_lru(b, LIST_CLEAN); | ||||
| 
 | ||||
| 		dm_bufio_cond_resched(); | ||||
| 		cond_resched(); | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * If we dropped the lock, the list is no longer consistent, | ||||
|  | @ -1574,7 +1561,7 @@ static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan, | |||
| 				freed++; | ||||
| 			if (!--nr_to_scan || ((count - freed) <= retain_target)) | ||||
| 				return freed; | ||||
| 			dm_bufio_cond_resched(); | ||||
| 			cond_resched(); | ||||
| 		} | ||||
| 	} | ||||
| 	return freed; | ||||
|  | @ -1808,7 +1795,7 @@ static void __evict_old_buffers(struct dm_bufio_client *c, unsigned long age_hz) | |||
| 		if (__try_evict_buffer(b, 0)) | ||||
| 			count--; | ||||
| 
 | ||||
| 		dm_bufio_cond_resched(); | ||||
| 		cond_resched(); | ||||
| 	} | ||||
| 
 | ||||
| 	dm_bufio_unlock(c); | ||||
|  |  | |||
|  | @ -140,6 +140,13 @@ struct dm_cache_metadata { | |||
| 	 * the device. | ||||
| 	 */ | ||||
| 	bool fail_io:1; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * These structures are used when loading metadata.  They're too | ||||
| 	 * big to put on the stack. | ||||
| 	 */ | ||||
| 	struct dm_array_cursor mapping_cursor; | ||||
| 	struct dm_array_cursor hint_cursor; | ||||
| }; | ||||
| 
 | ||||
| /*-------------------------------------------------------------------
 | ||||
|  | @ -1171,31 +1178,37 @@ static bool hints_array_available(struct dm_cache_metadata *cmd, | |||
| 		hints_array_initialized(cmd); | ||||
| } | ||||
| 
 | ||||
| static int __load_mapping(void *context, uint64_t cblock, void *leaf) | ||||
| static int __load_mapping(struct dm_cache_metadata *cmd, | ||||
| 			  uint64_t cb, bool hints_valid, | ||||
| 			  struct dm_array_cursor *mapping_cursor, | ||||
| 			  struct dm_array_cursor *hint_cursor, | ||||
| 			  load_mapping_fn fn, void *context) | ||||
| { | ||||
| 	int r = 0; | ||||
| 	bool dirty; | ||||
| 	__le64 value; | ||||
| 	__le32 hint_value = 0; | ||||
| 
 | ||||
| 	__le64 mapping; | ||||
| 	__le32 hint = 0; | ||||
| 
 | ||||
| 	__le64 *mapping_value_le; | ||||
| 	__le32 *hint_value_le; | ||||
| 
 | ||||
| 	dm_oblock_t oblock; | ||||
| 	unsigned flags; | ||||
| 	struct thunk *thunk = context; | ||||
| 	struct dm_cache_metadata *cmd = thunk->cmd; | ||||
| 
 | ||||
| 	memcpy(&value, leaf, sizeof(value)); | ||||
| 	unpack_value(value, &oblock, &flags); | ||||
| 	dm_array_cursor_get_value(mapping_cursor, (void **) &mapping_value_le); | ||||
| 	memcpy(&mapping, mapping_value_le, sizeof(mapping)); | ||||
| 	unpack_value(mapping, &oblock, &flags); | ||||
| 
 | ||||
| 	if (flags & M_VALID) { | ||||
| 		if (thunk->hints_valid) { | ||||
| 			r = dm_array_get_value(&cmd->hint_info, cmd->hint_root, | ||||
| 					       cblock, &hint_value); | ||||
| 			if (r && r != -ENODATA) | ||||
| 				return r; | ||||
| 		if (hints_valid) { | ||||
| 			dm_array_cursor_get_value(hint_cursor, (void **) &hint_value_le); | ||||
| 			memcpy(&hint, hint_value_le, sizeof(hint)); | ||||
| 		} | ||||
| 
 | ||||
| 		dirty = thunk->respect_dirty_flags ? (flags & M_DIRTY) : true; | ||||
| 		r = thunk->fn(thunk->context, oblock, to_cblock(cblock), | ||||
| 			      dirty, le32_to_cpu(hint_value), thunk->hints_valid); | ||||
| 		r = fn(context, oblock, to_cblock(cb), flags & M_DIRTY, | ||||
| 		       le32_to_cpu(hint), hints_valid); | ||||
| 		if (r) | ||||
| 			DMERR("policy couldn't load cblock"); | ||||
| 	} | ||||
| 
 | ||||
| 	return r; | ||||
|  | @ -1205,16 +1218,60 @@ static int __load_mappings(struct dm_cache_metadata *cmd, | |||
| 			   struct dm_cache_policy *policy, | ||||
| 			   load_mapping_fn fn, void *context) | ||||
| { | ||||
| 	struct thunk thunk; | ||||
| 	int r; | ||||
| 	uint64_t cb; | ||||
| 
 | ||||
| 	thunk.fn = fn; | ||||
| 	thunk.context = context; | ||||
| 	bool hints_valid = hints_array_available(cmd, policy); | ||||
| 
 | ||||
| 	thunk.cmd = cmd; | ||||
| 	thunk.respect_dirty_flags = cmd->clean_when_opened; | ||||
| 	thunk.hints_valid = hints_array_available(cmd, policy); | ||||
| 	if (from_cblock(cmd->cache_blocks) == 0) | ||||
| 		/* Nothing to do */ | ||||
| 		return 0; | ||||
| 
 | ||||
| 	return dm_array_walk(&cmd->info, cmd->root, __load_mapping, &thunk); | ||||
| 	r = dm_array_cursor_begin(&cmd->info, cmd->root, &cmd->mapping_cursor); | ||||
| 	if (r) | ||||
| 		return r; | ||||
| 
 | ||||
| 	if (hints_valid) { | ||||
| 		r = dm_array_cursor_begin(&cmd->hint_info, cmd->hint_root, &cmd->hint_cursor); | ||||
| 		if (r) { | ||||
| 			dm_array_cursor_end(&cmd->mapping_cursor); | ||||
| 			return r; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	for (cb = 0; ; cb++) { | ||||
| 		r = __load_mapping(cmd, cb, hints_valid, | ||||
| 				   &cmd->mapping_cursor, &cmd->hint_cursor, | ||||
| 				   fn, context); | ||||
| 		if (r) | ||||
| 			goto out; | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * We need to break out before we move the cursors. | ||||
| 		 */ | ||||
| 		if (cb >= (from_cblock(cmd->cache_blocks) - 1)) | ||||
| 			break; | ||||
| 
 | ||||
| 		r = dm_array_cursor_next(&cmd->mapping_cursor); | ||||
| 		if (r) { | ||||
| 			DMERR("dm_array_cursor_next for mapping failed"); | ||||
| 			goto out; | ||||
| 		} | ||||
| 
 | ||||
| 		if (hints_valid) { | ||||
| 			r = dm_array_cursor_next(&cmd->hint_cursor); | ||||
| 			if (r) { | ||||
| 				DMERR("dm_array_cursor_next for hint failed"); | ||||
| 				goto out; | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| out: | ||||
| 	dm_array_cursor_end(&cmd->mapping_cursor); | ||||
| 	if (hints_valid) | ||||
| 		dm_array_cursor_end(&cmd->hint_cursor); | ||||
| 
 | ||||
| 	return r; | ||||
| } | ||||
| 
 | ||||
| int dm_cache_load_mappings(struct dm_cache_metadata *cmd, | ||||
|  | @ -1368,10 +1425,24 @@ int dm_cache_get_metadata_dev_size(struct dm_cache_metadata *cmd, | |||
| 
 | ||||
| /*----------------------------------------------------------------*/ | ||||
| 
 | ||||
| static int begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy) | ||||
| static int get_hint(uint32_t index, void *value_le, void *context) | ||||
| { | ||||
| 	uint32_t value; | ||||
| 	struct dm_cache_policy *policy = context; | ||||
| 
 | ||||
| 	value = policy_get_hint(policy, to_cblock(index)); | ||||
| 	*((__le32 *) value_le) = cpu_to_le32(value); | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * It's quicker to always delete the hint array, and recreate with | ||||
|  * dm_array_new(). | ||||
|  */ | ||||
| static int write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy) | ||||
| { | ||||
| 	int r; | ||||
| 	__le32 value; | ||||
| 	size_t hint_size; | ||||
| 	const char *policy_name = dm_cache_policy_get_name(policy); | ||||
| 	const unsigned *policy_version = dm_cache_policy_get_version(policy); | ||||
|  | @ -1380,7 +1451,6 @@ static int begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *po | |||
| 	    (strlen(policy_name) > sizeof(cmd->policy_name) - 1)) | ||||
| 		return -EINVAL; | ||||
| 
 | ||||
| 	if (!policy_unchanged(cmd, policy)) { | ||||
| 	strncpy(cmd->policy_name, policy_name, sizeof(cmd->policy_name)); | ||||
| 	memcpy(cmd->policy_version, policy_version, sizeof(cmd->policy_version)); | ||||
| 
 | ||||
|  | @ -1395,48 +1465,9 @@ static int begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *po | |||
| 			return r; | ||||
| 	} | ||||
| 
 | ||||
| 		r = dm_array_empty(&cmd->hint_info, &cmd->hint_root); | ||||
| 		if (r) | ||||
| 			return r; | ||||
| 
 | ||||
| 		value = cpu_to_le32(0); | ||||
| 		__dm_bless_for_disk(&value); | ||||
| 		r = dm_array_resize(&cmd->hint_info, cmd->hint_root, 0, | ||||
| 	return dm_array_new(&cmd->hint_info, &cmd->hint_root, | ||||
| 			    from_cblock(cmd->cache_blocks), | ||||
| 				    &value, &cmd->hint_root); | ||||
| 		if (r) | ||||
| 			return r; | ||||
| 	} | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static int save_hint(void *context, dm_cblock_t cblock, dm_oblock_t oblock, uint32_t hint) | ||||
| { | ||||
| 	struct dm_cache_metadata *cmd = context; | ||||
| 	__le32 value = cpu_to_le32(hint); | ||||
| 	int r; | ||||
| 
 | ||||
| 	__dm_bless_for_disk(&value); | ||||
| 
 | ||||
| 	r = dm_array_set_value(&cmd->hint_info, cmd->hint_root, | ||||
| 			       from_cblock(cblock), &value, &cmd->hint_root); | ||||
| 	cmd->changed = true; | ||||
| 
 | ||||
| 	return r; | ||||
| } | ||||
| 
 | ||||
| static int write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy) | ||||
| { | ||||
| 	int r; | ||||
| 
 | ||||
| 	r = begin_hints(cmd, policy); | ||||
| 	if (r) { | ||||
| 		DMERR("begin_hints failed"); | ||||
| 		return r; | ||||
| 	} | ||||
| 
 | ||||
| 	return policy_walk_mappings(policy, save_hint, cmd); | ||||
| 			    get_hint, policy); | ||||
| } | ||||
| 
 | ||||
| int dm_cache_write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy) | ||||
|  |  | |||
|  | @ -395,7 +395,7 @@ static void init_policy_functions(struct policy *p) | |||
| 	p->policy.set_dirty = wb_set_dirty; | ||||
| 	p->policy.clear_dirty = wb_clear_dirty; | ||||
| 	p->policy.load_mapping = wb_load_mapping; | ||||
| 	p->policy.walk_mappings = NULL; | ||||
| 	p->policy.get_hint = NULL; | ||||
| 	p->policy.remove_mapping = wb_remove_mapping; | ||||
| 	p->policy.writeback_work = wb_writeback_work; | ||||
| 	p->policy.force_mapping = wb_force_mapping; | ||||
|  |  | |||
|  | @ -48,10 +48,10 @@ static inline int policy_load_mapping(struct dm_cache_policy *p, | |||
| 	return p->load_mapping(p, oblock, cblock, hint, hint_valid); | ||||
| } | ||||
| 
 | ||||
| static inline int policy_walk_mappings(struct dm_cache_policy *p, | ||||
| 				      policy_walk_fn fn, void *context) | ||||
| static inline uint32_t policy_get_hint(struct dm_cache_policy *p, | ||||
| 				       dm_cblock_t cblock) | ||||
| { | ||||
| 	return p->walk_mappings ? p->walk_mappings(p, fn, context) : 0; | ||||
| 	return p->get_hint ? p->get_hint(p, cblock) : 0; | ||||
| } | ||||
| 
 | ||||
| static inline int policy_writeback_work(struct dm_cache_policy *p, | ||||
|  |  | |||
|  | @ -1359,6 +1359,11 @@ static void smq_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock) | |||
| 	spin_unlock_irqrestore(&mq->lock, flags); | ||||
| } | ||||
| 
 | ||||
| static unsigned random_level(dm_cblock_t cblock) | ||||
| { | ||||
| 	return hash_32_generic(from_cblock(cblock), 9) & (NR_CACHE_LEVELS - 1); | ||||
| } | ||||
| 
 | ||||
| static int smq_load_mapping(struct dm_cache_policy *p, | ||||
| 			    dm_oblock_t oblock, dm_cblock_t cblock, | ||||
| 			    uint32_t hint, bool hint_valid) | ||||
|  | @ -1369,47 +1374,21 @@ static int smq_load_mapping(struct dm_cache_policy *p, | |||
| 	e = alloc_particular_entry(&mq->cache_alloc, from_cblock(cblock)); | ||||
| 	e->oblock = oblock; | ||||
| 	e->dirty = false;	/* this gets corrected in a minute */ | ||||
| 	e->level = hint_valid ? min(hint, NR_CACHE_LEVELS - 1) : 1; | ||||
| 	e->level = hint_valid ? min(hint, NR_CACHE_LEVELS - 1) : random_level(cblock); | ||||
| 	push(mq, e); | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static int smq_save_hints(struct smq_policy *mq, struct queue *q, | ||||
| 			  policy_walk_fn fn, void *context) | ||||
| { | ||||
| 	int r; | ||||
| 	unsigned level; | ||||
| 	struct entry *e; | ||||
| 
 | ||||
| 	for (level = 0; level < q->nr_levels; level++) | ||||
| 		for (e = l_head(q->es, q->qs + level); e; e = l_next(q->es, e)) { | ||||
| 			if (!e->sentinel) { | ||||
| 				r = fn(context, infer_cblock(mq, e), | ||||
| 				       e->oblock, e->level); | ||||
| 				if (r) | ||||
| 					return r; | ||||
| 			} | ||||
| 		} | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static int smq_walk_mappings(struct dm_cache_policy *p, policy_walk_fn fn, | ||||
| 			     void *context) | ||||
| static uint32_t smq_get_hint(struct dm_cache_policy *p, dm_cblock_t cblock) | ||||
| { | ||||
| 	struct smq_policy *mq = to_smq_policy(p); | ||||
| 	int r = 0; | ||||
| 	struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock)); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * We don't need to lock here since this method is only called once | ||||
| 	 * the IO has stopped. | ||||
| 	 */ | ||||
| 	r = smq_save_hints(mq, &mq->clean, fn, context); | ||||
| 	if (!r) | ||||
| 		r = smq_save_hints(mq, &mq->dirty, fn, context); | ||||
| 	if (!e->allocated) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	return r; | ||||
| 	return e->level; | ||||
| } | ||||
| 
 | ||||
| static void __remove_mapping(struct smq_policy *mq, dm_oblock_t oblock) | ||||
|  | @ -1616,7 +1595,7 @@ static void init_policy_functions(struct smq_policy *mq, bool mimic_mq) | |||
| 	mq->policy.set_dirty = smq_set_dirty; | ||||
| 	mq->policy.clear_dirty = smq_clear_dirty; | ||||
| 	mq->policy.load_mapping = smq_load_mapping; | ||||
| 	mq->policy.walk_mappings = smq_walk_mappings; | ||||
| 	mq->policy.get_hint = smq_get_hint; | ||||
| 	mq->policy.remove_mapping = smq_remove_mapping; | ||||
| 	mq->policy.remove_cblock = smq_remove_cblock; | ||||
| 	mq->policy.writeback_work = smq_writeback_work; | ||||
|  |  | |||
|  | @ -90,9 +90,6 @@ struct policy_result { | |||
| 	dm_cblock_t cblock;	/* POLICY_HIT, POLICY_NEW, POLICY_REPLACE */ | ||||
| }; | ||||
| 
 | ||||
| typedef int (*policy_walk_fn)(void *context, dm_cblock_t cblock, | ||||
| 			      dm_oblock_t oblock, uint32_t hint); | ||||
| 
 | ||||
| /*
 | ||||
|  * The cache policy object.  Just a bunch of methods.  It is envisaged that | ||||
|  * this structure will be embedded in a bigger, policy specific structure | ||||
|  | @ -158,8 +155,11 @@ struct dm_cache_policy { | |||
| 	int (*load_mapping)(struct dm_cache_policy *p, dm_oblock_t oblock, | ||||
| 			    dm_cblock_t cblock, uint32_t hint, bool hint_valid); | ||||
| 
 | ||||
| 	int (*walk_mappings)(struct dm_cache_policy *p, policy_walk_fn fn, | ||||
| 			     void *context); | ||||
| 	/*
 | ||||
| 	 * Gets the hint for a given cblock.  Called in a single threaded | ||||
| 	 * context.  So no locking required. | ||||
| 	 */ | ||||
| 	uint32_t (*get_hint)(struct dm_cache_policy *p, dm_cblock_t cblock); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Override functions used on the error paths of the core target. | ||||
|  |  | |||
|  | @ -113,8 +113,7 @@ struct iv_tcw_private { | |||
|  * and encrypts / decrypts at the same time. | ||||
|  */ | ||||
| enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID, | ||||
| 	     DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD, | ||||
| 	     DM_CRYPT_EXIT_THREAD}; | ||||
| 	     DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD }; | ||||
| 
 | ||||
| /*
 | ||||
|  * The fields in here must be read only after initialization. | ||||
|  | @ -1207,18 +1206,20 @@ continue_locked: | |||
| 		if (!RB_EMPTY_ROOT(&cc->write_tree)) | ||||
| 			goto pop_from_list; | ||||
| 
 | ||||
| 		if (unlikely(test_bit(DM_CRYPT_EXIT_THREAD, &cc->flags))) { | ||||
| 			spin_unlock_irq(&cc->write_thread_wait.lock); | ||||
| 			break; | ||||
| 		} | ||||
| 
 | ||||
| 		__set_current_state(TASK_INTERRUPTIBLE); | ||||
| 		set_current_state(TASK_INTERRUPTIBLE); | ||||
| 		__add_wait_queue(&cc->write_thread_wait, &wait); | ||||
| 
 | ||||
| 		spin_unlock_irq(&cc->write_thread_wait.lock); | ||||
| 
 | ||||
| 		if (unlikely(kthread_should_stop())) { | ||||
| 			set_task_state(current, TASK_RUNNING); | ||||
| 			remove_wait_queue(&cc->write_thread_wait, &wait); | ||||
| 			break; | ||||
| 		} | ||||
| 
 | ||||
| 		schedule(); | ||||
| 
 | ||||
| 		set_task_state(current, TASK_RUNNING); | ||||
| 		spin_lock_irq(&cc->write_thread_wait.lock); | ||||
| 		__remove_wait_queue(&cc->write_thread_wait, &wait); | ||||
| 		goto continue_locked; | ||||
|  | @ -1533,13 +1534,8 @@ static void crypt_dtr(struct dm_target *ti) | |||
| 	if (!cc) | ||||
| 		return; | ||||
| 
 | ||||
| 	if (cc->write_thread) { | ||||
| 		spin_lock_irq(&cc->write_thread_wait.lock); | ||||
| 		set_bit(DM_CRYPT_EXIT_THREAD, &cc->flags); | ||||
| 		wake_up_locked(&cc->write_thread_wait); | ||||
| 		spin_unlock_irq(&cc->write_thread_wait.lock); | ||||
| 	if (cc->write_thread) | ||||
| 		kthread_stop(cc->write_thread); | ||||
| 	} | ||||
| 
 | ||||
| 	if (cc->io_queue) | ||||
| 		destroy_workqueue(cc->io_queue); | ||||
|  |  | |||
|  | @ -550,9 +550,9 @@ static int __multipath_map(struct dm_target *ti, struct request *clone, | |||
| 		pgpath = choose_pgpath(m, nr_bytes); | ||||
| 
 | ||||
| 	if (!pgpath) { | ||||
| 		if (!must_push_back_rq(m)) | ||||
| 			r = -EIO;	/* Failed */ | ||||
| 		return r; | ||||
| 		if (must_push_back_rq(m)) | ||||
| 			return DM_MAPIO_DELAY_REQUEUE; | ||||
| 		return -EIO;	/* Failed */ | ||||
| 	} else if (test_bit(MPATHF_QUEUE_IO, &m->flags) || | ||||
| 		   test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) { | ||||
| 		pg_init_all_paths(m); | ||||
|  | @ -680,9 +680,11 @@ static int multipath_map_bio(struct dm_target *ti, struct bio *bio) | |||
| 	return __multipath_map_bio(m, bio, mpio); | ||||
| } | ||||
| 
 | ||||
| static void process_queued_bios_list(struct multipath *m) | ||||
| static void process_queued_io_list(struct multipath *m) | ||||
| { | ||||
| 	if (m->queue_mode == DM_TYPE_BIO_BASED) | ||||
| 	if (m->queue_mode == DM_TYPE_MQ_REQUEST_BASED) | ||||
| 		dm_mq_kick_requeue_list(dm_table_get_md(m->ti->table)); | ||||
| 	else if (m->queue_mode == DM_TYPE_BIO_BASED) | ||||
| 		queue_work(kmultipathd, &m->process_queued_bios); | ||||
| } | ||||
| 
 | ||||
|  | @ -752,7 +754,7 @@ static int queue_if_no_path(struct multipath *m, bool queue_if_no_path, | |||
| 
 | ||||
| 	if (!queue_if_no_path) { | ||||
| 		dm_table_run_md_queue_async(m->ti->table); | ||||
| 		process_queued_bios_list(m); | ||||
| 		process_queued_io_list(m); | ||||
| 	} | ||||
| 
 | ||||
| 	return 0; | ||||
|  | @ -1193,21 +1195,17 @@ static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv) | |||
| 
 | ||||
| static void multipath_wait_for_pg_init_completion(struct multipath *m) | ||||
| { | ||||
| 	DECLARE_WAITQUEUE(wait, current); | ||||
| 
 | ||||
| 	add_wait_queue(&m->pg_init_wait, &wait); | ||||
| 	DEFINE_WAIT(wait); | ||||
| 
 | ||||
| 	while (1) { | ||||
| 		set_current_state(TASK_UNINTERRUPTIBLE); | ||||
| 		prepare_to_wait(&m->pg_init_wait, &wait, TASK_UNINTERRUPTIBLE); | ||||
| 
 | ||||
| 		if (!atomic_read(&m->pg_init_in_progress)) | ||||
| 			break; | ||||
| 
 | ||||
| 		io_schedule(); | ||||
| 	} | ||||
| 	set_current_state(TASK_RUNNING); | ||||
| 
 | ||||
| 	remove_wait_queue(&m->pg_init_wait, &wait); | ||||
| 	finish_wait(&m->pg_init_wait, &wait); | ||||
| } | ||||
| 
 | ||||
| static void flush_multipath_work(struct multipath *m) | ||||
|  | @ -1308,7 +1306,7 @@ out: | |||
| 	spin_unlock_irqrestore(&m->lock, flags); | ||||
| 	if (run_queue) { | ||||
| 		dm_table_run_md_queue_async(m->ti->table); | ||||
| 		process_queued_bios_list(m); | ||||
| 		process_queued_io_list(m); | ||||
| 	} | ||||
| 
 | ||||
| 	return r; | ||||
|  | @ -1506,7 +1504,7 @@ static void pg_init_done(void *data, int errors) | |||
| 	} | ||||
| 	clear_bit(MPATHF_QUEUE_IO, &m->flags); | ||||
| 
 | ||||
| 	process_queued_bios_list(m); | ||||
| 	process_queued_io_list(m); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Wake up any thread waiting to suspend. | ||||
|  | @ -1521,10 +1519,10 @@ static void activate_path(struct work_struct *work) | |||
| { | ||||
| 	struct pgpath *pgpath = | ||||
| 		container_of(work, struct pgpath, activate_path.work); | ||||
| 	struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev); | ||||
| 
 | ||||
| 	if (pgpath->is_active) | ||||
| 		scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev), | ||||
| 				 pg_init_done, pgpath); | ||||
| 	if (pgpath->is_active && !blk_queue_dying(q)) | ||||
| 		scsi_dh_activate(q, pg_init_done, pgpath); | ||||
| 	else | ||||
| 		pg_init_done(pgpath, SCSI_DH_DEV_OFFLINED); | ||||
| } | ||||
|  | @ -1532,6 +1530,14 @@ static void activate_path(struct work_struct *work) | |||
| static int noretry_error(int error) | ||||
| { | ||||
| 	switch (error) { | ||||
| 	case -EBADE: | ||||
| 		/*
 | ||||
| 		 * EBADE signals a reservation conflict. | ||||
| 		 * We shouldn't fail the path here as we can communicate with | ||||
| 		 * the target.  We should failover to the next path, but in | ||||
| 		 * doing so we might be causing a ping-pong between paths. | ||||
| 		 * So just return the reservation conflict error. | ||||
| 		 */ | ||||
| 	case -EOPNOTSUPP: | ||||
| 	case -EREMOTEIO: | ||||
| 	case -EILSEQ: | ||||
|  | @ -1576,9 +1582,6 @@ static int do_end_io(struct multipath *m, struct request *clone, | |||
| 		if (!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { | ||||
| 			if (!must_push_back_rq(m)) | ||||
| 				r = -EIO; | ||||
| 		} else { | ||||
| 			if (error == -EBADE) | ||||
| 				r = error; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
|  | @ -1627,9 +1630,6 @@ static int do_end_io_bio(struct multipath *m, struct bio *clone, | |||
| 			if (!must_push_back_bio(m)) | ||||
| 				return -EIO; | ||||
| 			return DM_ENDIO_REQUEUE; | ||||
| 		} else { | ||||
| 			if (error == -EBADE) | ||||
| 				return error; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
|  | @ -1941,7 +1941,7 @@ static int multipath_prepare_ioctl(struct dm_target *ti, | |||
| 		if (test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) | ||||
| 			pg_init_all_paths(m); | ||||
| 		dm_table_run_md_queue_async(m->ti->table); | ||||
| 		process_queued_bios_list(m); | ||||
| 		process_queued_io_list(m); | ||||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
|  | @ -1994,11 +1994,14 @@ static int multipath_busy(struct dm_target *ti) | |||
| 	struct priority_group *pg, *next_pg; | ||||
| 	struct pgpath *pgpath; | ||||
| 
 | ||||
| 	/* pg_init in progress or no paths available */ | ||||
| 	if (atomic_read(&m->pg_init_in_progress) || | ||||
| 	    (!atomic_read(&m->nr_valid_paths) && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))) | ||||
| 	/* pg_init in progress */ | ||||
| 	if (atomic_read(&m->pg_init_in_progress)) | ||||
| 		return true; | ||||
| 
 | ||||
| 	/* no paths available, for blk-mq: rely on IO mapping to delay requeue */ | ||||
| 	if (!atomic_read(&m->nr_valid_paths) && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) | ||||
| 		return (m->queue_mode != DM_TYPE_MQ_REQUEST_BASED); | ||||
| 
 | ||||
| 	/* Guess which priority_group will be used at next mapping time */ | ||||
| 	pg = lockless_dereference(m->current_pg); | ||||
| 	next_pg = lockless_dereference(m->next_pg); | ||||
|  |  | |||
|  | @ -73,43 +73,60 @@ static void dm_old_start_queue(struct request_queue *q) | |||
| 	spin_unlock_irqrestore(q->queue_lock, flags); | ||||
| } | ||||
| 
 | ||||
| static void dm_mq_start_queue(struct request_queue *q) | ||||
| { | ||||
| 	unsigned long flags; | ||||
| 
 | ||||
| 	spin_lock_irqsave(q->queue_lock, flags); | ||||
| 	queue_flag_clear(QUEUE_FLAG_STOPPED, q); | ||||
| 	spin_unlock_irqrestore(q->queue_lock, flags); | ||||
| 
 | ||||
| 	blk_mq_start_stopped_hw_queues(q, true); | ||||
| 	blk_mq_kick_requeue_list(q); | ||||
| } | ||||
| 
 | ||||
| void dm_start_queue(struct request_queue *q) | ||||
| { | ||||
| 	if (!q->mq_ops) | ||||
| 		dm_old_start_queue(q); | ||||
| 	else { | ||||
| 		queue_flag_clear_unlocked(QUEUE_FLAG_STOPPED, q); | ||||
| 		blk_mq_start_stopped_hw_queues(q, true); | ||||
| 		blk_mq_kick_requeue_list(q); | ||||
| 	} | ||||
| 	else | ||||
| 		dm_mq_start_queue(q); | ||||
| } | ||||
| 
 | ||||
| static void dm_old_stop_queue(struct request_queue *q) | ||||
| { | ||||
| 	unsigned long flags; | ||||
| 
 | ||||
| 	spin_lock_irqsave(q->queue_lock, flags); | ||||
| 	if (!blk_queue_stopped(q)) | ||||
| 		blk_stop_queue(q); | ||||
| 	spin_unlock_irqrestore(q->queue_lock, flags); | ||||
| } | ||||
| 
 | ||||
| static void dm_mq_stop_queue(struct request_queue *q) | ||||
| { | ||||
| 	unsigned long flags; | ||||
| 
 | ||||
| 	spin_lock_irqsave(q->queue_lock, flags); | ||||
| 	if (blk_queue_stopped(q)) { | ||||
| 		spin_unlock_irqrestore(q->queue_lock, flags); | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| 	blk_stop_queue(q); | ||||
| 	queue_flag_set(QUEUE_FLAG_STOPPED, q); | ||||
| 	spin_unlock_irqrestore(q->queue_lock, flags); | ||||
| 
 | ||||
| 	/* Avoid that requeuing could restart the queue. */ | ||||
| 	blk_mq_cancel_requeue_work(q); | ||||
| 	blk_mq_stop_hw_queues(q); | ||||
| } | ||||
| 
 | ||||
| void dm_stop_queue(struct request_queue *q) | ||||
| { | ||||
| 	if (!q->mq_ops) | ||||
| 		dm_old_stop_queue(q); | ||||
| 	else { | ||||
| 		spin_lock_irq(q->queue_lock); | ||||
| 		queue_flag_set(QUEUE_FLAG_STOPPED, q); | ||||
| 		spin_unlock_irq(q->queue_lock); | ||||
| 
 | ||||
| 		blk_mq_cancel_requeue_work(q); | ||||
| 		blk_mq_stop_hw_queues(q); | ||||
| 	} | ||||
| 	else | ||||
| 		dm_mq_stop_queue(q); | ||||
| } | ||||
| 
 | ||||
| static struct dm_rq_target_io *alloc_old_rq_tio(struct mapped_device *md, | ||||
|  | @ -319,21 +336,32 @@ static void dm_old_requeue_request(struct request *rq) | |||
| 	spin_unlock_irqrestore(q->queue_lock, flags); | ||||
| } | ||||
| 
 | ||||
| static void dm_mq_requeue_request(struct request *rq) | ||||
| static void __dm_mq_kick_requeue_list(struct request_queue *q, unsigned long msecs) | ||||
| { | ||||
| 	struct request_queue *q = rq->q; | ||||
| 	unsigned long flags; | ||||
| 
 | ||||
| 	blk_mq_requeue_request(rq); | ||||
| 	spin_lock_irqsave(q->queue_lock, flags); | ||||
| 	if (!blk_queue_stopped(q)) | ||||
| 		blk_mq_kick_requeue_list(q); | ||||
| 		blk_mq_delay_kick_requeue_list(q, msecs); | ||||
| 	spin_unlock_irqrestore(q->queue_lock, flags); | ||||
| } | ||||
| 
 | ||||
| static void dm_requeue_original_request(struct mapped_device *md, | ||||
| 					struct request *rq) | ||||
| void dm_mq_kick_requeue_list(struct mapped_device *md) | ||||
| { | ||||
| 	__dm_mq_kick_requeue_list(dm_get_md_queue(md), 0); | ||||
| } | ||||
| EXPORT_SYMBOL(dm_mq_kick_requeue_list); | ||||
| 
 | ||||
| static void dm_mq_delay_requeue_request(struct request *rq, unsigned long msecs) | ||||
| { | ||||
| 	blk_mq_requeue_request(rq); | ||||
| 	__dm_mq_kick_requeue_list(rq->q, msecs); | ||||
| } | ||||
| 
 | ||||
| static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_requeue) | ||||
| { | ||||
| 	struct mapped_device *md = tio->md; | ||||
| 	struct request *rq = tio->orig; | ||||
| 	int rw = rq_data_dir(rq); | ||||
| 
 | ||||
| 	rq_end_stats(md, rq); | ||||
|  | @ -342,7 +370,7 @@ static void dm_requeue_original_request(struct mapped_device *md, | |||
| 	if (!rq->q->mq_ops) | ||||
| 		dm_old_requeue_request(rq); | ||||
| 	else | ||||
| 		dm_mq_requeue_request(rq); | ||||
| 		dm_mq_delay_requeue_request(rq, delay_requeue ? 5000 : 0); | ||||
| 
 | ||||
| 	rq_completed(md, rw, false); | ||||
| } | ||||
|  | @ -372,7 +400,7 @@ static void dm_done(struct request *clone, int error, bool mapped) | |||
| 		return; | ||||
| 	else if (r == DM_ENDIO_REQUEUE) | ||||
| 		/* The target wants to requeue the I/O */ | ||||
| 		dm_requeue_original_request(tio->md, tio->orig); | ||||
| 		dm_requeue_original_request(tio, false); | ||||
| 	else { | ||||
| 		DMWARN("unimplemented target endio return value: %d", r); | ||||
| 		BUG(); | ||||
|  | @ -612,20 +640,23 @@ static int dm_old_prep_fn(struct request_queue *q, struct request *rq) | |||
| 
 | ||||
| /*
 | ||||
|  * Returns: | ||||
|  * 0                : the request has been processed | ||||
|  * DM_MAPIO_REQUEUE : the original request needs to be requeued | ||||
|  * DM_MAPIO_*       : the request has been processed as indicated | ||||
|  * DM_MAPIO_REQUEUE : the original request needs to be immediately requeued | ||||
|  * < 0              : the request was completed due to failure | ||||
|  */ | ||||
| static int map_request(struct dm_rq_target_io *tio, struct request *rq, | ||||
| 		       struct mapped_device *md) | ||||
| static int map_request(struct dm_rq_target_io *tio) | ||||
| { | ||||
| 	int r; | ||||
| 	struct dm_target *ti = tio->ti; | ||||
| 	struct mapped_device *md = tio->md; | ||||
| 	struct request *rq = tio->orig; | ||||
| 	struct request *clone = NULL; | ||||
| 
 | ||||
| 	if (tio->clone) { | ||||
| 		clone = tio->clone; | ||||
| 		r = ti->type->map_rq(ti, clone, &tio->info); | ||||
| 		if (r == DM_MAPIO_DELAY_REQUEUE) | ||||
| 			return DM_MAPIO_REQUEUE; /* .request_fn requeue is always immediate */ | ||||
| 	} else { | ||||
| 		r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone); | ||||
| 		if (r < 0) { | ||||
|  | @ -633,9 +664,8 @@ static int map_request(struct dm_rq_target_io *tio, struct request *rq, | |||
| 			dm_kill_unmapped_request(rq, r); | ||||
| 			return r; | ||||
| 		} | ||||
| 		if (r != DM_MAPIO_REMAPPED) | ||||
| 			return r; | ||||
| 		if (setup_clone(clone, rq, tio, GFP_ATOMIC)) { | ||||
| 		if (r == DM_MAPIO_REMAPPED && | ||||
| 		    setup_clone(clone, rq, tio, GFP_ATOMIC)) { | ||||
| 			/* -ENOMEM */ | ||||
| 			ti->type->release_clone_rq(clone); | ||||
| 			return DM_MAPIO_REQUEUE; | ||||
|  | @ -654,7 +684,10 @@ static int map_request(struct dm_rq_target_io *tio, struct request *rq, | |||
| 		break; | ||||
| 	case DM_MAPIO_REQUEUE: | ||||
| 		/* The target wants to requeue the I/O */ | ||||
| 		dm_requeue_original_request(md, tio->orig); | ||||
| 		break; | ||||
| 	case DM_MAPIO_DELAY_REQUEUE: | ||||
| 		/* The target wants to requeue the I/O after a delay */ | ||||
| 		dm_requeue_original_request(tio, true); | ||||
| 		break; | ||||
| 	default: | ||||
| 		if (r > 0) { | ||||
|  | @ -664,10 +697,9 @@ static int map_request(struct dm_rq_target_io *tio, struct request *rq, | |||
| 
 | ||||
| 		/* The target wants to complete the I/O */ | ||||
| 		dm_kill_unmapped_request(rq, r); | ||||
| 		return r; | ||||
| 	} | ||||
| 
 | ||||
| 	return 0; | ||||
| 	return r; | ||||
| } | ||||
| 
 | ||||
| static void dm_start_request(struct mapped_device *md, struct request *orig) | ||||
|  | @ -706,11 +738,9 @@ static void dm_start_request(struct mapped_device *md, struct request *orig) | |||
| static void map_tio_request(struct kthread_work *work) | ||||
| { | ||||
| 	struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work); | ||||
| 	struct request *rq = tio->orig; | ||||
| 	struct mapped_device *md = tio->md; | ||||
| 
 | ||||
| 	if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) | ||||
| 		dm_requeue_original_request(md, rq); | ||||
| 	if (map_request(tio) == DM_MAPIO_REQUEUE) | ||||
| 		dm_requeue_original_request(tio, false); | ||||
| } | ||||
| 
 | ||||
| ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf) | ||||
|  | @ -896,7 +926,7 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, | |||
| 	tio->ti = ti; | ||||
| 
 | ||||
| 	/* Direct call is fine since .queue_rq allows allocations */ | ||||
| 	if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) { | ||||
| 	if (map_request(tio) == DM_MAPIO_REQUEUE) { | ||||
| 		/* Undo dm_start_request() before requeuing */ | ||||
| 		rq_end_stats(md, rq); | ||||
| 		rq_completed(md, rq_data_dir(rq), false); | ||||
|  |  | |||
|  | @ -55,6 +55,8 @@ void dm_mq_cleanup_mapped_device(struct mapped_device *md); | |||
| void dm_start_queue(struct request_queue *q); | ||||
| void dm_stop_queue(struct request_queue *q); | ||||
| 
 | ||||
| void dm_mq_kick_requeue_list(struct mapped_device *md); | ||||
| 
 | ||||
| unsigned dm_get_reserved_rq_based_ios(void); | ||||
| 
 | ||||
| ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf); | ||||
|  |  | |||
|  | @ -1648,6 +1648,8 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, | |||
| 	struct request_queue *q = md->queue; | ||||
| 	sector_t size; | ||||
| 
 | ||||
| 	lockdep_assert_held(&md->suspend_lock); | ||||
| 
 | ||||
| 	size = dm_table_get_size(t); | ||||
| 
 | ||||
| 	/*
 | ||||
|  | @ -1873,6 +1875,7 @@ EXPORT_SYMBOL_GPL(dm_device_name); | |||
| 
 | ||||
| static void __dm_destroy(struct mapped_device *md, bool wait) | ||||
| { | ||||
| 	struct request_queue *q = dm_get_md_queue(md); | ||||
| 	struct dm_table *map; | ||||
| 	int srcu_idx; | ||||
| 
 | ||||
|  | @ -1883,6 +1886,10 @@ static void __dm_destroy(struct mapped_device *md, bool wait) | |||
| 	set_bit(DMF_FREEING, &md->flags); | ||||
| 	spin_unlock(&_minor_lock); | ||||
| 
 | ||||
| 	spin_lock_irq(q->queue_lock); | ||||
| 	queue_flag_set(QUEUE_FLAG_DYING, q); | ||||
| 	spin_unlock_irq(q->queue_lock); | ||||
| 
 | ||||
| 	if (dm_request_based(md) && md->kworker_task) | ||||
| 		flush_kthread_worker(&md->kworker); | ||||
| 
 | ||||
|  | @ -1934,30 +1941,25 @@ void dm_put(struct mapped_device *md) | |||
| } | ||||
| EXPORT_SYMBOL_GPL(dm_put); | ||||
| 
 | ||||
| static int dm_wait_for_completion(struct mapped_device *md, int interruptible) | ||||
| static int dm_wait_for_completion(struct mapped_device *md, long task_state) | ||||
| { | ||||
| 	int r = 0; | ||||
| 	DECLARE_WAITQUEUE(wait, current); | ||||
| 
 | ||||
| 	add_wait_queue(&md->wait, &wait); | ||||
| 	DEFINE_WAIT(wait); | ||||
| 
 | ||||
| 	while (1) { | ||||
| 		set_current_state(interruptible); | ||||
| 		prepare_to_wait(&md->wait, &wait, task_state); | ||||
| 
 | ||||
| 		if (!md_in_flight(md)) | ||||
| 			break; | ||||
| 
 | ||||
| 		if (interruptible == TASK_INTERRUPTIBLE && | ||||
| 		    signal_pending(current)) { | ||||
| 		if (signal_pending_state(task_state, current)) { | ||||
| 			r = -EINTR; | ||||
| 			break; | ||||
| 		} | ||||
| 
 | ||||
| 		io_schedule(); | ||||
| 	} | ||||
| 	set_current_state(TASK_RUNNING); | ||||
| 
 | ||||
| 	remove_wait_queue(&md->wait, &wait); | ||||
| 	finish_wait(&md->wait, &wait); | ||||
| 
 | ||||
| 	return r; | ||||
| } | ||||
|  | @ -2075,6 +2077,10 @@ static void unlock_fs(struct mapped_device *md) | |||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * @suspend_flags: DM_SUSPEND_LOCKFS_FLAG and/or DM_SUSPEND_NOFLUSH_FLAG | ||||
|  * @task_state: e.g. TASK_INTERRUPTIBLE or TASK_UNINTERRUPTIBLE | ||||
|  * @dmf_suspended_flag: DMF_SUSPENDED or DMF_SUSPENDED_INTERNALLY | ||||
|  * | ||||
|  * If __dm_suspend returns 0, the device is completely quiescent | ||||
|  * now. There is no request-processing activity. All new requests | ||||
|  * are being added to md->deferred list. | ||||
|  | @ -2082,13 +2088,15 @@ static void unlock_fs(struct mapped_device *md) | |||
|  * Caller must hold md->suspend_lock | ||||
|  */ | ||||
| static int __dm_suspend(struct mapped_device *md, struct dm_table *map, | ||||
| 			unsigned suspend_flags, int interruptible, | ||||
| 			unsigned suspend_flags, long task_state, | ||||
| 			int dmf_suspended_flag) | ||||
| { | ||||
| 	bool do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG; | ||||
| 	bool noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG; | ||||
| 	int r; | ||||
| 
 | ||||
| 	lockdep_assert_held(&md->suspend_lock); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * DMF_NOFLUSH_SUSPENDING must be set before presuspend. | ||||
| 	 * This flag is cleared before dm_suspend returns. | ||||
|  | @ -2149,7 +2157,7 @@ static int __dm_suspend(struct mapped_device *md, struct dm_table *map, | |||
| 	 * We call dm_wait_for_completion to wait for all existing requests | ||||
| 	 * to finish. | ||||
| 	 */ | ||||
| 	r = dm_wait_for_completion(md, interruptible); | ||||
| 	r = dm_wait_for_completion(md, task_state); | ||||
| 	if (!r) | ||||
| 		set_bit(dmf_suspended_flag, &md->flags); | ||||
| 
 | ||||
|  | @ -2249,10 +2257,11 @@ static int __dm_resume(struct mapped_device *md, struct dm_table *map) | |||
| 
 | ||||
| int dm_resume(struct mapped_device *md) | ||||
| { | ||||
| 	int r = -EINVAL; | ||||
| 	int r; | ||||
| 	struct dm_table *map = NULL; | ||||
| 
 | ||||
| retry: | ||||
| 	r = -EINVAL; | ||||
| 	mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING); | ||||
| 
 | ||||
| 	if (!dm_suspended_md(md)) | ||||
|  | @ -2276,8 +2285,6 @@ retry: | |||
| 		goto out; | ||||
| 
 | ||||
| 	clear_bit(DMF_SUSPENDED, &md->flags); | ||||
| 
 | ||||
| 	r = 0; | ||||
| out: | ||||
| 	mutex_unlock(&md->suspend_lock); | ||||
| 
 | ||||
|  |  | |||
|  | @ -277,6 +277,48 @@ static int insert_ablock(struct dm_array_info *info, uint64_t index, | |||
| 	return dm_btree_insert(&info->btree_info, *root, &index, &block_le, root); | ||||
| } | ||||
| 
 | ||||
| /*----------------------------------------------------------------*/ | ||||
| 
 | ||||
| static int __shadow_ablock(struct dm_array_info *info, dm_block_t b, | ||||
| 			   struct dm_block **block, struct array_block **ab) | ||||
| { | ||||
| 	int inc; | ||||
| 	int r = dm_tm_shadow_block(info->btree_info.tm, b, | ||||
| 				   &array_validator, block, &inc); | ||||
| 	if (r) | ||||
| 		return r; | ||||
| 
 | ||||
| 	*ab = dm_block_data(*block); | ||||
| 	if (inc) | ||||
| 		inc_ablock_entries(info, *ab); | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * The shadow op will often be a noop.  Only insert if it really | ||||
|  * copied data. | ||||
|  */ | ||||
| static int __reinsert_ablock(struct dm_array_info *info, unsigned index, | ||||
| 			     struct dm_block *block, dm_block_t b, | ||||
| 			     dm_block_t *root) | ||||
| { | ||||
| 	int r = 0; | ||||
| 
 | ||||
| 	if (dm_block_location(block) != b) { | ||||
| 		/*
 | ||||
| 		 * dm_tm_shadow_block will have already decremented the old | ||||
| 		 * block, but it is still referenced by the btree.  We | ||||
| 		 * increment to stop the insert decrementing it below zero | ||||
| 		 * when overwriting the old value. | ||||
| 		 */ | ||||
| 		dm_tm_inc(info->btree_info.tm, b); | ||||
| 		r = insert_ablock(info, index, block, root); | ||||
| 	} | ||||
| 
 | ||||
| 	return r; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Looks up an array block in the btree.  Then shadows it, and updates the | ||||
|  * btree to point to this new shadow.  'root' is an input/output parameter | ||||
|  | @ -286,49 +328,21 @@ static int shadow_ablock(struct dm_array_info *info, dm_block_t *root, | |||
| 			 unsigned index, struct dm_block **block, | ||||
| 			 struct array_block **ab) | ||||
| { | ||||
| 	int r, inc; | ||||
| 	int r; | ||||
| 	uint64_t key = index; | ||||
| 	dm_block_t b; | ||||
| 	__le64 block_le; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * lookup | ||||
| 	 */ | ||||
| 	r = dm_btree_lookup(&info->btree_info, *root, &key, &block_le); | ||||
| 	if (r) | ||||
| 		return r; | ||||
| 	b = le64_to_cpu(block_le); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * shadow | ||||
| 	 */ | ||||
| 	r = dm_tm_shadow_block(info->btree_info.tm, b, | ||||
| 			       &array_validator, block, &inc); | ||||
| 	r = __shadow_ablock(info, b, block, ab); | ||||
| 	if (r) | ||||
| 		return r; | ||||
| 
 | ||||
| 	*ab = dm_block_data(*block); | ||||
| 	if (inc) | ||||
| 		inc_ablock_entries(info, *ab); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Reinsert. | ||||
| 	 * | ||||
| 	 * The shadow op will often be a noop.  Only insert if it really | ||||
| 	 * copied data. | ||||
| 	 */ | ||||
| 	if (dm_block_location(*block) != b) { | ||||
| 		/*
 | ||||
| 		 * dm_tm_shadow_block will have already decremented the old | ||||
| 		 * block, but it is still referenced by the btree.  We | ||||
| 		 * increment to stop the insert decrementing it below zero | ||||
| 		 * when overwriting the old value. | ||||
| 		 */ | ||||
| 		dm_tm_inc(info->btree_info.tm, b); | ||||
| 		r = insert_ablock(info, index, *block, root); | ||||
| 	} | ||||
| 
 | ||||
| 	return r; | ||||
| 	return __reinsert_ablock(info, index, *block, b, root); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  | @ -681,6 +695,72 @@ int dm_array_resize(struct dm_array_info *info, dm_block_t root, | |||
| } | ||||
| EXPORT_SYMBOL_GPL(dm_array_resize); | ||||
| 
 | ||||
| static int populate_ablock_with_values(struct dm_array_info *info, struct array_block *ab, | ||||
| 				       value_fn fn, void *context, unsigned base, unsigned new_nr) | ||||
| { | ||||
| 	int r; | ||||
| 	unsigned i; | ||||
| 	uint32_t nr_entries; | ||||
| 	struct dm_btree_value_type *vt = &info->value_type; | ||||
| 
 | ||||
| 	BUG_ON(le32_to_cpu(ab->nr_entries)); | ||||
| 	BUG_ON(new_nr > le32_to_cpu(ab->max_entries)); | ||||
| 
 | ||||
| 	nr_entries = le32_to_cpu(ab->nr_entries); | ||||
| 	for (i = 0; i < new_nr; i++) { | ||||
| 		r = fn(base + i, element_at(info, ab, i), context); | ||||
| 		if (r) | ||||
| 			return r; | ||||
| 
 | ||||
| 		if (vt->inc) | ||||
| 			vt->inc(vt->context, element_at(info, ab, i)); | ||||
| 	} | ||||
| 
 | ||||
| 	ab->nr_entries = cpu_to_le32(new_nr); | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| int dm_array_new(struct dm_array_info *info, dm_block_t *root, | ||||
| 		 uint32_t size, value_fn fn, void *context) | ||||
| { | ||||
| 	int r; | ||||
| 	struct dm_block *block; | ||||
| 	struct array_block *ab; | ||||
| 	unsigned block_index, end_block, size_of_block, max_entries; | ||||
| 
 | ||||
| 	r = dm_array_empty(info, root); | ||||
| 	if (r) | ||||
| 		return r; | ||||
| 
 | ||||
| 	size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm)); | ||||
| 	max_entries = calc_max_entries(info->value_type.size, size_of_block); | ||||
| 	end_block = dm_div_up(size, max_entries); | ||||
| 
 | ||||
| 	for (block_index = 0; block_index != end_block; block_index++) { | ||||
| 		r = alloc_ablock(info, size_of_block, max_entries, &block, &ab); | ||||
| 		if (r) | ||||
| 			break; | ||||
| 
 | ||||
| 		r = populate_ablock_with_values(info, ab, fn, context, | ||||
| 						block_index * max_entries, | ||||
| 						min(max_entries, size)); | ||||
| 		if (r) { | ||||
| 			unlock_ablock(info, block); | ||||
| 			break; | ||||
| 		} | ||||
| 
 | ||||
| 		r = insert_ablock(info, block_index, block, root); | ||||
| 		unlock_ablock(info, block); | ||||
| 		if (r) | ||||
| 			break; | ||||
| 
 | ||||
| 		size -= max_entries; | ||||
| 	} | ||||
| 
 | ||||
| 	return r; | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(dm_array_new); | ||||
| 
 | ||||
| int dm_array_del(struct dm_array_info *info, dm_block_t root) | ||||
| { | ||||
| 	return dm_btree_del(&info->btree_info, root); | ||||
|  | @ -819,3 +899,89 @@ int dm_array_walk(struct dm_array_info *info, dm_block_t root, | |||
| EXPORT_SYMBOL_GPL(dm_array_walk); | ||||
| 
 | ||||
| /*----------------------------------------------------------------*/ | ||||
| 
 | ||||
| static int load_ablock(struct dm_array_cursor *c) | ||||
| { | ||||
| 	int r; | ||||
| 	__le64 value_le; | ||||
| 	uint64_t key; | ||||
| 
 | ||||
| 	if (c->block) | ||||
| 		unlock_ablock(c->info, c->block); | ||||
| 
 | ||||
| 	c->block = NULL; | ||||
| 	c->ab = NULL; | ||||
| 	c->index = 0; | ||||
| 
 | ||||
| 	r = dm_btree_cursor_get_value(&c->cursor, &key, &value_le); | ||||
| 	if (r) { | ||||
| 		DMERR("dm_btree_cursor_get_value failed"); | ||||
| 		dm_btree_cursor_end(&c->cursor); | ||||
| 
 | ||||
| 	} else { | ||||
| 		r = get_ablock(c->info, le64_to_cpu(value_le), &c->block, &c->ab); | ||||
| 		if (r) { | ||||
| 			DMERR("get_ablock failed"); | ||||
| 			dm_btree_cursor_end(&c->cursor); | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	return r; | ||||
| } | ||||
| 
 | ||||
| int dm_array_cursor_begin(struct dm_array_info *info, dm_block_t root, | ||||
| 			  struct dm_array_cursor *c) | ||||
| { | ||||
| 	int r; | ||||
| 
 | ||||
| 	memset(c, 0, sizeof(*c)); | ||||
| 	c->info = info; | ||||
| 	r = dm_btree_cursor_begin(&info->btree_info, root, true, &c->cursor); | ||||
| 	if (r) { | ||||
| 		DMERR("couldn't create btree cursor"); | ||||
| 		return r; | ||||
| 	} | ||||
| 
 | ||||
| 	return load_ablock(c); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(dm_array_cursor_begin); | ||||
| 
 | ||||
| void dm_array_cursor_end(struct dm_array_cursor *c) | ||||
| { | ||||
| 	if (c->block) { | ||||
| 		unlock_ablock(c->info, c->block); | ||||
| 		dm_btree_cursor_end(&c->cursor); | ||||
| 	} | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(dm_array_cursor_end); | ||||
| 
 | ||||
| int dm_array_cursor_next(struct dm_array_cursor *c) | ||||
| { | ||||
| 	int r; | ||||
| 
 | ||||
| 	if (!c->block) | ||||
| 		return -ENODATA; | ||||
| 
 | ||||
| 	c->index++; | ||||
| 
 | ||||
| 	if (c->index >= le32_to_cpu(c->ab->nr_entries)) { | ||||
| 		r = dm_btree_cursor_next(&c->cursor); | ||||
| 		if (r) | ||||
| 			return r; | ||||
| 
 | ||||
| 		r = load_ablock(c); | ||||
| 		if (r) | ||||
| 			return r; | ||||
| 	} | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(dm_array_cursor_next); | ||||
| 
 | ||||
| void dm_array_cursor_get_value(struct dm_array_cursor *c, void **value_le) | ||||
| { | ||||
| 	*value_le = element_at(c->info, c->ab, c->index); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(dm_array_cursor_get_value); | ||||
| 
 | ||||
| /*----------------------------------------------------------------*/ | ||||
|  |  | |||
|  | @ -111,6 +111,25 @@ int dm_array_resize(struct dm_array_info *info, dm_block_t root, | |||
| 		    const void *value, dm_block_t *new_root) | ||||
| 	__dm_written_to_disk(value); | ||||
| 
 | ||||
| /*
 | ||||
|  * Creates a new array populated with values provided by a callback | ||||
|  * function.  This is more efficient than creating an empty array, | ||||
|  * resizing, and then setting values since that process incurs a lot of | ||||
|  * copying. | ||||
|  * | ||||
|  * Assumes 32bit values for now since it's only used by the cache hint | ||||
|  * array. | ||||
|  * | ||||
|  * info - describes the array | ||||
|  * root - the root block of the array on disk | ||||
|  * size - the number of entries in the array | ||||
|  * fn - the callback | ||||
|  * context - passed to the callback | ||||
|  */ | ||||
| typedef int (*value_fn)(uint32_t index, void *value_le, void *context); | ||||
| int dm_array_new(struct dm_array_info *info, dm_block_t *root, | ||||
| 		 uint32_t size, value_fn fn, void *context); | ||||
| 
 | ||||
| /*
 | ||||
|  * Frees a whole array.  The value_type's decrement operation will be called | ||||
|  * for all values in the array | ||||
|  | @ -163,4 +182,37 @@ int dm_array_walk(struct dm_array_info *info, dm_block_t root, | |||
| 
 | ||||
| /*----------------------------------------------------------------*/ | ||||
| 
 | ||||
| /*
 | ||||
|  * Cursor api. | ||||
|  * | ||||
|  * This lets you iterate through all the entries in an array efficiently | ||||
|  * (it will preload metadata). | ||||
|  * | ||||
|  * I'm using a cursor, rather than a walk function with a callback because | ||||
|  * the cache target needs to iterate both the mapping and hint arrays in | ||||
|  * unison. | ||||
|  */ | ||||
| struct dm_array_cursor { | ||||
| 	struct dm_array_info *info; | ||||
| 	struct dm_btree_cursor cursor; | ||||
| 
 | ||||
| 	struct dm_block *block; | ||||
| 	struct array_block *ab; | ||||
| 	unsigned index; | ||||
| }; | ||||
| 
 | ||||
| int dm_array_cursor_begin(struct dm_array_info *info, | ||||
| 			  dm_block_t root, struct dm_array_cursor *c); | ||||
| void dm_array_cursor_end(struct dm_array_cursor *c); | ||||
| 
 | ||||
| uint32_t dm_array_cursor_index(struct dm_array_cursor *c); | ||||
| int dm_array_cursor_next(struct dm_array_cursor *c); | ||||
| 
 | ||||
| /*
 | ||||
|  * value_le is only valid while the cursor points at the current value. | ||||
|  */ | ||||
| void dm_array_cursor_get_value(struct dm_array_cursor *c, void **value_le); | ||||
| 
 | ||||
| /*----------------------------------------------------------------*/ | ||||
| 
 | ||||
| #endif	/* _LINUX_DM_ARRAY_H */ | ||||
|  |  | |||
|  | @ -994,3 +994,165 @@ int dm_btree_walk(struct dm_btree_info *info, dm_block_t root, | |||
| 	return walk_node(info, root, fn, context); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(dm_btree_walk); | ||||
| 
 | ||||
| /*----------------------------------------------------------------*/ | ||||
| 
 | ||||
| static void prefetch_values(struct dm_btree_cursor *c) | ||||
| { | ||||
| 	unsigned i, nr; | ||||
| 	__le64 value_le; | ||||
| 	struct cursor_node *n = c->nodes + c->depth - 1; | ||||
| 	struct btree_node *bn = dm_block_data(n->b); | ||||
| 	struct dm_block_manager *bm = dm_tm_get_bm(c->info->tm); | ||||
| 
 | ||||
| 	BUG_ON(c->info->value_type.size != sizeof(value_le)); | ||||
| 
 | ||||
| 	nr = le32_to_cpu(bn->header.nr_entries); | ||||
| 	for (i = 0; i < nr; i++) { | ||||
| 		memcpy(&value_le, value_ptr(bn, i), sizeof(value_le)); | ||||
| 		dm_bm_prefetch(bm, le64_to_cpu(value_le)); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| static bool leaf_node(struct dm_btree_cursor *c) | ||||
| { | ||||
| 	struct cursor_node *n = c->nodes + c->depth - 1; | ||||
| 	struct btree_node *bn = dm_block_data(n->b); | ||||
| 
 | ||||
| 	return le32_to_cpu(bn->header.flags) & LEAF_NODE; | ||||
| } | ||||
| 
 | ||||
| static int push_node(struct dm_btree_cursor *c, dm_block_t b) | ||||
| { | ||||
| 	int r; | ||||
| 	struct cursor_node *n = c->nodes + c->depth; | ||||
| 
 | ||||
| 	if (c->depth >= DM_BTREE_CURSOR_MAX_DEPTH - 1) { | ||||
| 		DMERR("couldn't push cursor node, stack depth too high"); | ||||
| 		return -EINVAL; | ||||
| 	} | ||||
| 
 | ||||
| 	r = bn_read_lock(c->info, b, &n->b); | ||||
| 	if (r) | ||||
| 		return r; | ||||
| 
 | ||||
| 	n->index = 0; | ||||
| 	c->depth++; | ||||
| 
 | ||||
| 	if (c->prefetch_leaves || !leaf_node(c)) | ||||
| 		prefetch_values(c); | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static void pop_node(struct dm_btree_cursor *c) | ||||
| { | ||||
| 	c->depth--; | ||||
| 	unlock_block(c->info, c->nodes[c->depth].b); | ||||
| } | ||||
| 
 | ||||
| static int inc_or_backtrack(struct dm_btree_cursor *c) | ||||
| { | ||||
| 	struct cursor_node *n; | ||||
| 	struct btree_node *bn; | ||||
| 
 | ||||
| 	for (;;) { | ||||
| 		if (!c->depth) | ||||
| 			return -ENODATA; | ||||
| 
 | ||||
| 		n = c->nodes + c->depth - 1; | ||||
| 		bn = dm_block_data(n->b); | ||||
| 
 | ||||
| 		n->index++; | ||||
| 		if (n->index < le32_to_cpu(bn->header.nr_entries)) | ||||
| 			break; | ||||
| 
 | ||||
| 		pop_node(c); | ||||
| 	} | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static int find_leaf(struct dm_btree_cursor *c) | ||||
| { | ||||
| 	int r = 0; | ||||
| 	struct cursor_node *n; | ||||
| 	struct btree_node *bn; | ||||
| 	__le64 value_le; | ||||
| 
 | ||||
| 	for (;;) { | ||||
| 		n = c->nodes + c->depth - 1; | ||||
| 		bn = dm_block_data(n->b); | ||||
| 
 | ||||
| 		if (le32_to_cpu(bn->header.flags) & LEAF_NODE) | ||||
| 			break; | ||||
| 
 | ||||
| 		memcpy(&value_le, value_ptr(bn, n->index), sizeof(value_le)); | ||||
| 		r = push_node(c, le64_to_cpu(value_le)); | ||||
| 		if (r) { | ||||
| 			DMERR("push_node failed"); | ||||
| 			break; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	if (!r && (le32_to_cpu(bn->header.nr_entries) == 0)) | ||||
| 		return -ENODATA; | ||||
| 
 | ||||
| 	return r; | ||||
| } | ||||
| 
 | ||||
| int dm_btree_cursor_begin(struct dm_btree_info *info, dm_block_t root, | ||||
| 			  bool prefetch_leaves, struct dm_btree_cursor *c) | ||||
| { | ||||
| 	int r; | ||||
| 
 | ||||
| 	c->info = info; | ||||
| 	c->root = root; | ||||
| 	c->depth = 0; | ||||
| 	c->prefetch_leaves = prefetch_leaves; | ||||
| 
 | ||||
| 	r = push_node(c, root); | ||||
| 	if (r) | ||||
| 		return r; | ||||
| 
 | ||||
| 	return find_leaf(c); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(dm_btree_cursor_begin); | ||||
| 
 | ||||
| void dm_btree_cursor_end(struct dm_btree_cursor *c) | ||||
| { | ||||
| 	while (c->depth) | ||||
| 		pop_node(c); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(dm_btree_cursor_end); | ||||
| 
 | ||||
/*
 * Move the cursor to the next leaf entry.
 *
 * Returns 0 on success, or the error from inc_or_backtrack() (-ENODATA
 * once the stack is empty) or find_leaf().
 */
int dm_btree_cursor_next(struct dm_btree_cursor *c)
{
	int r;

	r = inc_or_backtrack(c);
	if (r)
		return r;

	r = find_leaf(c);
	if (r)
		DMERR("find_leaf failed");

	return r;
}
EXPORT_SYMBOL_GPL(dm_btree_cursor_next);
| 
 | ||||
| int dm_btree_cursor_get_value(struct dm_btree_cursor *c, uint64_t *key, void *value_le) | ||||
| { | ||||
| 	if (c->depth) { | ||||
| 		struct cursor_node *n = c->nodes + c->depth - 1; | ||||
| 		struct btree_node *bn = dm_block_data(n->b); | ||||
| 
 | ||||
| 		if (le32_to_cpu(bn->header.flags) & INTERNAL_NODE) | ||||
| 			return -EINVAL; | ||||
| 
 | ||||
| 		*key = le64_to_cpu(*key_ptr(bn, n->index)); | ||||
| 		memcpy(value_le, value_ptr(bn, n->index), c->info->value_type.size); | ||||
| 		return 0; | ||||
| 
 | ||||
| 	} else | ||||
| 		return -ENODATA; | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(dm_btree_cursor_get_value); | ||||
|  |  | |||
|  | @ -176,4 +176,39 @@ int dm_btree_walk(struct dm_btree_info *info, dm_block_t root, | |||
| 		  int (*fn)(void *context, uint64_t *keys, void *leaf), | ||||
| 		  void *context); | ||||
| 
 | ||||
| 
 | ||||
| /*----------------------------------------------------------------*/ | ||||
| 
 | ||||
| /*
 | ||||
|  * Cursor API.  This does not follow the rolling lock convention.  Since we | ||||
|  * know the order that values are required we can issue prefetches to speed | ||||
|  * up iteration.  Use on a single level btree only. | ||||
|  */ | ||||
| #define DM_BTREE_CURSOR_MAX_DEPTH 16 | ||||
| 
 | ||||
/* One entry of the cursor's node stack. */
struct cursor_node {
	struct dm_block *b;	/* read-locked btree node */
	unsigned index;		/* current entry within that node */
};
| 
 | ||||
| struct dm_btree_cursor { | ||||
| 	struct dm_btree_info *info; | ||||
| 	dm_block_t root; | ||||
| 
 | ||||
| 	bool prefetch_leaves; | ||||
| 	unsigned depth; | ||||
| 	struct cursor_node nodes[DM_BTREE_CURSOR_MAX_DEPTH]; | ||||
| }; | ||||
| 
 | ||||
| /*
 | ||||
|  * Creates a fresh cursor.  If prefetch_leaves is set then it is assumed | ||||
|  * the btree contains block indexes that will be prefetched.  The cursor is | ||||
|  * quite large, so you probably don't want to put it on the stack. | ||||
|  */ | ||||
| int dm_btree_cursor_begin(struct dm_btree_info *info, dm_block_t root, | ||||
| 			  bool prefetch_leaves, struct dm_btree_cursor *c); | ||||
| void dm_btree_cursor_end(struct dm_btree_cursor *c); | ||||
| int dm_btree_cursor_next(struct dm_btree_cursor *c); | ||||
| int dm_btree_cursor_get_value(struct dm_btree_cursor *c, uint64_t *key, void *value_le); | ||||
| 
 | ||||
| #endif	/* _LINUX_DM_BTREE_H */ | ||||
|  |  | |||
|  | @ -590,6 +590,7 @@ extern struct ratelimit_state dm_ratelimit_state; | |||
| #define DM_MAPIO_SUBMITTED	0 | ||||
| #define DM_MAPIO_REMAPPED	1 | ||||
| #define DM_MAPIO_REQUEUE	DM_ENDIO_REQUEUE | ||||
| #define DM_MAPIO_DELAY_REQUEUE	3 | ||||
| 
 | ||||
| #define dm_sector_div64(x, y)( \ | ||||
| { \ | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Linus Torvalds
						Linus Torvalds