Mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git, synced 2025-10-31 16:54:21 +00:00
	bcachefs: Interior btree updates are now fully transactional
We now update the alloc info (bucket sector counts) atomically with
journalling the update to the interior btree nodes, and we also set new
btree roots atomically with the journalled part of the btree update.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent c823c3390b
commit 00b8ccf707

21 changed files with 414 additions and 628 deletions
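Editorial note on the mechanism (not part of the commit): setting new btree roots "atomically with the journalled part of the update" relies on btree roots travelling as entries inside journal writes rather than being flushed separately via btree_roots_dirty. Below is a minimal, standalone sketch of that idea under simplified, made-up types (btree_root, journal_entry, emit_root_entries are illustrative, not the kernel structures); it emits one journal entry per live root, roughly what the removed loop in bch2_journal_super_entries_add_common() did and what bch2_btree_roots_to_journal_entries() now does at journal-write time.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define BTREE_ID_NR	4		/* toy value, not the kernel's */
#define KEY_U64S_MAX	8

struct btree_root {
	int		alive;
	unsigned	level;
	unsigned	key_u64s;
	uint64_t	key[KEY_U64S_MAX];	/* stand-in for the root pointer bkey */
};

struct journal_entry {
	uint16_t	u64s;
	uint8_t		btree_id;
	uint8_t		level;
	uint8_t		type;
	uint64_t	data[KEY_U64S_MAX];
};

enum { BCH_JSET_ENTRY_btree_root = 1 };	/* stand-in for the real jset entry type */

/* Emit one journal entry per live btree root; returns how many were written */
static unsigned emit_root_entries(const struct btree_root *roots,
				  struct journal_entry *out)
{
	unsigned nr = 0;

	for (unsigned id = 0; id < BTREE_ID_NR; id++) {
		const struct btree_root *r = &roots[id];

		if (!r->alive)
			continue;

		out[nr].u64s	 = (uint16_t) r->key_u64s;
		out[nr].btree_id = (uint8_t) id;
		out[nr].level	 = (uint8_t) r->level;
		out[nr].type	 = BCH_JSET_ENTRY_btree_root;
		memcpy(out[nr].data, r->key, r->key_u64s * sizeof(uint64_t));
		nr++;
	}

	return nr;
}

int main(void)
{
	struct btree_root roots[BTREE_ID_NR] = {
		[0] = { .alive = 1, .level = 1, .key_u64s = 3, .key = { 1, 2, 3 } },
		[2] = { .alive = 1, .level = 0, .key_u64s = 2, .key = { 7, 8 } },
	};
	struct journal_entry entries[BTREE_ID_NR];
	unsigned nr = emit_root_entries(roots, entries);

	for (unsigned i = 0; i < nr; i++)
		printf("btree %u: root at level %u, %u u64s\n",
		       (unsigned) entries[i].btree_id,
		       (unsigned) entries[i].level,
		       (unsigned) entries[i].u64s);
	return 0;
}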
@@ -1461,11 +1461,6 @@ again:
 	}
 	rcu_read_unlock();
 
-	if (c->btree_roots_dirty) {
-		bch2_journal_meta(&c->journal);
-		goto again;
-	}
-
 	return !nodes_unwritten &&
 		!bch2_btree_interior_updates_nr_pending(c);
 }
@@ -603,13 +603,10 @@ struct bch_fs {
 	struct bio_set		btree_bio;
 
 	struct btree_root	btree_roots[BTREE_ID_NR];
-	bool			btree_roots_dirty;
 	struct mutex		btree_root_lock;
 
 	struct btree_cache	btree_cache;
 
-	mempool_t		btree_reserve_pool;
-
 	/*
 	 * Cache of allocated btree nodes - if we allocate a btree node and
 	 * don't use it, if we free it that space can't be reused until going
@@ -627,6 +624,9 @@ struct bch_fs {
 	struct mutex		btree_interior_update_lock;
 	struct closure_waitlist	btree_interior_update_wait;
 
+	struct workqueue_struct	*btree_interior_update_worker;
+	struct work_struct	btree_interior_update_work;
+
 	mempool_t		btree_iters_pool;
 
 	struct workqueue_struct	*wq;
@@ -466,6 +466,7 @@ static void bch2_mark_superblocks(struct bch_fs *c)
 	mutex_unlock(&c->sb_lock);
 }
 
+#if 0
 /* Also see bch2_pending_btree_node_free_insert_done() */
 static void bch2_mark_pending_btree_node_frees(struct bch_fs *c)
 {
@@ -483,6 +484,7 @@ static void bch2_mark_pending_btree_node_frees(struct bch_fs *c)
 
 	mutex_unlock(&c->btree_interior_update_lock);
 }
+#endif
 
 static void bch2_mark_allocator_buckets(struct bch_fs *c)
 {
@@ -801,6 +803,10 @@ int bch2_gc(struct bch_fs *c, struct journal_keys *journal_keys,
 	trace_gc_start(c);
 
 	down_write(&c->gc_lock);
+
+	/* flush interior btree updates: */
+	closure_wait_event(&c->btree_interior_update_wait,
+			   !bch2_btree_interior_updates_nr_pending(c));
 again:
 	ret = bch2_gc_start(c, metadata_only);
 	if (ret)
@@ -812,7 +818,9 @@ again:
 	if (ret)
 		goto out;
 
+#if 0
 	bch2_mark_pending_btree_node_frees(c);
+#endif
 	bch2_mark_allocator_buckets(c);
 
 	c->gc_count++;
@@ -1037,6 +1045,8 @@ static void bch2_coalesce_nodes(struct bch_fs *c, struct btree_iter *iter,
 		btree_node_reset_sib_u64s(n);
 
 		bch2_btree_build_aux_trees(n);
+
+		bch2_btree_update_add_new_node(as, n);
 		six_unlock_write(&n->c.lock);
 
 		bch2_btree_node_write(c, n, SIX_LOCK_intent);
@@ -1085,7 +1095,7 @@ next:
 	bch2_btree_iter_node_replace(iter, new_nodes[0]);
 
 	for (i = 0; i < nr_new_nodes; i++)
-		bch2_open_buckets_put(c, &new_nodes[i]->ob);
+		bch2_btree_update_get_open_buckets(as, new_nodes[i]);
 
 	/* Free the old nodes and update our sliding window */
 	for (i = 0; i < nr_old_nodes; i++) {
@@ -310,6 +310,7 @@ struct btree_trans {
 	/* update path: */
 	struct jset_entry	*extra_journal_entries;
 	unsigned		extra_journal_entry_u64s;
+	struct journal_entry_pin *journal_pin;
 
 	struct journal_res	journal_res;
 	struct journal_preres	journal_preres;
(File diff suppressed because it is too large.)
@@ -6,34 +6,13 @@
 #include "btree_locking.h"
 #include "btree_update.h"
 
-struct btree_reserve {
-	struct disk_reservation	disk_res;
-	unsigned		nr;
-	struct btree		*b[BTREE_RESERVE_MAX];
-};
-
 void __bch2_btree_calc_format(struct bkey_format_state *, struct btree *);
 bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *,
 				struct bkey_format *);
 
-/* Btree node freeing/allocation: */
+#define BTREE_UPDATE_NODES_MAX		((BTREE_MAX_DEPTH - 2) * 2 + GC_MERGE_NODES)
 
-/*
- * Tracks a btree node that has been (or is about to be) freed in memory, but
- * has _not_ yet been freed on disk (because the write that makes the new
- * node(s) visible and frees the old hasn't completed yet)
- */
-struct pending_btree_node_free {
-	bool			index_update_done;
-
-	__le64			seq;
-	enum btree_id		btree_id;
-	unsigned		level;
-	__BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
-};
-
-#define BTREE_UPDATE_JOURNAL_RES		\
-	((BKEY_BTREE_PTR_U64s_MAX + 1) * (BTREE_MAX_DEPTH - 1) * 2)
+#define BTREE_UPDATE_JOURNAL_RES	(BTREE_UPDATE_NODES_MAX * (BKEY_BTREE_PTR_U64s_MAX + 1))
 
 /*
  * Tracks an in progress split/rewrite of a btree node and the update to the
@@ -72,9 +51,8 @@ struct btree_update {
 	unsigned			nodes_written:1;
 
 	enum btree_id			btree_id;
-	u8				level;
 
-	struct btree_reserve		*reserve;
+	struct disk_reservation		disk_res;
 	struct journal_preres		journal_preres;
 
 	/*
@@ -96,17 +74,28 @@ struct btree_update {
 	 */
 	struct journal_entry_pin	journal;
 
-	/*
-	 * Nodes being freed:
-	 * Protected by c->btree_node_pending_free_lock
-	 */
-	struct pending_btree_node_free	pending[BTREE_MAX_DEPTH + GC_MERGE_NODES];
-	unsigned			nr_pending;
+	/* Preallocated nodes we reserve when we start the update: */
+	struct btree			*prealloc_nodes[BTREE_UPDATE_NODES_MAX];
+	unsigned			nr_prealloc_nodes;
+
+	/* Nodes being freed: */
+	struct keylist			old_keys;
+	u64				_old_keys[BTREE_UPDATE_NODES_MAX *
						  BKEY_BTREE_PTR_VAL_U64s_MAX];
+
+	/* Nodes being added: */
+	struct keylist			new_keys;
+	u64				_new_keys[BTREE_UPDATE_NODES_MAX *
						  BKEY_BTREE_PTR_VAL_U64s_MAX];
 
 	/* New nodes, that will be made reachable by this update: */
-	struct btree			*new_nodes[BTREE_MAX_DEPTH * 2 + GC_MERGE_NODES];
+	struct btree			*new_nodes[BTREE_UPDATE_NODES_MAX];
 	unsigned			nr_new_nodes;
 
+	u8				open_buckets[BTREE_UPDATE_NODES_MAX *
						     BCH_REPLICAS_MAX];
+	u8				nr_open_buckets;
+
 	unsigned			journal_u64s;
 	u64				journal_entries[BTREE_UPDATE_JOURNAL_RES];
 
@@ -120,14 +109,12 @@ struct btree_update {
 	u64				inline_keys[BKEY_BTREE_PTR_U64s_MAX * 3];
 };
 
-#define for_each_pending_btree_node_free(c, as, p)			\
-	list_for_each_entry(as, &c->btree_interior_update_list, list)	\
-		for (p = as->pending; p < as->pending + as->nr_pending; p++)
-
 void bch2_btree_node_free_inmem(struct bch_fs *, struct btree *,
 				struct btree_iter *);
 void bch2_btree_node_free_never_inserted(struct bch_fs *, struct btree *);
 
+void bch2_btree_update_get_open_buckets(struct btree_update *, struct btree *);
+
 struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *,
 						  struct btree *,
 						  struct bkey_format);
@@ -139,6 +126,7 @@ bch2_btree_update_start(struct btree_trans *, enum btree_id, unsigned,
 
 void bch2_btree_interior_update_will_free_node(struct btree_update *,
 					       struct btree *);
+void bch2_btree_update_add_new_node(struct btree_update *, struct btree *);
 
 void bch2_btree_insert_node(struct btree_update *, struct btree *,
 			    struct btree_iter *, struct keylist *,
@@ -333,6 +321,10 @@ ssize_t bch2_btree_updates_print(struct bch_fs *, char *);
 
 size_t bch2_btree_interior_updates_nr_pending(struct bch_fs *);
 
+void bch2_journal_entries_to_btree_roots(struct bch_fs *, struct jset *);
+struct jset_entry *bch2_btree_roots_to_journal_entries(struct bch_fs *,
+					struct jset_entry *, struct jset_entry *);
+
 void bch2_fs_btree_interior_update_exit(struct bch_fs *);
 int bch2_fs_btree_interior_update_init(struct bch_fs *);
 
@@ -414,8 +414,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
 	}
 
 	if (unlikely(trans->extra_journal_entry_u64s)) {
-		memcpy_u64s_small(bch2_journal_reservation_entry(&c->journal,
-								 &trans->journal_res),
+		memcpy_u64s_small(journal_res_entry(&c->journal, &trans->journal_res),
 				  trans->extra_journal_entries,
 				  trans->extra_journal_entry_u64s);
 
@@ -521,6 +520,10 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
 			bch2_btree_node_unlock_write_inlined(iter_l(i->iter)->b,
 							     i->iter);
 
+	if (!ret && trans->journal_pin)
+		bch2_journal_pin_add(&trans->c->journal, trans->journal_res.seq,
+				     trans->journal_pin, NULL);
+
 	/*
 	 * Drop journal reservation after dropping write locks, since dropping
 	 * the journal reservation may kick off a journal write:
@@ -1180,7 +1180,7 @@ static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
 	return 0;
 }
 
-int bch2_mark_key_locked(struct bch_fs *c,
+static int bch2_mark_key_locked(struct bch_fs *c,
 		   struct bkey_s_c k,
 		   unsigned offset, s64 sectors,
 		   struct bch_fs_usage *fs_usage,
@@ -259,8 +259,6 @@ void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *,
 			       size_t, enum bch_data_type, unsigned,
 			       struct gc_pos, unsigned);
 
-int bch2_mark_key_locked(struct bch_fs *, struct bkey_s_c, unsigned, s64,
-			 struct bch_fs_usage *, u64, unsigned);
 int bch2_mark_key(struct bch_fs *, struct bkey_s_c, unsigned, s64,
 		  struct bch_fs_usage *, u64, unsigned);
 int bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage_online *,
@@ -958,15 +958,12 @@ void bch2_dev_journal_stop(struct journal *j, struct bch_dev *ca)
 
 void bch2_fs_journal_stop(struct journal *j)
 {
-	struct bch_fs *c = container_of(j, struct bch_fs, journal);
-
 	bch2_journal_flush_all_pins(j);
 
 	wait_event(j->wait, journal_entry_close(j));
 
 	/* do we need to write another journal entry? */
-	if (test_bit(JOURNAL_NOT_EMPTY, &j->flags) ||
-	    c->btree_roots_dirty)
+	if (test_bit(JOURNAL_NOT_EMPTY, &j->flags))
 		bch2_journal_meta(j);
 
 	journal_quiesce(j);
@@ -200,25 +200,15 @@ bch2_journal_add_entry_noreservation(struct journal_buf *buf, size_t u64s)
 }
 
 static inline struct jset_entry *
-bch2_journal_reservation_entry(struct journal *j, struct journal_res *res)
+journal_res_entry(struct journal *j, struct journal_res *res)
 {
 	return vstruct_idx(j->buf[res->idx].data, res->offset);
 }
 
-static inline void bch2_journal_add_entry(struct journal *j, struct journal_res *res,
-					  unsigned type, enum btree_id id,
-					  unsigned level,
+static inline unsigned journal_entry_set(struct jset_entry *entry, unsigned type,
+					  enum btree_id id, unsigned level,
 					  const void *data, unsigned u64s)
 {
-	struct jset_entry *entry = bch2_journal_reservation_entry(j, res);
-	unsigned actual = jset_u64s(u64s);
-
-	EBUG_ON(!res->ref);
-	EBUG_ON(actual > res->u64s);
-
-	res->offset	+= actual;
-	res->u64s	-= actual;
-
 	entry->u64s	= cpu_to_le16(u64s);
 	entry->btree_id = id;
 	entry->level	= level;
@@ -227,6 +217,23 @@ static inline void bch2_journal_add_entry(struct journal *j, struct journal_res
 	entry->pad[1]	= 0;
 	entry->pad[2]	= 0;
 	memcpy_u64s_small(entry->_data, data, u64s);
+
+	return jset_u64s(u64s);
+}
+
+static inline void bch2_journal_add_entry(struct journal *j, struct journal_res *res,
+					  unsigned type, enum btree_id id,
+					  unsigned level,
+					  const void *data, unsigned u64s)
+{
+	unsigned actual = journal_entry_set(journal_res_entry(j, res),
+			       type, id, level, data, u64s);
+
+	EBUG_ON(!res->ref);
+	EBUG_ON(actual > res->u64s);
+
+	res->offset	+= actual;
+	res->u64s	-= actual;
+}
 
 static inline void bch2_journal_add_keys(struct journal *j, struct journal_res *res,
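Editorial note on the journal.h refactor above: journal_entry_set() only fills in a jset entry and reports how much space it used, while bch2_journal_add_entry() keeps the reservation bookkeeping. Splitting them lets the entry-filling half be reused by paths that append entries without holding a journal reservation, such as the journal write path adding btree-root entries. Below is a simplified standalone illustration of that split, using made-up types (entry, reservation) rather than the kernel's.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct entry {
	uint16_t	u64s;
	uint8_t		type;
	uint64_t	data[8];
};

struct reservation {
	struct entry	*buf;		/* buffer the reservation points into */
	unsigned	offset;		/* next free slot */
	unsigned	remaining;	/* slots still reserved */
};

/* Pure helper: fill in an entry and report the space it consumed */
static unsigned entry_set(struct entry *e, uint8_t type,
			  const uint64_t *data, unsigned u64s)
{
	e->u64s = (uint16_t) u64s;
	e->type = type;
	memcpy(e->data, data, u64s * sizeof(*data));
	return 1;			/* one slot consumed in this toy model */
}

/* Reservation-based wrapper: same fill, plus the bookkeeping */
static void add_entry(struct reservation *res, uint8_t type,
		      const uint64_t *data, unsigned u64s)
{
	unsigned used = entry_set(&res->buf[res->offset], type, data, u64s);

	res->offset	+= used;
	res->remaining	-= used;
}

int main(void)
{
	struct entry buf[4];
	struct reservation res = { .buf = buf, .offset = 0, .remaining = 4 };
	uint64_t key[2] = { 42, 7 };

	add_entry(&res, 1, key, 2);		/* normal, reserved path */
	entry_set(&buf[res.offset], 2, key, 2);	/* append without touching the reservation */

	printf("reserved slots used: %u, remaining: %u\n", res.offset, res.remaining);
	return 0;
}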
@@ -2,6 +2,7 @@
 #include "bcachefs.h"
 #include "alloc_foreground.h"
 #include "btree_io.h"
+#include "btree_update_interior.h"
 #include "buckets.h"
 #include "checksum.h"
 #include "error.h"
@@ -992,8 +993,23 @@ void bch2_journal_write(struct closure *cl)
 
 	j->write_start_time = local_clock();
 
-	start	= vstruct_last(jset);
-	end	= bch2_journal_super_entries_add_common(c, start,
+	/*
+	 * New btree roots are set by journalling them; when the journal entry
+	 * gets written we have to propagate them to c->btree_roots
+	 *
+	 * But, every journal entry we write has to contain all the btree roots
+	 * (at least for now); so after we copy btree roots to c->btree_roots we
+	 * have to get any missing btree roots and add them to this journal
+	 * entry:
+	 */
+
+	bch2_journal_entries_to_btree_roots(c, jset);
+
+	start = end = vstruct_last(jset);
+
+	end	= bch2_btree_roots_to_journal_entries(c, jset->start, end);
+
+	end	= bch2_journal_super_entries_add_common(c, end,
 						le64_to_cpu(jset->seq));
 	u64s	= (u64 *) end - (u64 *) start;
 	BUG_ON(u64s > j->entry_u64s_reserved);
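Editorial note on the comment added to bch2_journal_write() above: it describes a two-way sync, where roots journalled in the current entry are propagated to c->btree_roots, and any live roots the entry does not yet carry are appended to it, so every journal entry ends up containing all btree roots. The following is a standalone sketch of that flow under simplified stand-in types (root, root_entry), not the kernel's jset structures.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NR_BTREES 4	/* toy value */

struct root {
	bool		alive;
	uint64_t	key;	/* stand-in for the real root pointer key */
};

struct root_entry {
	unsigned	btree_id;
	uint64_t	key;
};

/* Propagate roots journalled in this entry set into the in-memory table */
static void entries_to_roots(struct root *table,
			     const struct root_entry *e, unsigned nr)
{
	for (unsigned i = 0; i < nr; i++) {
		table[e[i].btree_id].alive = true;
		table[e[i].btree_id].key   = e[i].key;
	}
}

/* Append entries for any live roots this journal entry doesn't carry yet */
static unsigned roots_to_entries(const struct root *table,
				 struct root_entry *e, unsigned nr)
{
	for (unsigned id = 0; id < NR_BTREES; id++) {
		bool present = false;

		for (unsigned i = 0; i < nr; i++)
			present |= e[i].btree_id == id;

		if (table[id].alive && !present)
			e[nr++] = (struct root_entry) { id, table[id].key };
	}
	return nr;
}

int main(void)
{
	struct root table[NR_BTREES] = { [1] = { true, 111 } };
	struct root_entry entries[NR_BTREES] = { { 0, 500 } };	/* new root for btree 0 */
	unsigned nr = 1;

	entries_to_roots(table, entries, nr);		/* btree 0's new root lands in the table */
	nr = roots_to_entries(table, entries, nr);	/* btree 1's root gets added to the entry */

	for (unsigned i = 0; i < nr; i++)
		printf("entry: btree %u, root key %llu\n",
		       entries[i].btree_id,
		       (unsigned long long) entries[i].key);
	return 0;
}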
@@ -330,7 +330,7 @@ static void bch2_journal_pin_add_locked(struct journal *j, u64 seq,
 
 	__journal_pin_drop(j, pin);
 
-	BUG_ON(!atomic_read(&pin_list->count));
+	BUG_ON(!atomic_read(&pin_list->count) && seq == journal_last_seq(j));
 
 	atomic_inc(&pin_list->count);
 	pin->seq	= seq;
@@ -38,7 +38,7 @@ static inline void bch2_journal_pin_add(struct journal *j, u64 seq,
 					struct journal_entry_pin *pin,
 					journal_pin_flush_fn flush_fn)
 {
-	if (unlikely(!journal_pin_active(pin)))
+	if (unlikely(!journal_pin_active(pin) || pin->seq > seq))
 		__bch2_journal_pin_add(j, seq, pin, flush_fn);
 }
 
@@ -6,7 +6,7 @@
 int bch2_keylist_realloc(struct keylist *l, u64 *inline_u64s,
 			size_t nr_inline_u64s, size_t new_u64s)
 {
-	size_t oldsize = bch_keylist_u64s(l);
+	size_t oldsize = bch2_keylist_u64s(l);
 	size_t newsize = oldsize + new_u64s;
 	u64 *old_buf = l->keys_p == inline_u64s ? NULL : l->keys_p;
 	u64 *new_keys;
@@ -52,7 +52,7 @@ void bch2_keylist_pop_front(struct keylist *l)
 
 	memmove_u64s_down(l->keys,
 			  bkey_next(l->keys),
-			  bch_keylist_u64s(l));
+			  bch2_keylist_u64s(l));
 }
 
 #ifdef CONFIG_BCACHEFS_DEBUG
@@ -36,14 +36,14 @@ static inline bool bch2_keylist_empty(struct keylist *l)
 	return l->top == l->keys;
 }
 
-static inline size_t bch_keylist_u64s(struct keylist *l)
+static inline size_t bch2_keylist_u64s(struct keylist *l)
 {
 	return l->top_p - l->keys_p;
 }
 
 static inline size_t bch2_keylist_bytes(struct keylist *l)
 {
-	return bch_keylist_u64s(l) * sizeof(u64);
+	return bch2_keylist_u64s(l) * sizeof(u64);
 }
 
 static inline struct bkey_i *bch2_keylist_front(struct keylist *l)
@@ -151,15 +151,8 @@ retry:
 	}
 
 	/* flush relevant btree updates */
-	while (1) {
-		closure_wait_event(&c->btree_interior_update_wait,
-				   !bch2_btree_interior_updates_nr_pending(c) ||
-				   c->btree_roots_dirty);
-		if (c->btree_roots_dirty)
-			bch2_journal_meta(&c->journal);
-		if (!bch2_btree_interior_updates_nr_pending(c))
-			break;
-	}
+	closure_wait_event(&c->btree_interior_update_wait,
+			   !bch2_btree_interior_updates_nr_pending(c));
 
 	ret = 0;
 err:
@@ -774,14 +774,8 @@ int bch2_data_job(struct bch_fs *c,
 
 		ret = bch2_move_btree(c, rereplicate_pred, c, stats) ?: ret;
 
-		while (1) {
-			closure_wait_event(&c->btree_interior_update_wait,
-					   !bch2_btree_interior_updates_nr_pending(c) ||
-					   c->btree_roots_dirty);
-			if (!bch2_btree_interior_updates_nr_pending(c))
-				break;
-			bch2_journal_meta(&c->journal);
-		}
+		closure_wait_event(&c->btree_interior_update_wait,
+				   !bch2_btree_interior_updates_nr_pending(c));
 
 		ret = bch2_replicas_gc2(c) ?: ret;
 
@@ -763,6 +763,7 @@ static int verify_superblock_clean(struct bch_fs *c,
 			"superblock read clock doesn't match journal after clean shutdown");
 
 	for (i = 0; i < BTREE_ID_NR; i++) {
+		char buf1[200], buf2[200];
 		struct bkey_i *k1, *k2;
 		unsigned l1 = 0, l2 = 0;
 
@@ -778,7 +779,11 @@ static int verify_superblock_clean(struct bch_fs *c,
 				    k1->k.u64s != k2->k.u64s ||
 				    memcmp(k1, k2, bkey_bytes(k1)) ||
 				    l1 != l2, c,
-			"superblock btree root doesn't match journal after clean shutdown");
+			"superblock btree root %u doesn't match journal after clean shutdown\n"
+			"sb:      l=%u %s\n"
+			"journal: l=%u %s\n", i,
+			l1, (bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(k1)), buf1),
+			l2, (bch2_bkey_val_to_text(&PBUF(buf2), c, bkey_i_to_s_c(k2)), buf2));
 	}
 fsck_err:
 	return ret;
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
+#include "btree_update_interior.h"
 #include "buckets.h"
 #include "checksum.h"
 #include "disk_groups.h"
@@ -955,7 +956,6 @@ int bch2_fs_mark_dirty(struct bch_fs *c)
 
 	mutex_lock(&c->sb_lock);
 	SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
-	c->disk_sb.sb->compat[0] &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_METADATA);
 	c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_new_extent_overwrite;
 	c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_extents_above_btree_updates;
 	c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_btree_updates_journalled;
@@ -989,27 +989,8 @@ bch2_journal_super_entries_add_common(struct bch_fs *c,
 				      struct jset_entry *entry,
 				      u64 journal_seq)
 {
-	struct btree_root *r;
 	unsigned i;
 
-	mutex_lock(&c->btree_root_lock);
-
-	for (r = c->btree_roots;
-	     r < c->btree_roots + BTREE_ID_NR;
-	     r++)
-		if (r->alive) {
-			entry_init_u64s(entry, r->key.u64s + 1);
-			entry->btree_id	= r - c->btree_roots;
-			entry->level	= r->level;
-			entry->type	= BCH_JSET_ENTRY_btree_root;
-			bkey_copy(&entry->start[0], &r->key);
-
-			entry = vstruct_next(entry);
-		}
-	c->btree_roots_dirty = false;
-
-	mutex_unlock(&c->btree_root_lock);
-
 	percpu_down_read(&c->mark_lock);
 
 	if (!journal_seq) {
@@ -1111,6 +1092,7 @@ void bch2_fs_mark_clean(struct bch_fs *c)
 
 	entry = sb_clean->start;
 	entry = bch2_journal_super_entries_add_common(c, entry, 0);
+	entry = bch2_btree_roots_to_journal_entries(c, entry, entry);
 	BUG_ON((void *) entry > vstruct_end(&sb_clean->field));
 
 	memset(entry, 0,
@@ -227,6 +227,7 @@ static void __bch2_fs_read_only(struct bch_fs *c)
 		 */
 		closure_wait_event(&c->btree_interior_update_wait,
 				   !bch2_btree_interior_updates_nr_pending(c));
+		flush_work(&c->btree_interior_update_work);
 
 		clean_passes = wrote ? 0 : clean_passes + 1;
 	} while (clean_passes < 2);
@@ -234,6 +235,10 @@ static void __bch2_fs_read_only(struct bch_fs *c)
 	bch_verbose(c, "writing alloc info complete");
 	set_bit(BCH_FS_ALLOC_CLEAN, &c->flags);
 nowrote_alloc:
+	closure_wait_event(&c->btree_interior_update_wait,
+			   !bch2_btree_interior_updates_nr_pending(c));
+	flush_work(&c->btree_interior_update_work);
+
 	for_each_member_device(ca, c, i)
 		bch2_dev_allocator_stop(ca);
 