	for-6.2-tag
-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAmOSLtIACgkQxWXV+ddt
 WDvpQA//dQ3Wosz5puFNiZvoSUn/BnYJueZHjwF0bWY8OYINkF1PvDenu/WotyFz
 Ozf4Yl4Afxncz+FjDnOtlpr6KsSU5NqdGM3NrY0eNsxd2t1KrTsN0LgkA4m24p8b
 YsYp7pygbMm7c+h0X4uFpebY4lABkEPCBXnI//ktsls0xG5sOvGfZA3rdUP0bou2
 JTn6hk+s0cLTNoTiOCGNHRJbeTzHLR0viZj/E4LCJfCeJvAmOLZamUjqe9sBNYAg
 YtsrZTpUIL3JgmRi5B6jG4fHSXOnE14mKmRIR3xPME6J6eoYyNOeuSh1oNmJEuoE
 B7nD5We+x5+isjXNw/V5CQrs7FF09UbdpbNb9NF5CYQWv40OCeefuai1opGtBUxX
 dvbfmf1blYpWW/wfFOKQwMOsl8kZIZYx68FW2OBUNglB6yRpX/3QgFSGb8kPCr83
 DW2ttqwkpSNPMKk92I/owIc4BRvZ+LMR/PimEHB/Sa2apZA2/L+7RGwoaaei1QNX
 1tJxHWeJFLDZ+YRxjO1eKqhWdGQPn1kkq8LoXLi3tGaNF4kYQfhWOSM3WRowvx1q
 f99XRgA8JQnqZS83zqRIspWlpFK0CFdvzG1Zlqx+eoxERfeaMNA2fHxv1YCyFV4+
 TiXgsnCo+PIBwlvL/HjUWZgYE9+AD+NN5vyoE2UDYff4AgBFTE8=
 =Nqg9
 -----END PGP SIGNATURE-----
Merge tag 'for-6.2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba:
 "This round there are a lot of cleanups and moved code so the diffstat
  looks huge, otherwise there are some nice performance improvements and
  an update to raid56 reliability.
  User visible features:
   - raid56 reliability vs performance trade-off:
      - fix destructive RMW for raid5 data (raid6 still needs work): do
        full checksum verification for all data during RMW cycle, this
        should prevent rewriting potentially corrupted data without
        notice
      - stripes are cached in memory which should reduce the performance
        impact but still can hurt some workloads
      - checksums are verified after repair again
      - this is the last option without introducing additional features
        (write intent bitmap, journal, another tree), the extra checksum
        read/verification was supposed to be avoided by the original
        implementation exactly for performance reasons but that caused
        all the reliability problems
   - discard=async by default for devices that support it
   - implement emergency flush reserve to avoid almost all unnecessary
     transaction aborts due to ENOSPC in cases where there are too many
     delayed refs or delayed allocation
   - skip block group synchronization if there's no change in used
     bytes, which can reduce transaction commit count for some workloads
  Performance improvements:
   - fiemap and lseek:
      - overall speedup due to skipping unnecessary or duplicate
        searches (-40% run time)
      - cache some data structures and sharedness of extents (-30% run
        time)
   - send:
      - faster backref resolution when finding clones
      - cached leaf to root mapping for faster backref walking
      - improved clone/sharing detection
      - overall run time improvements (-70%)
  Core:
   - module initialization converted to a table of function pointers run
     in a sequence
   - preparation for fscrypt, extend passing file names across calls,
     dir item can store encryption status
   - raid56 updates:
      - more accurate error tracking of sectors within stripe
      - simplify recovery path and remove dedicated endio worker kthread
      - simplify scrub call paths
      - refactoring to support the extra data checksum verification
        during RMW cycle
   - tree block parentness checks consolidated and done at metadata read
     time
   - improved error handling
   - cleanups:
      - move a lot of code for better synchronization between kernel and
        user space sources, split big files
      - enum cleanups
      - GFP flag cleanups
      - header file cleanups, prototypes, dependencies
      - redundant parameter cleanups
      - inline extent handling simplifications
      - inode parameter conversion
      - data structure cleanups, reductions, renames, merges"
* tag 'for-6.2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (249 commits)
  btrfs: print transaction aborted messages with an error level
  btrfs: sync some cleanups from progs into uapi/btrfs.h
  btrfs: do not BUG_ON() on ENOMEM when dropping extent items for a range
  btrfs: fix extent map use-after-free when handling missing device in read_one_chunk
  btrfs: remove outdated logic from overwrite_item() and add assertion
  btrfs: unify overwrite_item() and do_overwrite_item()
  btrfs: replace strncpy() with strscpy()
  btrfs: fix uninitialized variable in find_first_clear_extent_bit
  btrfs: fix uninitialized parent in insert_state
  btrfs: add might_sleep() annotations
  btrfs: add stack helpers for a few btrfs items
  btrfs: add nr_global_roots to the super block definition
  btrfs: remove BTRFS_LEAF_DATA_OFFSET
  btrfs: add helpers for manipulating leaf items and data
  btrfs: add eb to btrfs_node_key_ptr_offset
  btrfs: pass the extent buffer for the btrfs_item_nr helpers
  btrfs: move the csum helpers into ctree.h
  btrfs: move eb offset helpers into extent_io.h
  btrfs: move file_extent_item helpers into file-item.h
  btrfs: move leaf_data_end into ctree.c
  ...
			
			
Commit: 149c51f876

121 changed files with 11247 additions and 9489 deletions
fs/btrfs/Makefile
@@ -23,15 +23,15 @@ obj-$(CONFIG_BTRFS_FS) := btrfs.o

 btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
 	   file-item.o inode-item.o disk-io.o \
-	   transaction.o inode.o file.o tree-defrag.o \
-	   extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
+	   transaction.o inode.o file.o defrag.o \
+	   extent_map.o sysfs.o accessors.o xattr.o ordered-data.o \
 	   extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
 	   export.o tree-log.o free-space-cache.o zlib.o lzo.o zstd.o \
 	   compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
 	   backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
 	   uuid-tree.o props.o free-space-tree.o tree-checker.o space-info.o \
 	   block-rsv.o delalloc-space.o block-group.o discard.o reflink.o \
-	   subpage.o tree-mod-log.o extent-io-tree.o
+	   subpage.o tree-mod-log.o extent-io-tree.o fs.o messages.o bio.o

 btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
 btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o

fs/btrfs/accessors.c
@@ -4,8 +4,9 @@
  */

 #include <asm/unaligned.h>

+#include "messages.h"
 #include "ctree.h"
+#include "accessors.h"

 static bool check_setget_bounds(const struct extent_buffer *eb,
 				const void *ptr, unsigned off, int size)
@@ -23,6 +24,13 @@ static bool check_setget_bounds(const struct extent_buffer *eb,
 	return true;
 }

+void btrfs_init_map_token(struct btrfs_map_token *token, struct extent_buffer *eb)
+{
+	token->eb = eb;
+	token->kaddr = page_address(eb->pages[0]);
+	token->offset = 0;
+}
+
 /*
  * Macro templates that define helpers to read/write extent buffer data of a
  * given size, that are also used via ctree.h for access to item members by
@@ -160,7 +168,7 @@ DEFINE_BTRFS_SETGET_BITS(64)
 void btrfs_node_key(const struct extent_buffer *eb,
 		    struct btrfs_disk_key *disk_key, int nr)
 {
-	unsigned long ptr = btrfs_node_key_ptr_offset(nr);
+	unsigned long ptr = btrfs_node_key_ptr_offset(eb, nr);
 	read_eb_member(eb, (struct btrfs_key_ptr *)ptr,
 		       struct btrfs_key_ptr, key, disk_key);
 }
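
The new btrfs_init_map_token() above caches the kernel address of the extent buffer's first page so that repeated field accessors can reuse it instead of re-deriving it on every call. The following is a standalone userspace model of that caching idea; the names mirror the kernel's, but this is an illustrative sketch, not kernel code:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct buffer {
	unsigned char data[4096];
};

struct map_token {
	struct buffer *eb;	/* buffer being accessed */
	unsigned char *kaddr;	/* cached base address */
	unsigned long offset;	/* offset the cached address is valid for */
};

/* Mirrors btrfs_init_map_token(): resolve the base address once. */
static void init_map_token(struct map_token *token, struct buffer *eb)
{
	token->eb = eb;
	token->kaddr = eb->data;	/* kernel: page_address(eb->pages[0]) */
	token->offset = 0;
}

/* Repeated reads reuse token->kaddr instead of re-deriving it. */
static uint32_t get_token_u32(const struct map_token *token, unsigned long off)
{
	uint32_t v;

	memcpy(&v, token->kaddr + off, sizeof(v));	/* unaligned-safe */
	return v;
}

int main(void)
{
	struct buffer eb = { .data = { 0 } };
	struct map_token token;
	uint32_t x = 0x12345678;

	memcpy(eb.data + 16, &x, sizeof(x));
	init_map_token(&token, &eb);
	printf("0x%x\n", get_token_u32(&token, 16));
	return 0;
}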
							
								
								
									
fs/btrfs/accessors.h (new file, 1073 lines)
[File diff suppressed because it is too large]

fs/btrfs/acl.c
@@ -11,10 +11,10 @@
 #include <linux/sched.h>
 #include <linux/sched/mm.h>
 #include <linux/slab.h>
-
 #include "ctree.h"
 #include "btrfs_inode.h"
 #include "xattr.h"
+#include "acl.h"

 struct posix_acl *btrfs_get_acl(struct inode *inode, int type, bool rcu)
 {

							
								
								
									
fs/btrfs/acl.h (new file, 27 lines)
@@ -0,0 +1,27 @@

/* SPDX-License-Identifier: GPL-2.0 */

#ifndef BTRFS_ACL_H
#define BTRFS_ACL_H

#ifdef CONFIG_BTRFS_FS_POSIX_ACL

struct posix_acl *btrfs_get_acl(struct inode *inode, int type, bool rcu);
int btrfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
		  struct posix_acl *acl, int type);
int __btrfs_set_acl(struct btrfs_trans_handle *trans, struct inode *inode,
		    struct posix_acl *acl, int type);

#else

#define btrfs_get_acl NULL
#define btrfs_set_acl NULL
static inline int __btrfs_set_acl(struct btrfs_trans_handle *trans,
				  struct inode *inode, struct posix_acl *acl,
				  int type)
{
	return -EOPNOTSUPP;
}

#endif

#endif
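
The header uses the common kernel pattern of compiling the ACL entry points down to NULL defines plus an inline -EOPNOTSUPP stub when CONFIG_BTRFS_FS_POSIX_ACL is disabled, so callers need no runtime feature test. A toy, self-contained demonstration of the same pattern (all names invented; build with -DCONFIG_TOY_ACL to enable the feature):

#include <errno.h>
#include <stdio.h>

#ifdef CONFIG_TOY_ACL
static int toy_set_acl(int mode)
{
	printf("set acl %o\n", mode);
	return 0;
}
#define TOY_SET_ACL_HOOK toy_set_acl
#else
#define TOY_SET_ACL_HOOK NULL
static inline int toy_set_acl(int mode)
{
	(void)mode;
	return -EOPNOTSUPP;	/* same errno the btrfs stub returns */
}
#endif

int main(void)
{
	int (*hook)(int) = TOY_SET_ACL_HOOK;
	int ret;

	/* VFS-style caller: a NULL hook means "unsupported" with no
	 * runtime feature check in the common path. */
	if (hook)
		ret = hook(0644);
	else
		ret = toy_set_acl(0644);
	printf("result: %d\n", ret);
	return 0;
}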
							
								
								
									
fs/btrfs/backref.c (1001 lines changed)
[File diff suppressed because it is too large]

fs/btrfs/backref.h
@@ -7,10 +7,128 @@
 #define BTRFS_BACKREF_H

 #include <linux/btrfs.h>
+#include "messages.h"
 #include "ulist.h"
 #include "disk-io.h"
 #include "extent_io.h"

+/*
+ * Used by implementations of iterate_extent_inodes_t (see definition below) to
+ * signal that backref iteration can stop immediately and no error happened.
+ * The value must be non-negative and must not be 0, 1 (which is a common return
+ * value from things like btrfs_search_slot() and used internally in the backref
+ * walking code) and different from BACKREF_FOUND_SHARED and
+ * BACKREF_FOUND_NOT_SHARED.
+ */
+#define BTRFS_ITERATE_EXTENT_INODES_STOP 5
+
+/*
+ * Should return 0 if no errors happened and iteration of backrefs should
+ * continue. Can return BTRFS_ITERATE_EXTENT_INODES_STOP or any other non-zero
+ * value to immediately stop iteration and possibly signal an error back to
+ * the caller.
+ */
+typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 num_bytes,
+				      u64 root, void *ctx);
+
+/*
+ * Context and arguments for backref walking functions. Some of the fields are
+ * to be filled by the caller of such functions while others are filled by the
+ * functions themselves, as described below.
+ */
+struct btrfs_backref_walk_ctx {
+	/*
+	 * The address of the extent for which we are doing backref walking.
+	 * Can be either a data extent or a metadata extent.
+	 *
+	 * Must always be set by the top level caller.
+	 */
+	u64 bytenr;
+	/*
+	 * Offset relative to the target extent. This is only used for data
+	 * extents, and it's meaningful because we can have file extent items
+	 * that point only to a section of a data extent ("bookend" extents),
+	 * and we want to filter out any that don't point to a section of the
+	 * data extent containing the given offset.
+	 *
+	 * Must always be set by the top level caller.
+	 */
+	u64 extent_item_pos;
+	/*
+	 * If true and bytenr corresponds to a data extent, then references from
+	 * all file extent items that point to the data extent are considered,
+	 * and @extent_item_pos is ignored.
+	 */
+	bool ignore_extent_item_pos;
+	/* A valid transaction handle or NULL. */
+	struct btrfs_trans_handle *trans;
+	/*
+	 * The file system's info object, can not be NULL.
+	 *
+	 * Must always be set by the top level caller.
+	 */
+	struct btrfs_fs_info *fs_info;
+	/*
+	 * Time sequence acquired from btrfs_get_tree_mod_seq(), in case the
+	 * caller joined the tree mod log to get a consistent view of b+trees
+	 * while we do backref walking, or BTRFS_SEQ_LAST.
+	 * When using BTRFS_SEQ_LAST, delayed refs are not checked and it uses
+	 * commit roots when searching b+trees - this is a special case for
+	 * qgroups used during a transaction commit.
+	 */
+	u64 time_seq;
+	/*
+	 * Used to collect the bytenr of metadata extents that point to the
+	 * target extent.
+	 */
+	struct ulist *refs;
+	/*
+	 * List used to collect the IDs of the roots from which the target
+	 * extent is accessible. Can be NULL in case the caller does not care
+	 * about collecting root IDs.
+	 */
+	struct ulist *roots;
+	/*
+	 * Used by iterate_extent_inodes() and the main backref walk code
+	 * (find_parent_nodes()). Lookup and store functions for an optional
+	 * cache which maps the logical address (bytenr) of leaves to an array
+	 * of root IDs.
+	 */
+	bool (*cache_lookup)(u64 leaf_bytenr, void *user_ctx,
+			     const u64 **root_ids_ret, int *root_count_ret);
+	void (*cache_store)(u64 leaf_bytenr, const struct ulist *root_ids,
+			    void *user_ctx);
+	/*
+	 * If this is not NULL, then the backref walking code will call this
+	 * for each indirect data extent reference as soon as it finds one,
+	 * before collecting all the remaining backrefs and before resolving
+	 * indirect backrefs. This allows for the caller to terminate backref
+	 * walking as soon as it finds one backref that matches some specific
+	 * criteria. The @cache_lookup and @cache_store callbacks should not
+	 * be NULL in order to use this callback.
+	 */
+	iterate_extent_inodes_t *indirect_ref_iterator;
+	/*
+	 * If this is not NULL, then the backref walking code will call this for
+	 * each extent item it's meant to process before it actually starts
+	 * processing it. If this returns anything other than 0, then it stops
+	 * the backref walking code immediately.
+	 */
+	int (*check_extent_item)(u64 bytenr, const struct btrfs_extent_item *ei,
+				 const struct extent_buffer *leaf, void *user_ctx);
+	/*
+	 * If this is not NULL, then the backref walking code will call this for
+	 * each extent data ref it finds (BTRFS_EXTENT_DATA_REF_KEY keys) before
+	 * processing that data ref. If this callback returns false, then it will
+	 * ignore this data ref and it will never resolve the indirect data ref,
+	 * saving time searching for leaves in a fs tree with file extent items
+	 * matching the data ref.
+	 */
+	bool (*skip_data_ref)(u64 root, u64 ino, u64 offset, void *user_ctx);
+	/* Context object to pass to the callbacks defined above. */
+	void *user_ctx;
+};
+
 struct inode_fs_paths {
 	struct btrfs_path		*btrfs_path;
 	struct btrfs_root		*fs_root;
@@ -23,17 +141,59 @@ struct btrfs_backref_shared_cache_entry {
 	bool is_shared;
 };

-struct btrfs_backref_shared_cache {
+#define BTRFS_BACKREF_CTX_PREV_EXTENTS_SIZE 8
+
+struct btrfs_backref_share_check_ctx {
+	/* Ulists used during backref walking. */
+	struct ulist refs;
+	/*
+	 * The current leaf the caller of btrfs_is_data_extent_shared() is at.
+	 * Typically the caller (at the moment only fiemap) tries to determine
+	 * the sharedness of data extents pointed to by file extent items from
+	 * entire leaves.
+	 */
+	u64 curr_leaf_bytenr;
+	/*
+	 * The previous leaf the caller was at in the previous call to
+	 * btrfs_is_data_extent_shared(). This may be the same as the current
+	 * leaf. On the first call it must be 0.
+	 */
+	u64 prev_leaf_bytenr;
 	/*
 	 * A path from a root to a leaf that has a file extent item pointing to
 	 * a given data extent should never exceed the maximum b+tree height.
 	 */
-	struct btrfs_backref_shared_cache_entry entries[BTRFS_MAX_LEVEL];
-	bool use_cache;
+	struct btrfs_backref_shared_cache_entry path_cache_entries[BTRFS_MAX_LEVEL];
+	bool use_path_cache;
+	/*
+	 * Cache the sharedness result for the last few extents we have found,
+	 * but only for extents for which we have multiple file extent items
+	 * that point to them.
+	 * It's very common to have several file extent items that point to the
+	 * same extent (bytenr) but with different offsets and lengths. This
+	 * typically happens for COW writes, partial writes into prealloc
+	 * extents, NOCOW writes after snapshotting a root, hole punching or
+	 * reflinking within the same file (less common perhaps).
+	 * So keep a small cache with the lookup results for the extent pointed
+	 * by the last few file extent items. This cache is checked, with a
+	 * linear scan, whenever btrfs_is_data_extent_shared() is called, so
+	 * it must be small so that it does not negatively affect performance in
+	 * case we don't have multiple file extent items that point to the same
+	 * data extent.
+	 */
+	struct {
+		u64 bytenr;
+		bool is_shared;
+	} prev_extents_cache[BTRFS_BACKREF_CTX_PREV_EXTENTS_SIZE];
+	/*
+	 * The slot in the prev_extents_cache array that will be used for
+	 * storing the sharedness result of a new data extent.
+	 */
+	int prev_extents_cache_slot;
 };

-typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 root,
-		void *ctx);
+struct btrfs_backref_share_check_ctx *btrfs_alloc_backref_share_check_ctx(void);
+void btrfs_free_backref_share_ctx(struct btrfs_backref_share_check_ctx *ctx);

 int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
 			struct btrfs_path *path, struct btrfs_key *found_key,
@@ -43,11 +203,9 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
 			    struct btrfs_key *key, struct btrfs_extent_item *ei,
 			    u32 item_size, u64 *out_root, u8 *out_level);

-int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
-				u64 extent_item_objectid,
-				u64 extent_offset, int search_commit_root,
-				iterate_extent_inodes_t *iterate, void *ctx,
-				bool ignore_offset);
+int iterate_extent_inodes(struct btrfs_backref_walk_ctx *ctx,
+			  bool search_commit_root,
+			  iterate_extent_inodes_t *iterate, void *user_ctx);

 int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
 				struct btrfs_path *path, void *ctx,
@@ -55,13 +213,8 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,

 int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);

-int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
-			 struct btrfs_fs_info *fs_info, u64 bytenr,
-			 u64 time_seq, struct ulist **leafs,
-			 const u64 *extent_item_pos, bool ignore_offset);
-int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
-			 struct btrfs_fs_info *fs_info, u64 bytenr,
-			 u64 time_seq, struct ulist **roots,
+int btrfs_find_all_leafs(struct btrfs_backref_walk_ctx *ctx);
+int btrfs_find_all_roots(struct btrfs_backref_walk_ctx *ctx,
 			 bool skip_commit_root_sem);
 char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
 			u32 name_len, unsigned long name_off,
@@ -77,10 +230,9 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
 			  u64 start_off, struct btrfs_path *path,
 			  struct btrfs_inode_extref **ret_extref,
 			  u64 *found_off);
-int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
+int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr,
 				u64 extent_gen,
-				struct ulist *roots, struct ulist *tmp,
-				struct btrfs_backref_shared_cache *cache);
+				struct btrfs_backref_share_check_ctx *ctx);

 int __init btrfs_prelim_ref_init(void);
 void __cold btrfs_prelim_ref_exit(void);
@@ -111,8 +263,7 @@ struct btrfs_backref_iter {
 	u32 end_ptr;
 };

-struct btrfs_backref_iter *btrfs_backref_iter_alloc(
-		struct btrfs_fs_info *fs_info, gfp_t gfp_flag);
+struct btrfs_backref_iter *btrfs_backref_iter_alloc(struct btrfs_fs_info *fs_info);

 static inline void btrfs_backref_iter_free(struct btrfs_backref_iter *iter)
 {

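The new iterate_extent_inodes_t contract lets a callback end the walk early by returning BTRFS_ITERATE_EXTENT_INODES_STOP, which the walker then translates back into success. Below is a minimal userspace model of that contract; the constant value and the callback shape mirror the header above, but nothing else here is kernel API:

#include <stdio.h>

/* Mirrors BTRFS_ITERATE_EXTENT_INODES_STOP: non-zero, and not 0 or 1. */
#define ITERATE_STOP 5

typedef int (iterate_cb_t)(unsigned long long inum, void *ctx);

static int walk_backrefs(const unsigned long long *inums, int n,
			 iterate_cb_t *cb, void *ctx)
{
	for (int i = 0; i < n; i++) {
		int ret = cb(inums[i], ctx);

		if (ret) {
			/* A requested early stop is not an error. */
			if (ret == ITERATE_STOP)
				ret = 0;
			return ret;
		}
	}
	return 0;
}

/* Stop as soon as some other inode references the extent, which is the
 * shape of a sharedness check. */
static int stop_if_shared(unsigned long long inum, void *ctx)
{
	unsigned long long self = *(unsigned long long *)ctx;

	if (inum != self) {
		printf("shared with inode %llu, stopping walk\n", inum);
		return ITERATE_STOP;
	}
	return 0;
}

int main(void)
{
	unsigned long long refs[] = { 257, 257, 300, 412 };
	unsigned long long self = 257;

	return walk_backrefs(refs, 4, stop_if_shared, &self);
}
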
							
								
								
									
fs/btrfs/bio.c (new file, 381 lines)
@@ -0,0 +1,381 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 * Copyright (C) 2022 Christoph Hellwig.
 */

#include <linux/bio.h>
#include "bio.h"
#include "ctree.h"
#include "volumes.h"
#include "raid56.h"
#include "async-thread.h"
#include "check-integrity.h"
#include "dev-replace.h"
#include "rcu-string.h"
#include "zoned.h"

static struct bio_set btrfs_bioset;

/*
 * Initialize a btrfs_bio structure.  This skips the embedded bio itself as it
 * is already initialized by the block layer.
 */
static inline void btrfs_bio_init(struct btrfs_bio *bbio,
				  btrfs_bio_end_io_t end_io, void *private)
{
	memset(bbio, 0, offsetof(struct btrfs_bio, bio));
	bbio->end_io = end_io;
	bbio->private = private;
}

/*
 * Allocate a btrfs_bio structure.  The btrfs_bio is the main I/O container for
 * btrfs, and is used for all I/O submitted through btrfs_submit_bio.
 *
 * Just like the underlying bio_alloc_bioset it will not fail as it is backed by
 * a mempool.
 */
struct bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
			    btrfs_bio_end_io_t end_io, void *private)
{
	struct bio *bio;

	bio = bio_alloc_bioset(NULL, nr_vecs, opf, GFP_NOFS, &btrfs_bioset);
	btrfs_bio_init(btrfs_bio(bio), end_io, private);
	return bio;
}

struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size,
				    btrfs_bio_end_io_t end_io, void *private)
{
	struct bio *bio;
	struct btrfs_bio *bbio;

	ASSERT(offset <= UINT_MAX && size <= UINT_MAX);

	bio = bio_alloc_clone(orig->bi_bdev, orig, GFP_NOFS, &btrfs_bioset);
	bbio = btrfs_bio(bio);
	btrfs_bio_init(bbio, end_io, private);

	bio_trim(bio, offset >> 9, size >> 9);
	bbio->iter = bio->bi_iter;
	return bio;
}

static void btrfs_log_dev_io_error(struct bio *bio, struct btrfs_device *dev)
{
	if (!dev || !dev->bdev)
		return;
	if (bio->bi_status != BLK_STS_IOERR && bio->bi_status != BLK_STS_TARGET)
		return;

	if (btrfs_op(bio) == BTRFS_MAP_WRITE)
		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
	else if (!(bio->bi_opf & REQ_RAHEAD))
		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
	if (bio->bi_opf & REQ_PREFLUSH)
		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_FLUSH_ERRS);
}

static struct workqueue_struct *btrfs_end_io_wq(struct btrfs_fs_info *fs_info,
						struct bio *bio)
{
	if (bio->bi_opf & REQ_META)
		return fs_info->endio_meta_workers;
	return fs_info->endio_workers;
}

static void btrfs_end_bio_work(struct work_struct *work)
{
	struct btrfs_bio *bbio = container_of(work, struct btrfs_bio, end_io_work);

	bbio->end_io(bbio);
}

static void btrfs_simple_end_io(struct bio *bio)
{
	struct btrfs_fs_info *fs_info = bio->bi_private;
	struct btrfs_bio *bbio = btrfs_bio(bio);

	btrfs_bio_counter_dec(fs_info);

	if (bio->bi_status)
		btrfs_log_dev_io_error(bio, bbio->device);

	if (bio_op(bio) == REQ_OP_READ) {
		INIT_WORK(&bbio->end_io_work, btrfs_end_bio_work);
		queue_work(btrfs_end_io_wq(fs_info, bio), &bbio->end_io_work);
	} else {
		bbio->end_io(bbio);
	}
}

static void btrfs_raid56_end_io(struct bio *bio)
{
	struct btrfs_io_context *bioc = bio->bi_private;
	struct btrfs_bio *bbio = btrfs_bio(bio);

	btrfs_bio_counter_dec(bioc->fs_info);
	bbio->mirror_num = bioc->mirror_num;
	bbio->end_io(bbio);

	btrfs_put_bioc(bioc);
}

static void btrfs_orig_write_end_io(struct bio *bio)
{
	struct btrfs_io_stripe *stripe = bio->bi_private;
	struct btrfs_io_context *bioc = stripe->bioc;
	struct btrfs_bio *bbio = btrfs_bio(bio);

	btrfs_bio_counter_dec(bioc->fs_info);

	if (bio->bi_status) {
		atomic_inc(&bioc->error);
		btrfs_log_dev_io_error(bio, stripe->dev);
	}

	/*
	 * Only send an error to the higher layers if it is beyond the tolerance
	 * threshold.
	 */
	if (atomic_read(&bioc->error) > bioc->max_errors)
		bio->bi_status = BLK_STS_IOERR;
	else
		bio->bi_status = BLK_STS_OK;

	bbio->end_io(bbio);
	btrfs_put_bioc(bioc);
}

static void btrfs_clone_write_end_io(struct bio *bio)
{
	struct btrfs_io_stripe *stripe = bio->bi_private;

	if (bio->bi_status) {
		atomic_inc(&stripe->bioc->error);
		btrfs_log_dev_io_error(bio, stripe->dev);
	}

	/* Pass on control to the original bio this one was cloned from */
	bio_endio(stripe->bioc->orig_bio);
	bio_put(bio);
}

static void btrfs_submit_dev_bio(struct btrfs_device *dev, struct bio *bio)
{
	if (!dev || !dev->bdev ||
	    test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) ||
	    (btrfs_op(bio) == BTRFS_MAP_WRITE &&
	     !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
		bio_io_error(bio);
		return;
	}

	bio_set_dev(bio, dev->bdev);

	/*
	 * For zone append writing, bi_sector must point to the beginning of the
	 * zone
	 */
	if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
		u64 physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;

		if (btrfs_dev_is_sequential(dev, physical)) {
			u64 zone_start = round_down(physical,
						    dev->fs_info->zone_size);

			bio->bi_iter.bi_sector = zone_start >> SECTOR_SHIFT;
		} else {
			bio->bi_opf &= ~REQ_OP_ZONE_APPEND;
			bio->bi_opf |= REQ_OP_WRITE;
		}
	}
	btrfs_debug_in_rcu(dev->fs_info,
	"%s: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
		__func__, bio_op(bio), bio->bi_opf, bio->bi_iter.bi_sector,
		(unsigned long)dev->bdev->bd_dev, btrfs_dev_name(dev),
		dev->devid, bio->bi_iter.bi_size);

	btrfsic_check_bio(bio);
	submit_bio(bio);
}

static void btrfs_submit_mirrored_bio(struct btrfs_io_context *bioc, int dev_nr)
{
	struct bio *orig_bio = bioc->orig_bio, *bio;

	ASSERT(bio_op(orig_bio) != REQ_OP_READ);

	/* Reuse the bio embedded into the btrfs_bio for the last mirror */
	if (dev_nr == bioc->num_stripes - 1) {
		bio = orig_bio;
		bio->bi_end_io = btrfs_orig_write_end_io;
	} else {
		bio = bio_alloc_clone(NULL, orig_bio, GFP_NOFS, &fs_bio_set);
		bio_inc_remaining(orig_bio);
		bio->bi_end_io = btrfs_clone_write_end_io;
	}

	bio->bi_private = &bioc->stripes[dev_nr];
	bio->bi_iter.bi_sector = bioc->stripes[dev_nr].physical >> SECTOR_SHIFT;
	bioc->stripes[dev_nr].bioc = bioc;
	btrfs_submit_dev_bio(bioc->stripes[dev_nr].dev, bio);
}

void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, int mirror_num)
{
	u64 logical = bio->bi_iter.bi_sector << 9;
	u64 length = bio->bi_iter.bi_size;
	u64 map_length = length;
	struct btrfs_io_context *bioc = NULL;
	struct btrfs_io_stripe smap;
	int ret;

	btrfs_bio_counter_inc_blocked(fs_info);
	ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length,
				&bioc, &smap, &mirror_num, 1);
	if (ret) {
		btrfs_bio_counter_dec(fs_info);
		btrfs_bio_end_io(btrfs_bio(bio), errno_to_blk_status(ret));
		return;
	}

	if (map_length < length) {
		btrfs_crit(fs_info,
			   "mapping failed logical %llu bio len %llu len %llu",
			   logical, length, map_length);
		BUG();
	}

	if (!bioc) {
		/* Single mirror read/write fast path */
		btrfs_bio(bio)->mirror_num = mirror_num;
		btrfs_bio(bio)->device = smap.dev;
		bio->bi_iter.bi_sector = smap.physical >> SECTOR_SHIFT;
		bio->bi_private = fs_info;
		bio->bi_end_io = btrfs_simple_end_io;
		btrfs_submit_dev_bio(smap.dev, bio);
	} else if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
		/* Parity RAID write or read recovery */
		bio->bi_private = bioc;
		bio->bi_end_io = btrfs_raid56_end_io;
		if (bio_op(bio) == REQ_OP_READ)
			raid56_parity_recover(bio, bioc, mirror_num);
		else
			raid56_parity_write(bio, bioc);
	} else {
		/* Write to multiple mirrors */
		int total_devs = bioc->num_stripes;
		int dev_nr;

		bioc->orig_bio = bio;
		for (dev_nr = 0; dev_nr < total_devs; dev_nr++)
			btrfs_submit_mirrored_bio(bioc, dev_nr);
	}
}

/*
 * Submit a repair write.
 *
 * This bypasses btrfs_submit_bio deliberately, as that writes all copies in a
 * RAID setup.  Here we only want to write the one bad copy, so we do the
 * mapping ourselves and submit the bio directly.
 *
 * The I/O is issued synchronously to block the repair read completion from
 * freeing the bio.
 */
int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
			    u64 length, u64 logical, struct page *page,
			    unsigned int pg_offset, int mirror_num)
{
	struct btrfs_device *dev;
	struct bio_vec bvec;
	struct bio bio;
	u64 map_length = 0;
	u64 sector;
	struct btrfs_io_context *bioc = NULL;
	int ret = 0;

	ASSERT(!(fs_info->sb->s_flags & SB_RDONLY));
	BUG_ON(!mirror_num);

	if (btrfs_repair_one_zone(fs_info, logical))
		return 0;

	map_length = length;

	/*
	 * Avoid races with device replace and make sure our bioc has devices
	 * associated to its stripes that don't go away while we are doing the
	 * read repair operation.
	 */
	btrfs_bio_counter_inc_blocked(fs_info);
	if (btrfs_is_parity_mirror(fs_info, logical, length)) {
		/*
		 * Note that we don't use BTRFS_MAP_WRITE because it's supposed
		 * to update all raid stripes, but here we just want to correct
		 * bad stripe, thus BTRFS_MAP_READ is abused to only get the bad
		 * stripe's dev and sector.
		 */
		ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical,
				      &map_length, &bioc, 0);
		if (ret)
			goto out_counter_dec;
		ASSERT(bioc->mirror_num == 1);
	} else {
		ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical,
				      &map_length, &bioc, mirror_num);
		if (ret)
			goto out_counter_dec;
		BUG_ON(mirror_num != bioc->mirror_num);
	}

	sector = bioc->stripes[bioc->mirror_num - 1].physical >> 9;
	dev = bioc->stripes[bioc->mirror_num - 1].dev;
	btrfs_put_bioc(bioc);

	if (!dev || !dev->bdev ||
	    !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
		ret = -EIO;
		goto out_counter_dec;
	}

	bio_init(&bio, dev->bdev, &bvec, 1, REQ_OP_WRITE | REQ_SYNC);
	bio.bi_iter.bi_sector = sector;
	__bio_add_page(&bio, page, length, pg_offset);

	btrfsic_check_bio(&bio);
	ret = submit_bio_wait(&bio);
	if (ret) {
		/* try to remap that extent elsewhere? */
		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
		goto out_bio_uninit;
	}

	btrfs_info_rl_in_rcu(fs_info,
		"read error corrected: ino %llu off %llu (dev %s sector %llu)",
			     ino, start, btrfs_dev_name(dev), sector);
	ret = 0;

out_bio_uninit:
	bio_uninit(&bio);
out_counter_dec:
	btrfs_bio_counter_dec(fs_info);
	return ret;
}

int __init btrfs_bioset_init(void)
{
	if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE,
			offsetof(struct btrfs_bio, bio),
			BIOSET_NEED_BVECS))
		return -ENOMEM;
	return 0;
}

void __cold btrfs_bioset_exit(void)
{
	bioset_exit(&btrfs_bioset);
}
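
btrfs_bio relies on the bio being the last member of the wrapper so that bio_alloc_bioset() can size the whole structure in one allocation and completion handlers can recover the wrapper with container_of(). A self-contained userspace sketch of that embedding pattern (struct and function names here are invented, only the shape mirrors the kernel code above):

#include <stddef.h>
#include <stdio.h>

struct bio {
	int status;
};

struct wrapped_bio;
typedef void (*end_io_t)(struct wrapped_bio *wbio);

struct wrapped_bio {
	end_io_t end_io;	/* caller-supplied completion callback */
	void *private;
	struct bio bio;		/* must stay last; allocator sizes the wrapper */
};

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* The "block layer" only sees struct bio; the wrapper is recovered
 * from the embedded member, exactly like btrfs_bio(bio) does. */
static void bio_endio(struct bio *bio)
{
	struct wrapped_bio *wbio = container_of(bio, struct wrapped_bio, bio);

	wbio->end_io(wbio);
}

static void my_end_io(struct wrapped_bio *wbio)
{
	printf("done: status=%d private=%s\n", wbio->bio.status,
	       (const char *)wbio->private);
}

int main(void)
{
	struct wrapped_bio wbio = { .end_io = my_end_io, .private = "ctx" };

	wbio.bio.status = 0;
	bio_endio(&wbio.bio);
	return 0;
}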
							
								
								
									
fs/btrfs/bio.h (new file, 127 lines)
@@ -0,0 +1,127 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 * Copyright (C) 2022 Christoph Hellwig.
 */

#ifndef BTRFS_BIO_H
#define BTRFS_BIO_H

#include <linux/bio.h>
#include <linux/workqueue.h>
#include "tree-checker.h"

struct btrfs_bio;
struct btrfs_fs_info;

#define BTRFS_BIO_INLINE_CSUM_SIZE	64

/*
 * Maximum number of sectors for a single bio to limit the size of the
 * checksum array.  This matches the number of bio_vecs per bio and thus the
 * I/O size for buffered I/O.
 */
#define BTRFS_MAX_BIO_SECTORS		(256)

typedef void (*btrfs_bio_end_io_t)(struct btrfs_bio *bbio);

/*
 * Additional info to pass along bio.
 *
 * Mostly for btrfs specific features like csum and mirror_num.
 */
struct btrfs_bio {
	unsigned int mirror_num:7;

	/*
	 * Extra indicator for metadata bios.
	 * For some btrfs bios they use pages without a mapping, thus
	 * we can not rely on page->mapping->host to determine if
	 * it's a metadata bio.
	 */
	unsigned int is_metadata:1;
	struct bvec_iter iter;

	/* for direct I/O */
	u64 file_offset;

	/* @device is for stripe IO submission. */
	struct btrfs_device *device;
	union {
		/* For data checksum verification. */
		struct {
			u8 *csum;
			u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE];
		};

		/* For metadata parentness verification. */
		struct btrfs_tree_parent_check parent_check;
	};

	/* End I/O information supplied to btrfs_bio_alloc */
	btrfs_bio_end_io_t end_io;
	void *private;

	/* For read end I/O handling */
	struct work_struct end_io_work;

	/*
	 * This member must come last, bio_alloc_bioset will allocate enough
	 * bytes for entire btrfs_bio but relies on bio being last.
	 */
	struct bio bio;
};

static inline struct btrfs_bio *btrfs_bio(struct bio *bio)
{
	return container_of(bio, struct btrfs_bio, bio);
}

int __init btrfs_bioset_init(void);
void __cold btrfs_bioset_exit(void);

struct bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
			    btrfs_bio_end_io_t end_io, void *private);
struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size,
				    btrfs_bio_end_io_t end_io, void *private);

static inline void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
{
	bbio->bio.bi_status = status;
	bbio->end_io(bbio);
}

static inline void btrfs_bio_free_csum(struct btrfs_bio *bbio)
{
	if (bbio->is_metadata)
		return;
	if (bbio->csum != bbio->csum_inline) {
		kfree(bbio->csum);
		bbio->csum = NULL;
	}
}

/*
 * Iterate through a btrfs_bio (@bbio) on a per-sector basis.
 *
 * bvl        - struct bio_vec
 * bbio       - struct btrfs_bio
 * iter       - struct bvec_iter
 * bio_offset - unsigned int
 */
#define btrfs_bio_for_each_sector(fs_info, bvl, bbio, iter, bio_offset)	\
	for ((iter) = (bbio)->iter, (bio_offset) = 0;			\
	     (iter).bi_size &&					\
	     (((bvl) = bio_iter_iovec((&(bbio)->bio), (iter))), 1);	\
	     (bio_offset) += fs_info->sectorsize,			\
	     bio_advance_iter_single(&(bbio)->bio, &(iter),		\
	     (fs_info)->sectorsize))

void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
		      int mirror_num);
int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
			    u64 length, u64 logical, struct page *page,
			    unsigned int pg_offset, int mirror_num);

#endif
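
The csum/csum_inline union in struct btrfs_bio, together with btrfs_bio_free_csum() above, is a small-buffer optimization: checksums for small I/Os live in the embedded array and only larger I/Os hit the allocator. A standalone userspace model of the same pattern (toy names, not kernel code):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define INLINE_CSUM_SIZE 64	/* mirrors BTRFS_BIO_INLINE_CSUM_SIZE */

struct toy_bio {
	unsigned char *csum;
	unsigned char csum_inline[INLINE_CSUM_SIZE];
};

static int toy_alloc_csums(struct toy_bio *bbio, size_t nbytes)
{
	if (nbytes <= INLINE_CSUM_SIZE) {
		bbio->csum = bbio->csum_inline;	/* no allocation needed */
	} else {
		bbio->csum = malloc(nbytes);
		if (!bbio->csum)
			return -1;
	}
	memset(bbio->csum, 0, nbytes);
	return 0;
}

/* Mirrors btrfs_bio_free_csum(): free only when the heap path was taken. */
static void toy_free_csums(struct toy_bio *bbio)
{
	if (bbio->csum != bbio->csum_inline) {
		free(bbio->csum);
		bbio->csum = NULL;
	}
}

int main(void)
{
	struct toy_bio small = { 0 }, large = { 0 };

	toy_alloc_csums(&small, 32);	/* stays inline */
	toy_alloc_csums(&large, 512);	/* falls back to the heap */
	printf("small inline: %d\n", small.csum == small.csum_inline);
	printf("large inline: %d\n", large.csum == large.csum_inline);
	toy_free_csums(&small);
	toy_free_csums(&large);
	return 0;
}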
fs/btrfs/block-group.c
@@ -17,6 +17,21 @@
 #include "discard.h"
 #include "raid56.h"
 #include "zoned.h"
+#include "fs.h"
+#include "accessors.h"
+#include "extent-tree.h"
+
+#ifdef CONFIG_BTRFS_DEBUG
+int btrfs_should_fragment_free_space(struct btrfs_block_group *block_group)
+{
+	struct btrfs_fs_info *fs_info = block_group->fs_info;
+
+	return (btrfs_test_opt(fs_info, FRAGMENT_METADATA) &&
+		block_group->flags & BTRFS_BLOCK_GROUP_METADATA) ||
+	       (btrfs_test_opt(fs_info, FRAGMENT_DATA) &&
+		block_group->flags & BTRFS_BLOCK_GROUP_DATA);
+}
+#endif

 /*
  * Return target flags in extended format or 0 if restripe for this chunk_type
@@ -284,7 +299,7 @@ struct btrfs_block_group *btrfs_next_block_group(
 	return cache;
 }

-/**
+/*
  * Check if we can do a NOCOW write for a given extent.
  *
  * @fs_info:       The filesystem information object.
@@ -325,11 +340,9 @@ struct btrfs_block_group *btrfs_inc_nocow_writers(struct btrfs_fs_info *fs_info,
 	return bg;
 }

-/**
+/*
  * Decrement the number of NOCOW writers in a block group.
  *
- * @bg:       The block group.
- *
  * This is meant to be called after a previous call to btrfs_inc_nocow_writers(),
  * and on the block group returned by that call. Typically this is called after
  * creating an ordered extent for a NOCOW write, to prevent races with scrub and
@@ -1527,6 +1540,30 @@ static inline bool btrfs_should_reclaim(struct btrfs_fs_info *fs_info)
 	return true;
 }

+static bool should_reclaim_block_group(struct btrfs_block_group *bg, u64 bytes_freed)
+{
+	const struct btrfs_space_info *space_info = bg->space_info;
+	const int reclaim_thresh = READ_ONCE(space_info->bg_reclaim_threshold);
+	const u64 new_val = bg->used;
+	const u64 old_val = new_val + bytes_freed;
+	u64 thresh;
+
+	if (reclaim_thresh == 0)
+		return false;
+
+	thresh = mult_perc(bg->length, reclaim_thresh);
+
+	/*
+	 * If we were below the threshold before don't reclaim, we are likely a
+	 * brand new block group and we don't want to relocate new block groups.
+	 */
+	if (old_val < thresh)
+		return false;
+	if (new_val >= thresh)
+		return false;
+	return true;
+}
+
 void btrfs_reclaim_bgs_work(struct work_struct *work)
 {
 	struct btrfs_fs_info *fs_info =
@@ -1594,6 +1631,40 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
 			up_write(&space_info->groups_sem);
 			goto next;
 		}
+		if (bg->used == 0) {
+			/*
+			 * It is possible that we trigger relocation on a block
+			 * group as its extents are deleted and it first goes
+			 * below the threshold, then shortly after goes empty.
+			 *
+			 * In this case, relocating it does delete it, but has
+			 * some overhead in relocation specific metadata, looking
+			 * for the non-existent extents and running some extra
+			 * transactions, which we can avoid by using one of the
+			 * other mechanisms for dealing with empty block groups.
+			 */
+			if (!btrfs_test_opt(fs_info, DISCARD_ASYNC))
+				btrfs_mark_bg_unused(bg);
+			spin_unlock(&bg->lock);
+			up_write(&space_info->groups_sem);
+			goto next;
+		}
+		/*
+		 * The block group might no longer meet the reclaim condition by
+		 * the time we get around to reclaiming it, so to avoid
+		 * reclaiming overly full block_groups, skip reclaiming them.
+		 *
+		 * Since the decision making process also depends on the amount
+		 * being freed, pass in a fake giant value to skip that extra
+		 * check, which is more meaningful when adding to the list in
+		 * the first place.
+		 */
+		if (!should_reclaim_block_group(bg, bg->length)) {
+			spin_unlock(&bg->lock);
+			up_write(&space_info->groups_sem);
+			goto next;
+		}
+		spin_unlock(&bg->lock);

 		/* Get out fast, in case we're unmounting the filesystem */
@@ -1740,8 +1811,8 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
 	write_sequnlock(&fs_info->profiles_lock);
 }

-/**
- * Map a physical disk address to a list of logical addresses
+/*
+ * Map a physical disk address to a list of logical addresses.
  *
  * @fs_info:       the filesystem
  * @chunk_start:   logical address of block group
@@ -2001,6 +2072,7 @@ static int read_one_block_group(struct btrfs_fs_info *info,

 	cache->length = key->offset;
 	cache->used = btrfs_stack_block_group_used(bgi);
+	cache->commit_used = cache->used;
 	cache->flags = btrfs_stack_block_group_flags(bgi);
 	cache->global_root_id = btrfs_stack_block_group_chunk_objectid(bgi);

@@ -2481,7 +2553,7 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *trans,
 	cache->global_root_id = calculate_global_root_id(fs_info, cache->start);

 	if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
-		cache->needs_free_space = 1;
+		set_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &cache->runtime_flags);

 	ret = btrfs_load_block_group_zone_info(cache, true);
 	if (ret) {
@@ -2692,6 +2764,25 @@ static int update_block_group_item(struct btrfs_trans_handle *trans,
 	struct extent_buffer *leaf;
 	struct btrfs_block_group_item bgi;
 	struct btrfs_key key;
+	u64 old_commit_used;
+	u64 used;
+
+	/*
+	 * Block group items update can be triggered out of commit transaction
+	 * critical section, thus we need a consistent view of used bytes.
+	 * We cannot use cache->used directly outside of the spin lock, as it
+	 * may be changed.
+	 */
+	spin_lock(&cache->lock);
+	old_commit_used = cache->commit_used;
+	used = cache->used;
+	/* No change in used bytes, can safely skip it. */
+	if (cache->commit_used == used) {
+		spin_unlock(&cache->lock);
+		return 0;
+	}
+	cache->commit_used = used;
+	spin_unlock(&cache->lock);

 	key.objectid = cache->start;
 	key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
@@ -2706,7 +2797,7 @@ static int update_block_group_item(struct btrfs_trans_handle *trans,

 	leaf = path->nodes[0];
 	bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
-	btrfs_set_stack_block_group_used(&bgi, cache->used);
+	btrfs_set_stack_block_group_used(&bgi, used);
 	btrfs_set_stack_block_group_chunk_objectid(&bgi,
 						   cache->global_root_id);
 	btrfs_set_stack_block_group_flags(&bgi, cache->flags);
@@ -2714,6 +2805,12 @@ static int update_block_group_item(struct btrfs_trans_handle *trans,
 	btrfs_mark_buffer_dirty(leaf);
 fail:
 	btrfs_release_path(path);
+	/* We didn't update the block group item, need to revert @commit_used. */
+	if (ret < 0) {
+		spin_lock(&cache->lock);
+		cache->commit_used = old_commit_used;
+		spin_unlock(&cache->lock);
+	}
 	return ret;

 }
@@ -3211,31 +3308,6 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
 	return ret;
 }

-static inline bool should_reclaim_block_group(struct btrfs_block_group *bg,
-					      u64 bytes_freed)
-{
-	const struct btrfs_space_info *space_info = bg->space_info;
-	const int reclaim_thresh = READ_ONCE(space_info->bg_reclaim_threshold);
-	const u64 new_val = bg->used;
-	const u64 old_val = new_val + bytes_freed;
-	u64 thresh;
-
-	if (reclaim_thresh == 0)
-		return false;
-
-	thresh = div_factor_fine(bg->length, reclaim_thresh);
-
-	/*
-	 * If we were below the threshold before don't reclaim, we are likely a
-	 * brand new block group and we don't want to relocate new block groups.
-	 */
-	if (old_val < thresh)
-		return false;
-	if (new_val >= thresh)
-		return false;
-	return true;
-}
-
 int btrfs_update_block_group(struct btrfs_trans_handle *trans,
 			     u64 bytenr, u64 num_bytes, bool alloc)
 {
@@ -3347,8 +3419,9 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
 	return ret;
 }

-/**
- * btrfs_add_reserved_bytes - update the block_group and space info counters
+/*
+ * Update the block_group and space info counters.
+ *
  * @cache:	The cache we are manipulating
  * @ram_bytes:  The number of bytes of file content, and will be same to
  *              @num_bytes except for the compress path.
@@ -3391,8 +3464,9 @@ int btrfs_add_reserved_bytes(struct btrfs_block_group *cache,
 	return ret;
 }

-/**
- * btrfs_free_reserved_bytes - update the block_group and space info counters
+/*
+ * Update the block_group and space info counters.
+ *
  * @cache:      The cache we are manipulating
  * @num_bytes:  The number of bytes in question
  * @delalloc:   The blocks are allocated for the delalloc write
@@ -3449,13 +3523,13 @@ static int should_alloc_chunk(struct btrfs_fs_info *fs_info,
 	 */
 	if (force == CHUNK_ALLOC_LIMITED) {
 		thresh = btrfs_super_total_bytes(fs_info->super_copy);
-		thresh = max_t(u64, SZ_64M, div_factor_fine(thresh, 1));
+		thresh = max_t(u64, SZ_64M, mult_perc(thresh, 1));

 		if (sinfo->total_bytes - bytes_used < thresh)
 			return 1;
 	}

-	if (bytes_used + SZ_2M < div_factor(sinfo->total_bytes, 8))
+	if (bytes_used + SZ_2M < mult_perc(sinfo->total_bytes, 80))
 		return 0;
 	return 1;
 }

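The commit_used flow above (read_one_block_group() seeds it, update_block_group_item() checks it) skips writing a block group item whose used bytes did not change since the last commit, and reverts the cached value if the write fails so a later commit retries. A compact userspace model of that flow (locking elided, all names invented):

#include <stdio.h>

struct toy_block_group {
	unsigned long long used;	/* live counter */
	unsigned long long commit_used;	/* value last written to disk */
};

static int update_item(struct toy_block_group *bg, int simulate_io_error)
{
	unsigned long long old_commit_used = bg->commit_used;

	/* No change in used bytes since the last commit: skip the write. */
	if (bg->commit_used == bg->used) {
		printf("unchanged, skipping item update\n");
		return 0;
	}
	bg->commit_used = bg->used;

	if (simulate_io_error) {
		/* Failed write: revert so a later commit retries it. */
		bg->commit_used = old_commit_used;
		return -5;	/* -EIO */
	}
	printf("wrote used=%llu\n", bg->used);
	return 0;
}

int main(void)
{
	struct toy_block_group bg = { .used = 100, .commit_used = 0 };

	update_item(&bg, 0);	/* writes 100 */
	update_item(&bg, 0);	/* skipped: nothing changed */
	bg.used = 50;
	update_item(&bg, 1);	/* fails; commit_used rolls back to 100 */
	update_item(&bg, 0);	/* retries and writes 50 */
	return 0;
}
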
fs/btrfs/block-group.h
@@ -55,6 +55,10 @@ enum btrfs_block_group_flags {
 	BLOCK_GROUP_FLAG_CHUNK_ITEM_INSERTED,
 	BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE,
 	BLOCK_GROUP_FLAG_ZONED_DATA_RELOC,
+	/* Does the block group need to be added to the free space tree? */
+	BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE,
+	/* Indicate that the block group is placed on a sequential zone */
+	BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE,
 };

 enum btrfs_caching_type {
@@ -99,6 +103,12 @@ struct btrfs_block_group {
 	u64 cache_generation;
 	u64 global_root_id;

+	/*
+	 * The last committed used bytes of this block group. If the above
+	 * @used is still the same as @commit_used, we don't need to update
+	 * the block group item of this block group.
+	 */
+	u64 commit_used;
 	/*
 	 * If the free space extent count exceeds this number, convert the block
 	 * group to bitmaps.
@@ -202,15 +212,6 @@ struct btrfs_block_group {
 	/* Lock for free space tree operations. */
 	struct mutex free_space_lock;

-	/*
-	 * Does the block group need to be added to the free space tree?
-	 * Protected by free_space_lock.
-	 */
-	int needs_free_space;
-
-	/* Flag indicating this block group is placed on a sequential zone */
-	bool seq_zone;
-
 	/*
 	 * Number of extents in this block group used for swap files.
 	 * All accesses protected by the spinlock 'lock'.
@@ -251,16 +252,7 @@ static inline bool btrfs_is_block_group_data_only(
 }

 #ifdef CONFIG_BTRFS_DEBUG
-static inline int btrfs_should_fragment_free_space(
-		struct btrfs_block_group *block_group)
-{
-	struct btrfs_fs_info *fs_info = block_group->fs_info;
-
-	return (btrfs_test_opt(fs_info, FRAGMENT_METADATA) &&
-		block_group->flags & BTRFS_BLOCK_GROUP_METADATA) ||
-	       (btrfs_test_opt(fs_info, FRAGMENT_DATA) &&
-		block_group->flags & BTRFS_BLOCK_GROUP_DATA);
-}
+int btrfs_should_fragment_free_space(struct btrfs_block_group *block_group);
 #endif

 struct btrfs_block_group *btrfs_lookup_first_block_group(

|  | @ -7,6 +7,8 @@ | |||
| #include "transaction.h" | ||||
| #include "block-group.h" | ||||
| #include "disk-io.h" | ||||
| #include "fs.h" | ||||
| #include "accessors.h" | ||||
| 
 | ||||
| /*
 | ||||
|  * HOW DO BLOCK RESERVES WORK | ||||
|  | @ -225,7 +227,7 @@ int btrfs_block_rsv_add(struct btrfs_fs_info *fs_info, | |||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| int btrfs_block_rsv_check(struct btrfs_block_rsv *block_rsv, int min_factor) | ||||
| int btrfs_block_rsv_check(struct btrfs_block_rsv *block_rsv, int min_percent) | ||||
| { | ||||
| 	u64 num_bytes = 0; | ||||
| 	int ret = -ENOSPC; | ||||
|  | @ -234,7 +236,7 @@ int btrfs_block_rsv_check(struct btrfs_block_rsv *block_rsv, int min_factor) | |||
| 		return 0; | ||||
| 
 | ||||
| 	spin_lock(&block_rsv->lock); | ||||
| 	num_bytes = div_factor(block_rsv->size, min_factor); | ||||
| 	num_bytes = mult_perc(block_rsv->size, min_percent); | ||||
| 	if (block_rsv->reserved >= num_bytes) | ||||
| 		ret = 0; | ||||
| 	spin_unlock(&block_rsv->lock); | ||||
|  | @ -323,31 +325,6 @@ void btrfs_block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv, | |||
| 	spin_unlock(&block_rsv->lock); | ||||
| } | ||||
| 
 | ||||
| int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info, | ||||
| 			     struct btrfs_block_rsv *dest, u64 num_bytes, | ||||
| 			     int min_factor) | ||||
| { | ||||
| 	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; | ||||
| 	u64 min_bytes; | ||||
| 
 | ||||
| 	if (global_rsv->space_info != dest->space_info) | ||||
| 		return -ENOSPC; | ||||
| 
 | ||||
| 	spin_lock(&global_rsv->lock); | ||||
| 	min_bytes = div_factor(global_rsv->size, min_factor); | ||||
| 	if (global_rsv->reserved < min_bytes + num_bytes) { | ||||
| 		spin_unlock(&global_rsv->lock); | ||||
| 		return -ENOSPC; | ||||
| 	} | ||||
| 	global_rsv->reserved -= num_bytes; | ||||
| 	if (global_rsv->reserved < global_rsv->size) | ||||
| 		global_rsv->full = false; | ||||
| 	spin_unlock(&global_rsv->lock); | ||||
| 
 | ||||
| 	btrfs_block_rsv_add_bytes(dest, num_bytes, true); | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| void btrfs_update_global_block_rsv(struct btrfs_fs_info *fs_info) | ||||
| { | ||||
| 	struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; | ||||
|  | @ -552,5 +529,17 @@ try_reserve: | |||
| 		if (!ret) | ||||
| 			return global_rsv; | ||||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * All hope is lost, but of course our reservations are overly | ||||
| 	 * pessimistic, so instead of possibly having an ENOSPC abort here, try | ||||
| 	 * one last time to force a reservation if there's enough actual space | ||||
| 	 * on disk to make the reservation. | ||||
| 	 */ | ||||
| 	ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv, blocksize, | ||||
| 					   BTRFS_RESERVE_FLUSH_EMERGENCY); | ||||
| 	if (!ret) | ||||
| 		return block_rsv; | ||||
| 
 | ||||
| 	return ERR_PTR(ret); | ||||
| } | ||||
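
BTRFS_RESERVE_FLUSH_EMERGENCY is the last-resort flush level used above. A simplified sketch of how such a level presumably slots into the flush enum (the real enum has more intermediate levels, and the names other than FLUSH_EMERGENCY are assumptions here):

	enum btrfs_reserve_flush_enum_sketch {
		BTRFS_RESERVE_NO_FLUSH,
		BTRFS_RESERVE_FLUSH_LIMIT,
		BTRFS_RESERVE_FLUSH_ALL,
		BTRFS_RESERVE_FLUSH_ALL_STEAL,
		/*
		 * Assumption: the highest level, allowed to consume the last real
		 * free space rather than returning -ENOSPC and aborting the
		 * transaction.
		 */
		BTRFS_RESERVE_FLUSH_EMERGENCY,
	};
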
|  |  | |||
|  | @ -4,6 +4,7 @@ | |||
| #define BTRFS_BLOCK_RSV_H | ||||
| 
 | ||||
| struct btrfs_trans_handle; | ||||
| struct btrfs_root; | ||||
| enum btrfs_reserve_flush_enum; | ||||
| 
 | ||||
| /*
 | ||||
|  | @ -62,7 +63,7 @@ void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info, | |||
| int btrfs_block_rsv_add(struct btrfs_fs_info *fs_info, | ||||
| 			struct btrfs_block_rsv *block_rsv, u64 num_bytes, | ||||
| 			enum btrfs_reserve_flush_enum flush); | ||||
| int btrfs_block_rsv_check(struct btrfs_block_rsv *block_rsv, int min_factor); | ||||
| int btrfs_block_rsv_check(struct btrfs_block_rsv *block_rsv, int min_percent); | ||||
| int btrfs_block_rsv_refill(struct btrfs_fs_info *fs_info, | ||||
| 			   struct btrfs_block_rsv *block_rsv, u64 min_reserved, | ||||
| 			   enum btrfs_reserve_flush_enum flush); | ||||
|  | @ -70,9 +71,6 @@ int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, | |||
| 			    struct btrfs_block_rsv *dst_rsv, u64 num_bytes, | ||||
| 			    bool update_size); | ||||
| int btrfs_block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, u64 num_bytes); | ||||
| int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info, | ||||
| 			     struct btrfs_block_rsv *dest, u64 num_bytes, | ||||
| 			     int min_factor); | ||||
| void btrfs_block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv, | ||||
| 			       u64 num_bytes, bool update_size); | ||||
| u64 btrfs_block_rsv_release(struct btrfs_fs_info *fs_info, | ||||
|  |  | |||
|  | @ -411,29 +411,142 @@ static inline void btrfs_inode_split_flags(u64 inode_item_flags, | |||
| #define CSUM_FMT				"0x%*phN" | ||||
| #define CSUM_FMT_VALUE(size, bytes)		size, bytes | ||||
| 
 | ||||
| static inline void btrfs_print_data_csum_error(struct btrfs_inode *inode, | ||||
| 		u64 logical_start, u8 *csum, u8 *csum_expected, int mirror_num) | ||||
| { | ||||
| 	struct btrfs_root *root = inode->root; | ||||
| 	const u32 csum_size = root->fs_info->csum_size; | ||||
| void btrfs_submit_data_write_bio(struct btrfs_inode *inode, struct bio *bio, int mirror_num); | ||||
| void btrfs_submit_data_read_bio(struct btrfs_inode *inode, struct bio *bio, | ||||
| 			int mirror_num, enum btrfs_compression_type compress_type); | ||||
| void btrfs_submit_dio_repair_bio(struct btrfs_inode *inode, struct bio *bio, int mirror_num); | ||||
| blk_status_t btrfs_submit_bio_start(struct btrfs_inode *inode, struct bio *bio); | ||||
| blk_status_t btrfs_submit_bio_start_direct_io(struct btrfs_inode *inode, | ||||
| 					      struct bio *bio, | ||||
| 					      u64 dio_file_offset); | ||||
| int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, struct page *page, | ||||
| 			    u32 pgoff, u8 *csum, const u8 * const csum_expected); | ||||
| int btrfs_check_data_csum(struct btrfs_inode *inode, struct btrfs_bio *bbio, | ||||
| 			  u32 bio_offset, struct page *page, u32 pgoff); | ||||
| unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio, | ||||
| 				    u32 bio_offset, struct page *page, | ||||
| 				    u64 start, u64 end); | ||||
| noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, | ||||
| 			      u64 *orig_start, u64 *orig_block_len, | ||||
| 			      u64 *ram_bytes, bool nowait, bool strict); | ||||
| 
 | ||||
| 	/* Output minus objectid, which is more meaningful */ | ||||
| 	if (root->root_key.objectid >= BTRFS_LAST_FREE_OBJECTID) | ||||
| 		btrfs_warn_rl(root->fs_info, | ||||
| "csum failed root %lld ino %lld off %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d", | ||||
| 			root->root_key.objectid, btrfs_ino(inode), | ||||
| 			logical_start, | ||||
| 			CSUM_FMT_VALUE(csum_size, csum), | ||||
| 			CSUM_FMT_VALUE(csum_size, csum_expected), | ||||
| 			mirror_num); | ||||
| 	else | ||||
| 		btrfs_warn_rl(root->fs_info, | ||||
| "csum failed root %llu ino %llu off %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d", | ||||
| 			root->root_key.objectid, btrfs_ino(inode), | ||||
| 			logical_start, | ||||
| 			CSUM_FMT_VALUE(csum_size, csum), | ||||
| 			CSUM_FMT_VALUE(csum_size, csum_expected), | ||||
| 			mirror_num); | ||||
| } | ||||
| void __btrfs_del_delalloc_inode(struct btrfs_root *root, struct btrfs_inode *inode); | ||||
| struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry); | ||||
| int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index); | ||||
| int btrfs_unlink_inode(struct btrfs_trans_handle *trans, | ||||
| 		       struct btrfs_inode *dir, struct btrfs_inode *inode, | ||||
| 		       const struct fscrypt_str *name); | ||||
| int btrfs_add_link(struct btrfs_trans_handle *trans, | ||||
| 		   struct btrfs_inode *parent_inode, struct btrfs_inode *inode, | ||||
| 		   const struct fscrypt_str *name, int add_backref, u64 index); | ||||
| int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry); | ||||
| int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len, | ||||
| 			 int front); | ||||
| 
 | ||||
| int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context); | ||||
| int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr, | ||||
| 			       bool in_reclaim_context); | ||||
| int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end, | ||||
| 			      unsigned int extra_bits, | ||||
| 			      struct extent_state **cached_state); | ||||
| 
 | ||||
| struct btrfs_new_inode_args { | ||||
| 	/* Input */ | ||||
| 	struct inode *dir; | ||||
| 	struct dentry *dentry; | ||||
| 	struct inode *inode; | ||||
| 	bool orphan; | ||||
| 	bool subvol; | ||||
| 
 | ||||
| 	/* Output from btrfs_new_inode_prepare(), input to btrfs_create_new_inode(). */ | ||||
| 	struct posix_acl *default_acl; | ||||
| 	struct posix_acl *acl; | ||||
| 	struct fscrypt_name fname; | ||||
| }; | ||||
| 
 | ||||
| int btrfs_new_inode_prepare(struct btrfs_new_inode_args *args, | ||||
| 			    unsigned int *trans_num_items); | ||||
| int btrfs_create_new_inode(struct btrfs_trans_handle *trans, | ||||
| 			   struct btrfs_new_inode_args *args); | ||||
| void btrfs_new_inode_args_destroy(struct btrfs_new_inode_args *args); | ||||
| struct inode *btrfs_new_subvol_inode(struct user_namespace *mnt_userns, | ||||
| 				     struct inode *dir); | ||||
| void btrfs_set_delalloc_extent(struct btrfs_inode *inode, struct extent_state *state, | ||||
| 			       u32 bits); | ||||
| void btrfs_clear_delalloc_extent(struct btrfs_inode *inode, | ||||
| 				 struct extent_state *state, u32 bits); | ||||
| void btrfs_merge_delalloc_extent(struct btrfs_inode *inode, struct extent_state *new, | ||||
| 				 struct extent_state *other); | ||||
| void btrfs_split_delalloc_extent(struct btrfs_inode *inode, | ||||
| 				 struct extent_state *orig, u64 split); | ||||
| void btrfs_set_range_writeback(struct btrfs_inode *inode, u64 start, u64 end); | ||||
| vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf); | ||||
| void btrfs_evict_inode(struct inode *inode); | ||||
| struct inode *btrfs_alloc_inode(struct super_block *sb); | ||||
| void btrfs_destroy_inode(struct inode *inode); | ||||
| void btrfs_free_inode(struct inode *inode); | ||||
| int btrfs_drop_inode(struct inode *inode); | ||||
| int __init btrfs_init_cachep(void); | ||||
| void __cold btrfs_destroy_cachep(void); | ||||
| struct inode *btrfs_iget_path(struct super_block *s, u64 ino, | ||||
| 			      struct btrfs_root *root, struct btrfs_path *path); | ||||
| struct inode *btrfs_iget(struct super_block *s, u64 ino, struct btrfs_root *root); | ||||
| struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, | ||||
| 				    struct page *page, size_t pg_offset, | ||||
| 				    u64 start, u64 end); | ||||
| int btrfs_update_inode(struct btrfs_trans_handle *trans, | ||||
| 		       struct btrfs_root *root, struct btrfs_inode *inode); | ||||
| int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans, | ||||
| 				struct btrfs_root *root, struct btrfs_inode *inode); | ||||
| int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct btrfs_inode *inode); | ||||
| int btrfs_orphan_cleanup(struct btrfs_root *root); | ||||
| int btrfs_cont_expand(struct btrfs_inode *inode, loff_t oldsize, loff_t size); | ||||
| void btrfs_add_delayed_iput(struct btrfs_inode *inode); | ||||
| void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info); | ||||
| int btrfs_wait_on_delayed_iputs(struct btrfs_fs_info *fs_info); | ||||
| int btrfs_prealloc_file_range(struct inode *inode, int mode, | ||||
| 			      u64 start, u64 num_bytes, u64 min_size, | ||||
| 			      loff_t actual_len, u64 *alloc_hint); | ||||
| int btrfs_prealloc_file_range_trans(struct inode *inode, | ||||
| 				    struct btrfs_trans_handle *trans, int mode, | ||||
| 				    u64 start, u64 num_bytes, u64 min_size, | ||||
| 				    loff_t actual_len, u64 *alloc_hint); | ||||
| int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page, | ||||
| 			     u64 start, u64 end, int *page_started, | ||||
| 			     unsigned long *nr_written, struct writeback_control *wbc); | ||||
| int btrfs_writepage_cow_fixup(struct page *page); | ||||
| void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode, | ||||
| 					  struct page *page, u64 start, | ||||
| 					  u64 end, bool uptodate); | ||||
| int btrfs_encoded_io_compression_from_extent(struct btrfs_fs_info *fs_info, | ||||
| 					     int compress_type); | ||||
| int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode, | ||||
| 					  u64 file_offset, u64 disk_bytenr, | ||||
| 					  u64 disk_io_size, | ||||
| 					  struct page **pages); | ||||
| ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter, | ||||
| 			   struct btrfs_ioctl_encoded_io_args *encoded); | ||||
| ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from, | ||||
| 			       const struct btrfs_ioctl_encoded_io_args *encoded); | ||||
| 
 | ||||
| ssize_t btrfs_dio_read(struct kiocb *iocb, struct iov_iter *iter, | ||||
| 		       size_t done_before); | ||||
| struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter, | ||||
| 				  size_t done_before); | ||||
| 
 | ||||
| extern const struct dentry_operations btrfs_dentry_operations; | ||||
| 
 | ||||
| /* Inode locking type flags, by default the exclusive lock is taken. */ | ||||
| enum btrfs_ilock_type { | ||||
| 	ENUM_BIT(BTRFS_ILOCK_SHARED), | ||||
| 	ENUM_BIT(BTRFS_ILOCK_TRY), | ||||
| 	ENUM_BIT(BTRFS_ILOCK_MMAP), | ||||
| }; | ||||
| 
 | ||||
| int btrfs_inode_lock(struct btrfs_inode *inode, unsigned int ilock_flags); | ||||
| void btrfs_inode_unlock(struct btrfs_inode *inode, unsigned int ilock_flags); | ||||
| void btrfs_update_inode_bytes(struct btrfs_inode *inode, const u64 add_bytes, | ||||
| 			      const u64 del_bytes); | ||||
| void btrfs_assert_inode_range_clean(struct btrfs_inode *inode, u64 start, u64 end); | ||||
| 
 | ||||
| #endif | ||||
|  |  | |||
|  | @ -82,6 +82,7 @@ | |||
| #include <linux/mm.h> | ||||
| #include <linux/string.h> | ||||
| #include <crypto/hash.h> | ||||
| #include "messages.h" | ||||
| #include "ctree.h" | ||||
| #include "disk-io.h" | ||||
| #include "transaction.h" | ||||
|  | @ -92,6 +93,7 @@ | |||
| #include "check-integrity.h" | ||||
| #include "rcu-string.h" | ||||
| #include "compression.h" | ||||
| #include "accessors.h" | ||||
| 
 | ||||
| #define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000 | ||||
| #define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000 | ||||
|  | @ -755,7 +757,7 @@ static int btrfsic_process_superblock_dev_mirror( | |||
| 			btrfs_info_in_rcu(fs_info, | ||||
| 			"new initial S-block (bdev %p, %s) @%llu (%pg/%llu/%d)", | ||||
| 				     superblock_bdev, | ||||
| 				     rcu_str_deref(device->name), dev_bytenr, | ||||
| 				     btrfs_dev_name(device), dev_bytenr, | ||||
| 				     dev_state->bdev, dev_bytenr, | ||||
| 				     superblock_mirror_num); | ||||
| 		list_add(&superblock_tmp->all_blocks_node, | ||||
|  |  | |||
|  | @ -23,16 +23,19 @@ | |||
| #include <crypto/hash.h> | ||||
| #include "misc.h" | ||||
| #include "ctree.h" | ||||
| #include "fs.h" | ||||
| #include "disk-io.h" | ||||
| #include "transaction.h" | ||||
| #include "btrfs_inode.h" | ||||
| #include "volumes.h" | ||||
| #include "bio.h" | ||||
| #include "ordered-data.h" | ||||
| #include "compression.h" | ||||
| #include "extent_io.h" | ||||
| #include "extent_map.h" | ||||
| #include "subpage.h" | ||||
| #include "zoned.h" | ||||
| #include "file-item.h" | ||||
| #include "super.h" | ||||
| 
 | ||||
| static const char* const btrfs_compress_types[] = { "", "zlib", "lzo", "zstd" }; | ||||
| 
 | ||||
|  | @ -116,7 +119,7 @@ static int compression_decompress_bio(struct list_head *ws, | |||
| } | ||||
| 
 | ||||
| static int compression_decompress(int type, struct list_head *ws, | ||||
|                unsigned char *data_in, struct page *dest_page, | ||||
|                const u8 *data_in, struct page *dest_page, | ||||
|                unsigned long start_byte, size_t srclen, size_t destlen) | ||||
| { | ||||
| 	switch (type) { | ||||
|  | @ -183,7 +186,7 @@ static void end_compressed_bio_read(struct btrfs_bio *bbio) | |||
| 		u64 start = bbio->file_offset + offset; | ||||
| 
 | ||||
| 		if (!status && | ||||
| 		    (!csum || !btrfs_check_data_csum(inode, bbio, offset, | ||||
| 		    (!csum || !btrfs_check_data_csum(bi, bbio, offset, | ||||
| 						     bv.bv_page, bv.bv_offset))) { | ||||
| 			btrfs_clean_io_failure(bi, start, bv.bv_page, | ||||
| 					       bv.bv_offset); | ||||
|  | @ -191,9 +194,9 @@ static void end_compressed_bio_read(struct btrfs_bio *bbio) | |||
| 			int ret; | ||||
| 
 | ||||
| 			refcount_inc(&cb->pending_ios); | ||||
| 			ret = btrfs_repair_one_sector(inode, bbio, offset, | ||||
| 			ret = btrfs_repair_one_sector(BTRFS_I(inode), bbio, offset, | ||||
| 						      bv.bv_page, bv.bv_offset, | ||||
| 						      btrfs_submit_data_read_bio); | ||||
| 						      true); | ||||
| 			if (ret) { | ||||
| 				refcount_dec(&cb->pending_ios); | ||||
| 				status = errno_to_blk_status(ret); | ||||
|  | @ -1229,7 +1232,7 @@ static int btrfs_decompress_bio(struct compressed_bio *cb) | |||
|  * single page, and we want to read a single page out of it. | ||||
|  * start_byte tells us the offset into the compressed data we're interested in | ||||
|  */ | ||||
| int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page, | ||||
| int btrfs_decompress(int type, const u8 *data_in, struct page *dest_page, | ||||
| 		     unsigned long start_byte, size_t srclen, size_t destlen) | ||||
| { | ||||
| 	struct list_head *workspace; | ||||
|  | @ -1243,12 +1246,13 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page, | |||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| void __init btrfs_init_compress(void) | ||||
| int __init btrfs_init_compress(void) | ||||
| { | ||||
| 	btrfs_init_workspace_manager(BTRFS_COMPRESS_NONE); | ||||
| 	btrfs_init_workspace_manager(BTRFS_COMPRESS_ZLIB); | ||||
| 	btrfs_init_workspace_manager(BTRFS_COMPRESS_LZO); | ||||
| 	zstd_init_workspace_manager(); | ||||
| 	return 0; | ||||
| } | ||||
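
btrfs_init_compress() now returns int even though it cannot fail, which lets module init propagate errors uniformly and, presumably, be driven from a table of init/exit pairs. A sketch of such a pattern (struct and array names are assumptions):

	struct init_sequence_sketch {
		int (*init_func)(void);
		void (*exit_func)(void);
	};

	static const struct init_sequence_sketch mod_init_seq_sketch[] = {
		{ .init_func = btrfs_init_compress, .exit_func = btrfs_exit_compress },
		/* ... remaining subsystems, in dependency order ... */
	};
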
| 
 | ||||
| void __cold btrfs_exit_compress(void) | ||||
|  |  | |||
|  | @ -6,6 +6,7 @@ | |||
| #ifndef BTRFS_COMPRESSION_H | ||||
| #define BTRFS_COMPRESSION_H | ||||
| 
 | ||||
| #include <linux/blk_types.h> | ||||
| #include <linux/sizes.h> | ||||
| 
 | ||||
| struct btrfs_inode; | ||||
|  | @ -77,7 +78,7 @@ static inline unsigned int btrfs_compress_level(unsigned int type_level) | |||
| 	return ((type_level & 0xF0) >> 4); | ||||
| } | ||||
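
btrfs_compress_level() unpacks the level from the high nibble of the packed type/level byte; the compression type presumably occupies the low nibble. For example:

	/* zstd at level 3: low nibble = type (assumption), high nibble = level. */
	unsigned int type_level = BTRFS_COMPRESS_ZSTD | (3 << 4);
	/* btrfs_compress_level(type_level) == 3 */
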
| 
 | ||||
| void __init btrfs_init_compress(void); | ||||
| int __init btrfs_init_compress(void); | ||||
| void __cold btrfs_exit_compress(void); | ||||
| 
 | ||||
| int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping, | ||||
|  | @ -85,7 +86,7 @@ int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping, | |||
| 			 unsigned long *out_pages, | ||||
| 			 unsigned long *total_in, | ||||
| 			 unsigned long *total_out); | ||||
| int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page, | ||||
| int btrfs_decompress(int type, const u8 *data_in, struct page *dest_page, | ||||
| 		     unsigned long start_byte, size_t srclen, size_t destlen); | ||||
| int btrfs_decompress_buf2page(const char *buf, u32 buf_len, | ||||
| 			      struct compressed_bio *cb, u32 decompressed); | ||||
|  | @ -149,7 +150,7 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping, | |||
| 		u64 start, struct page **pages, unsigned long *out_pages, | ||||
| 		unsigned long *total_in, unsigned long *total_out); | ||||
| int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb); | ||||
| int zlib_decompress(struct list_head *ws, unsigned char *data_in, | ||||
| int zlib_decompress(struct list_head *ws, const u8 *data_in, | ||||
| 		struct page *dest_page, unsigned long start_byte, size_t srclen, | ||||
| 		size_t destlen); | ||||
| struct list_head *zlib_alloc_workspace(unsigned int level); | ||||
|  | @ -160,7 +161,7 @@ int lzo_compress_pages(struct list_head *ws, struct address_space *mapping, | |||
| 		u64 start, struct page **pages, unsigned long *out_pages, | ||||
| 		unsigned long *total_in, unsigned long *total_out); | ||||
| int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb); | ||||
| int lzo_decompress(struct list_head *ws, unsigned char *data_in, | ||||
| int lzo_decompress(struct list_head *ws, const u8 *data_in, | ||||
| 		struct page *dest_page, unsigned long start_byte, size_t srclen, | ||||
| 		size_t destlen); | ||||
| struct list_head *lzo_alloc_workspace(unsigned int level); | ||||
|  | @ -170,7 +171,7 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping, | |||
| 		u64 start, struct page **pages, unsigned long *out_pages, | ||||
| 		unsigned long *total_in, unsigned long *total_out); | ||||
| int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb); | ||||
| int zstd_decompress(struct list_head *ws, unsigned char *data_in, | ||||
| int zstd_decompress(struct list_head *ws, const u8 *data_in, | ||||
| 		struct page *dest_page, unsigned long start_byte, size_t srclen, | ||||
| 		size_t destlen); | ||||
| void zstd_init_workspace_manager(void); | ||||
|  |  | |||
fs/btrfs/ctree.c (311 changed lines)
|  | @ -8,6 +8,7 @@ | |||
| #include <linux/rbtree.h> | ||||
| #include <linux/mm.h> | ||||
| #include <linux/error-injection.h> | ||||
| #include "messages.h" | ||||
| #include "ctree.h" | ||||
| #include "disk-io.h" | ||||
| #include "transaction.h" | ||||
|  | @ -17,6 +18,13 @@ | |||
| #include "qgroup.h" | ||||
| #include "tree-mod-log.h" | ||||
| #include "tree-checker.h" | ||||
| #include "fs.h" | ||||
| #include "accessors.h" | ||||
| #include "extent-tree.h" | ||||
| #include "relocation.h" | ||||
| #include "file-item.h" | ||||
| 
 | ||||
| static struct kmem_cache *btrfs_path_cachep; | ||||
| 
 | ||||
| static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root | ||||
| 		      *root, struct btrfs_path *path, int level); | ||||
|  | @ -44,6 +52,104 @@ static const struct btrfs_csums { | |||
| 				     .driver = "blake2b-256" }, | ||||
| }; | ||||
| 
 | ||||
| /*
 | ||||
|  * The leaf data grows from end-to-front in the node.  This returns the address | ||||
|  * of the start of the last item, which is the lowest used offset in the leaf | ||||
|  * data area. | ||||
|  */ | ||||
| static unsigned int leaf_data_end(const struct extent_buffer *leaf) | ||||
| { | ||||
| 	u32 nr = btrfs_header_nritems(leaf); | ||||
| 
 | ||||
| 	if (nr == 0) | ||||
| 		return BTRFS_LEAF_DATA_SIZE(leaf->fs_info); | ||||
| 	return btrfs_item_offset(leaf, nr - 1); | ||||
| } | ||||
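
The end-to-front layout the helper relies on can be pictured like this:

	/*
	 * Leaf layout (both regions grow toward the middle):
	 *
	 *   [header][item 0][item 1] ...     ... [data 1][data 0]
	 *            items grow ->            <- item data grows
	 *
	 * leaf_data_end() is the offset of the last item's data, i.e. the
	 * lowest used offset in the data area.
	 */
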
| 
 | ||||
| /*
 | ||||
|  * Move data in a @leaf (using memmove, safe for overlapping ranges). | ||||
|  * | ||||
|  * @leaf:	leaf that we're doing a memmove on | ||||
|  * @dst_offset:	item data offset we're moving to | ||||
|  * @src_offset:	item data offset we're moving from | ||||
|  * @len:	length of the data we're moving | ||||
|  * | ||||
|  * Wrapper around memmove_extent_buffer() that takes into account the header on | ||||
|  * the leaf.  The btrfs_item offsets start directly after the header, so we | ||||
|  * have to adjust any offsets to account for the header in the leaf.  This | ||||
|  * handles that math to simplify the callers. | ||||
|  */ | ||||
| static inline void memmove_leaf_data(const struct extent_buffer *leaf, | ||||
| 				     unsigned long dst_offset, | ||||
| 				     unsigned long src_offset, | ||||
| 				     unsigned long len) | ||||
| { | ||||
| 	memmove_extent_buffer(leaf, btrfs_item_nr_offset(leaf, 0) + dst_offset, | ||||
| 			      btrfs_item_nr_offset(leaf, 0) + src_offset, len); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Copy item data from @src into @dst at the given offsets. | ||||
|  * | ||||
|  * @dst:	destination leaf that we're copying into | ||||
|  * @src:	source leaf that we're copying from | ||||
|  * @dst_offset:	item data offset we're copying to | ||||
|  * @src_offset:	item data offset we're copying from | ||||
|  * @len:	length of the data we're copying | ||||
|  * | ||||
|  * Wrapper around copy_extent_buffer() that takes into account the header on | ||||
|  * the leaf.  The btrfs_item offsets start directly after the header, so we | ||||
|  * have to adjust any offsets to account for the header in the leaf.  This | ||||
|  * handles that math to simplify the callers. | ||||
|  */ | ||||
| static inline void copy_leaf_data(const struct extent_buffer *dst, | ||||
| 				  const struct extent_buffer *src, | ||||
| 				  unsigned long dst_offset, | ||||
| 				  unsigned long src_offset, unsigned long len) | ||||
| { | ||||
| 	copy_extent_buffer(dst, src, btrfs_item_nr_offset(dst, 0) + dst_offset, | ||||
| 			   btrfs_item_nr_offset(src, 0) + src_offset, len); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Move items in a @leaf (using memmove). | ||||
|  * | ||||
|  * @leaf:	the leaf we're moving items within | ||||
|  * @dst_item:	the item nr we're copying into | ||||
|  * @src_item:	the item nr we're copying from | ||||
|  * @nr_items:	the number of items to copy | ||||
|  * | ||||
|  * Wrapper around memmove_extent_buffer() that does the math to get the | ||||
|  * appropriate offsets into the leaf from the item numbers. | ||||
|  */ | ||||
| static inline void memmove_leaf_items(const struct extent_buffer *leaf, | ||||
| 				      int dst_item, int src_item, int nr_items) | ||||
| { | ||||
| 	memmove_extent_buffer(leaf, btrfs_item_nr_offset(leaf, dst_item), | ||||
| 			      btrfs_item_nr_offset(leaf, src_item), | ||||
| 			      nr_items * sizeof(struct btrfs_item)); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Copy items from @src into @dst at the given item numbers. | ||||
|  * | ||||
|  * @dst:	destination leaf for the items | ||||
|  * @src:	source leaf for the items | ||||
|  * @dst_item:	the item nr we're copying into | ||||
|  * @src_item:	the item nr we're copying from | ||||
|  * @nr_items:	the number of items to copy | ||||
|  * | ||||
|  * Wrapper around copy_extent_buffer() that does the math to get the | ||||
|  * appropriate offsets into the leaf from the item numbers. | ||||
|  */ | ||||
| static inline void copy_leaf_items(const struct extent_buffer *dst, | ||||
| 				   const struct extent_buffer *src, | ||||
| 				   int dst_item, int src_item, int nr_items) | ||||
| { | ||||
| 	copy_extent_buffer(dst, src, btrfs_item_nr_offset(dst, dst_item), | ||||
| 			      btrfs_item_nr_offset(src, src_item), | ||||
| 			      nr_items * sizeof(struct btrfs_item)); | ||||
| } | ||||
| 
 | ||||
| int btrfs_super_csum_size(const struct btrfs_super_block *s) | ||||
| { | ||||
| 	u16 t = btrfs_super_csum_type(s); | ||||
|  | @ -78,6 +184,8 @@ size_t __attribute_const__ btrfs_get_num_csums(void) | |||
| 
 | ||||
| struct btrfs_path *btrfs_alloc_path(void) | ||||
| { | ||||
| 	might_sleep(); | ||||
| 
 | ||||
| 	return kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS); | ||||
| } | ||||
| 
 | ||||
|  | @ -487,7 +595,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
| 	} else { | ||||
| 		WARN_ON(trans->transid != btrfs_header_generation(parent)); | ||||
| 		btrfs_tree_mod_log_insert_key(parent, parent_slot, | ||||
| 					      BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS); | ||||
| 					      BTRFS_MOD_LOG_KEY_REPLACE); | ||||
| 		btrfs_set_node_blockptr(parent, parent_slot, | ||||
| 					cow->start); | ||||
| 		btrfs_set_node_ptr_generation(parent, parent_slot, | ||||
|  | @ -850,19 +958,22 @@ struct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent, | |||
| 					   int slot) | ||||
| { | ||||
| 	int level = btrfs_header_level(parent); | ||||
| 	struct btrfs_tree_parent_check check = { 0 }; | ||||
| 	struct extent_buffer *eb; | ||||
| 	struct btrfs_key first_key; | ||||
| 
 | ||||
| 	if (slot < 0 || slot >= btrfs_header_nritems(parent)) | ||||
| 		return ERR_PTR(-ENOENT); | ||||
| 
 | ||||
| 	BUG_ON(level == 0); | ||||
| 
 | ||||
| 	btrfs_node_key_to_cpu(parent, &first_key, slot); | ||||
| 	check.level = level - 1; | ||||
| 	check.transid = btrfs_node_ptr_generation(parent, slot); | ||||
| 	check.owner_root = btrfs_header_owner(parent); | ||||
| 	check.has_first_key = true; | ||||
| 	btrfs_node_key_to_cpu(parent, &check.first_key, slot); | ||||
| 
 | ||||
| 	eb = read_tree_block(parent->fs_info, btrfs_node_blockptr(parent, slot), | ||||
| 			     btrfs_header_owner(parent), | ||||
| 			     btrfs_node_ptr_generation(parent, slot), | ||||
| 			     level - 1, &first_key); | ||||
| 			     &check); | ||||
| 	if (IS_ERR(eb)) | ||||
| 		return eb; | ||||
| 	if (!extent_buffer_uptodate(eb)) { | ||||
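
The parentness expectations are now funneled through a single btrfs_tree_parent_check structure instead of separate owner/generation/level/first-key arguments to read_tree_block(). Judging only by the fields populated in this file, the structure looks roughly like the following sketch (the authoritative definition lives elsewhere in the series):

	struct btrfs_tree_parent_check_sketch {
		u64 owner_root;              /* expected btrfs_header_owner() of the child */
		u64 transid;                 /* expected generation of the child block */
		struct btrfs_key first_key;  /* expected first key, when has_first_key is set */
		bool has_first_key;
		u8 level;                    /* expected btrfs_header_level() of the child */
	};
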
|  | @ -1016,7 +1127,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
| 			struct btrfs_disk_key right_key; | ||||
| 			btrfs_node_key(right, &right_key, 0); | ||||
| 			ret = btrfs_tree_mod_log_insert_key(parent, pslot + 1, | ||||
| 					BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS); | ||||
| 					BTRFS_MOD_LOG_KEY_REPLACE); | ||||
| 			BUG_ON(ret < 0); | ||||
| 			btrfs_set_node_key(parent, &right_key, pslot + 1); | ||||
| 			btrfs_mark_buffer_dirty(parent); | ||||
|  | @ -1062,7 +1173,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
| 		struct btrfs_disk_key mid_key; | ||||
| 		btrfs_node_key(mid, &mid_key, 0); | ||||
| 		ret = btrfs_tree_mod_log_insert_key(parent, pslot, | ||||
| 				BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS); | ||||
| 						    BTRFS_MOD_LOG_KEY_REPLACE); | ||||
| 		BUG_ON(ret < 0); | ||||
| 		btrfs_set_node_key(parent, &mid_key, pslot); | ||||
| 		btrfs_mark_buffer_dirty(parent); | ||||
|  | @ -1164,7 +1275,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, | |||
| 			orig_slot += left_nr; | ||||
| 			btrfs_node_key(mid, &disk_key, 0); | ||||
| 			ret = btrfs_tree_mod_log_insert_key(parent, pslot, | ||||
| 					BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS); | ||||
| 					BTRFS_MOD_LOG_KEY_REPLACE); | ||||
| 			BUG_ON(ret < 0); | ||||
| 			btrfs_set_node_key(parent, &disk_key, pslot); | ||||
| 			btrfs_mark_buffer_dirty(parent); | ||||
|  | @ -1218,7 +1329,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, | |||
| 
 | ||||
| 			btrfs_node_key(right, &disk_key, 0); | ||||
| 			ret = btrfs_tree_mod_log_insert_key(parent, pslot + 1, | ||||
| 					BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS); | ||||
| 					BTRFS_MOD_LOG_KEY_REPLACE); | ||||
| 			BUG_ON(ret < 0); | ||||
| 			btrfs_set_node_key(parent, &disk_key, pslot + 1); | ||||
| 			btrfs_mark_buffer_dirty(parent); | ||||
|  | @ -1421,10 +1532,10 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, | |||
| 		      const struct btrfs_key *key) | ||||
| { | ||||
| 	struct btrfs_fs_info *fs_info = root->fs_info; | ||||
| 	struct btrfs_tree_parent_check check = { 0 }; | ||||
| 	u64 blocknr; | ||||
| 	u64 gen; | ||||
| 	struct extent_buffer *tmp; | ||||
| 	struct btrfs_key first_key; | ||||
| 	int ret; | ||||
| 	int parent_level; | ||||
| 	bool unlock_up; | ||||
|  | @ -1433,7 +1544,11 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, | |||
| 	blocknr = btrfs_node_blockptr(*eb_ret, slot); | ||||
| 	gen = btrfs_node_ptr_generation(*eb_ret, slot); | ||||
| 	parent_level = btrfs_header_level(*eb_ret); | ||||
| 	btrfs_node_key_to_cpu(*eb_ret, &first_key, slot); | ||||
| 	btrfs_node_key_to_cpu(*eb_ret, &check.first_key, slot); | ||||
| 	check.has_first_key = true; | ||||
| 	check.level = parent_level - 1; | ||||
| 	check.transid = gen; | ||||
| 	check.owner_root = root->root_key.objectid; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * If we need to read an extent buffer from disk and we are holding locks | ||||
|  | @ -1455,7 +1570,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, | |||
| 			 * parents (shared tree blocks). | ||||
| 			 */ | ||||
| 			if (btrfs_verify_level_key(tmp, | ||||
| 					parent_level - 1, &first_key, gen)) { | ||||
| 					parent_level - 1, &check.first_key, gen)) { | ||||
| 				free_extent_buffer(tmp); | ||||
| 				return -EUCLEAN; | ||||
| 			} | ||||
|  | @ -1472,7 +1587,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, | |||
| 			btrfs_unlock_up_safe(p, level + 1); | ||||
| 
 | ||||
| 		/* now we're allowed to do a blocking uptodate check */ | ||||
| 		ret = btrfs_read_extent_buffer(tmp, gen, parent_level - 1, &first_key); | ||||
| 		ret = btrfs_read_extent_buffer(tmp, &check); | ||||
| 		if (ret) { | ||||
| 			free_extent_buffer(tmp); | ||||
| 			btrfs_release_path(p); | ||||
|  | @ -1502,8 +1617,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, | |||
| 	if (p->reada != READA_NONE) | ||||
| 		reada_for_search(fs_info, p, level, slot, key->objectid); | ||||
| 
 | ||||
| 	tmp = read_tree_block(fs_info, blocknr, root->root_key.objectid, | ||||
| 			      gen, parent_level - 1, &first_key); | ||||
| 	tmp = read_tree_block(fs_info, blocknr, &check); | ||||
| 	if (IS_ERR(tmp)) { | ||||
| 		btrfs_release_path(p); | ||||
| 		return PTR_ERR(tmp); | ||||
|  | @ -1934,6 +2048,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
| 	int min_write_lock_level; | ||||
| 	int prev_cmp; | ||||
| 
 | ||||
| 	might_sleep(); | ||||
| 
 | ||||
| 	lowest_level = p->lowest_level; | ||||
| 	WARN_ON(lowest_level && ins_len > 0); | ||||
| 	WARN_ON(p->nodes[0] != NULL); | ||||
|  | @ -2357,7 +2473,7 @@ int btrfs_search_backwards(struct btrfs_root *root, struct btrfs_key *key, | |||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
| /*
 | ||||
|  * Search for a valid slot for the given path. | ||||
|  * | ||||
|  * @root:	The root node of the tree. | ||||
|  | @ -2416,7 +2532,7 @@ static void fixup_low_keys(struct btrfs_path *path, | |||
| 			break; | ||||
| 		t = path->nodes[i]; | ||||
| 		ret = btrfs_tree_mod_log_insert_key(t, tslot, | ||||
| 				BTRFS_MOD_LOG_KEY_REPLACE, GFP_ATOMIC); | ||||
| 						    BTRFS_MOD_LOG_KEY_REPLACE); | ||||
| 		BUG_ON(ret < 0); | ||||
| 		btrfs_set_node_key(t, key, tslot); | ||||
| 		btrfs_mark_buffer_dirty(path->nodes[i]); | ||||
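
Note that every btrfs_tree_mod_log_insert_key() call in this file drops its gfp_t argument, including this formerly GFP_ATOMIC call site, so the tree-mod-log presumably settles on one internal allocation policy. The signature implied by the call sites (the enum name is an assumption):

	int btrfs_tree_mod_log_insert_key(struct extent_buffer *eb, int slot,
					  enum btrfs_mod_log_op op);
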
|  | @ -2585,8 +2701,8 @@ static int push_node_left(struct btrfs_trans_handle *trans, | |||
| 		return ret; | ||||
| 	} | ||||
| 	copy_extent_buffer(dst, src, | ||||
| 			   btrfs_node_key_ptr_offset(dst_nritems), | ||||
| 			   btrfs_node_key_ptr_offset(0), | ||||
| 			   btrfs_node_key_ptr_offset(dst, dst_nritems), | ||||
| 			   btrfs_node_key_ptr_offset(src, 0), | ||||
| 			   push_items * sizeof(struct btrfs_key_ptr)); | ||||
| 
 | ||||
| 	if (push_items < src_nritems) { | ||||
|  | @ -2594,8 +2710,8 @@ static int push_node_left(struct btrfs_trans_handle *trans, | |||
| 		 * Don't call btrfs_tree_mod_log_insert_move() here, key removal | ||||
| 		 * was already fully logged by btrfs_tree_mod_log_eb_copy() above. | ||||
| 		 */ | ||||
| 		memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0), | ||||
| 				      btrfs_node_key_ptr_offset(push_items), | ||||
| 		memmove_extent_buffer(src, btrfs_node_key_ptr_offset(src, 0), | ||||
| 				      btrfs_node_key_ptr_offset(src, push_items), | ||||
| 				      (src_nritems - push_items) * | ||||
| 				      sizeof(struct btrfs_key_ptr)); | ||||
| 	} | ||||
|  | @ -2655,8 +2771,8 @@ static int balance_node_right(struct btrfs_trans_handle *trans, | |||
| 	} | ||||
| 	ret = btrfs_tree_mod_log_insert_move(dst, push_items, 0, dst_nritems); | ||||
| 	BUG_ON(ret < 0); | ||||
| 	memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items), | ||||
| 				      btrfs_node_key_ptr_offset(0), | ||||
| 	memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(dst, push_items), | ||||
| 				      btrfs_node_key_ptr_offset(dst, 0), | ||||
| 				      (dst_nritems) * | ||||
| 				      sizeof(struct btrfs_key_ptr)); | ||||
| 
 | ||||
|  | @ -2667,8 +2783,8 @@ static int balance_node_right(struct btrfs_trans_handle *trans, | |||
| 		return ret; | ||||
| 	} | ||||
| 	copy_extent_buffer(dst, src, | ||||
| 			   btrfs_node_key_ptr_offset(0), | ||||
| 			   btrfs_node_key_ptr_offset(src_nritems - push_items), | ||||
| 			   btrfs_node_key_ptr_offset(dst, 0), | ||||
| 			   btrfs_node_key_ptr_offset(src, src_nritems - push_items), | ||||
| 			   push_items * sizeof(struct btrfs_key_ptr)); | ||||
| 
 | ||||
| 	btrfs_set_header_nritems(src, src_nritems - push_items); | ||||
|  | @ -2771,13 +2887,13 @@ static void insert_ptr(struct btrfs_trans_handle *trans, | |||
| 			BUG_ON(ret < 0); | ||||
| 		} | ||||
| 		memmove_extent_buffer(lower, | ||||
| 			      btrfs_node_key_ptr_offset(slot + 1), | ||||
| 			      btrfs_node_key_ptr_offset(slot), | ||||
| 			      btrfs_node_key_ptr_offset(lower, slot + 1), | ||||
| 			      btrfs_node_key_ptr_offset(lower, slot), | ||||
| 			      (nritems - slot) * sizeof(struct btrfs_key_ptr)); | ||||
| 	} | ||||
| 	if (level) { | ||||
| 		ret = btrfs_tree_mod_log_insert_key(lower, slot, | ||||
| 					    BTRFS_MOD_LOG_KEY_ADD, GFP_NOFS); | ||||
| 						    BTRFS_MOD_LOG_KEY_ADD); | ||||
| 		BUG_ON(ret < 0); | ||||
| 	} | ||||
| 	btrfs_set_node_key(lower, key, slot); | ||||
|  | @ -2854,8 +2970,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans, | |||
| 		return ret; | ||||
| 	} | ||||
| 	copy_extent_buffer(split, c, | ||||
| 			   btrfs_node_key_ptr_offset(0), | ||||
| 			   btrfs_node_key_ptr_offset(mid), | ||||
| 			   btrfs_node_key_ptr_offset(split, 0), | ||||
| 			   btrfs_node_key_ptr_offset(c, mid), | ||||
| 			   (c_nritems - mid) * sizeof(struct btrfs_key_ptr)); | ||||
| 	btrfs_set_header_nritems(split, c_nritems - mid); | ||||
| 	btrfs_set_header_nritems(c, mid); | ||||
|  | @ -2995,25 +3111,17 @@ static noinline int __push_leaf_right(struct btrfs_path *path, | |||
| 
 | ||||
| 	/* make room in the right data area */ | ||||
| 	data_end = leaf_data_end(right); | ||||
| 	memmove_extent_buffer(right, | ||||
| 			      BTRFS_LEAF_DATA_OFFSET + data_end - push_space, | ||||
| 			      BTRFS_LEAF_DATA_OFFSET + data_end, | ||||
| 			      BTRFS_LEAF_DATA_SIZE(fs_info) - data_end); | ||||
| 	memmove_leaf_data(right, data_end - push_space, data_end, | ||||
| 			  BTRFS_LEAF_DATA_SIZE(fs_info) - data_end); | ||||
| 
 | ||||
| 	/* copy from the left data area */ | ||||
| 	copy_extent_buffer(right, left, BTRFS_LEAF_DATA_OFFSET + | ||||
| 		     BTRFS_LEAF_DATA_SIZE(fs_info) - push_space, | ||||
| 		     BTRFS_LEAF_DATA_OFFSET + leaf_data_end(left), | ||||
| 		     push_space); | ||||
| 	copy_leaf_data(right, left, BTRFS_LEAF_DATA_SIZE(fs_info) - push_space, | ||||
| 		       leaf_data_end(left), push_space); | ||||
| 
 | ||||
| 	memmove_extent_buffer(right, btrfs_item_nr_offset(push_items), | ||||
| 			      btrfs_item_nr_offset(0), | ||||
| 			      right_nritems * sizeof(struct btrfs_item)); | ||||
| 	memmove_leaf_items(right, push_items, 0, right_nritems); | ||||
| 
 | ||||
| 	/* copy the items from left to right */ | ||||
| 	copy_extent_buffer(right, left, btrfs_item_nr_offset(0), | ||||
| 		   btrfs_item_nr_offset(left_nritems - push_items), | ||||
| 		   push_items * sizeof(struct btrfs_item)); | ||||
| 	copy_leaf_items(right, left, 0, left_nritems - push_items, push_items); | ||||
| 
 | ||||
| 	/* update the item pointers */ | ||||
| 	btrfs_init_map_token(&token, right); | ||||
|  | @ -3205,19 +3313,13 @@ static noinline int __push_leaf_left(struct btrfs_path *path, int data_size, | |||
| 	WARN_ON(!empty && push_items == btrfs_header_nritems(right)); | ||||
| 
 | ||||
| 	/* push data from right to left */ | ||||
| 	copy_extent_buffer(left, right, | ||||
| 			   btrfs_item_nr_offset(btrfs_header_nritems(left)), | ||||
| 			   btrfs_item_nr_offset(0), | ||||
| 			   push_items * sizeof(struct btrfs_item)); | ||||
| 	copy_leaf_items(left, right, btrfs_header_nritems(left), 0, push_items); | ||||
| 
 | ||||
| 	push_space = BTRFS_LEAF_DATA_SIZE(fs_info) - | ||||
| 		     btrfs_item_offset(right, push_items - 1); | ||||
| 
 | ||||
| 	copy_extent_buffer(left, right, BTRFS_LEAF_DATA_OFFSET + | ||||
| 		     leaf_data_end(left) - push_space, | ||||
| 		     BTRFS_LEAF_DATA_OFFSET + | ||||
| 		     btrfs_item_offset(right, push_items - 1), | ||||
| 		     push_space); | ||||
| 	copy_leaf_data(left, right, leaf_data_end(left) - push_space, | ||||
| 		       btrfs_item_offset(right, push_items - 1), push_space); | ||||
| 	old_left_nritems = btrfs_header_nritems(left); | ||||
| 	BUG_ON(old_left_nritems <= 0); | ||||
| 
 | ||||
|  | @ -3240,15 +3342,12 @@ static noinline int __push_leaf_left(struct btrfs_path *path, int data_size, | |||
| 	if (push_items < right_nritems) { | ||||
| 		push_space = btrfs_item_offset(right, push_items - 1) - | ||||
| 						  leaf_data_end(right); | ||||
| 		memmove_extent_buffer(right, BTRFS_LEAF_DATA_OFFSET + | ||||
| 				      BTRFS_LEAF_DATA_SIZE(fs_info) - push_space, | ||||
| 				      BTRFS_LEAF_DATA_OFFSET + | ||||
| 				      leaf_data_end(right), push_space); | ||||
| 		memmove_leaf_data(right, | ||||
| 				  BTRFS_LEAF_DATA_SIZE(fs_info) - push_space, | ||||
| 				  leaf_data_end(right), push_space); | ||||
| 
 | ||||
| 		memmove_extent_buffer(right, btrfs_item_nr_offset(0), | ||||
| 			      btrfs_item_nr_offset(push_items), | ||||
| 			     (btrfs_header_nritems(right) - push_items) * | ||||
| 			     sizeof(struct btrfs_item)); | ||||
| 		memmove_leaf_items(right, 0, push_items, | ||||
| 				   btrfs_header_nritems(right) - push_items); | ||||
| 	} | ||||
| 
 | ||||
| 	btrfs_init_map_token(&token, right); | ||||
|  | @ -3380,14 +3479,10 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans, | |||
| 	btrfs_set_header_nritems(right, nritems); | ||||
| 	data_copy_size = btrfs_item_data_end(l, mid) - leaf_data_end(l); | ||||
| 
 | ||||
| 	copy_extent_buffer(right, l, btrfs_item_nr_offset(0), | ||||
| 			   btrfs_item_nr_offset(mid), | ||||
| 			   nritems * sizeof(struct btrfs_item)); | ||||
| 	copy_leaf_items(right, l, 0, mid, nritems); | ||||
| 
 | ||||
| 	copy_extent_buffer(right, l, | ||||
| 		     BTRFS_LEAF_DATA_OFFSET + BTRFS_LEAF_DATA_SIZE(fs_info) - | ||||
| 		     data_copy_size, BTRFS_LEAF_DATA_OFFSET + | ||||
| 		     leaf_data_end(l), data_copy_size); | ||||
| 	copy_leaf_data(right, l, BTRFS_LEAF_DATA_SIZE(fs_info) - data_copy_size, | ||||
| 		       leaf_data_end(l), data_copy_size); | ||||
| 
 | ||||
| 	rt_data_off = BTRFS_LEAF_DATA_SIZE(fs_info) - btrfs_item_data_end(l, mid); | ||||
| 
 | ||||
|  | @ -3757,9 +3852,7 @@ static noinline int split_item(struct btrfs_path *path, | |||
| 	nritems = btrfs_header_nritems(leaf); | ||||
| 	if (slot != nritems) { | ||||
| 		/* shift the items */ | ||||
| 		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1), | ||||
| 				btrfs_item_nr_offset(slot), | ||||
| 				(nritems - slot) * sizeof(struct btrfs_item)); | ||||
| 		memmove_leaf_items(leaf, slot + 1, slot, nritems - slot); | ||||
| 	} | ||||
| 
 | ||||
| 	btrfs_cpu_key_to_disk(&disk_key, new_key); | ||||
|  | @ -3870,9 +3963,8 @@ void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end) | |||
| 
 | ||||
| 	/* shift the data */ | ||||
| 	if (from_end) { | ||||
| 		memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET + | ||||
| 			      data_end + size_diff, BTRFS_LEAF_DATA_OFFSET + | ||||
| 			      data_end, old_data_start + new_size - data_end); | ||||
| 		memmove_leaf_data(leaf, data_end + size_diff, data_end, | ||||
| 				  old_data_start + new_size - data_end); | ||||
| 	} else { | ||||
| 		struct btrfs_disk_key disk_key; | ||||
| 		u64 offset; | ||||
|  | @ -3897,9 +3989,8 @@ void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end) | |||
| 			} | ||||
| 		} | ||||
| 
 | ||||
| 		memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET + | ||||
| 			      data_end + size_diff, BTRFS_LEAF_DATA_OFFSET + | ||||
| 			      data_end, old_data_start - data_end); | ||||
| 		memmove_leaf_data(leaf, data_end + size_diff, data_end, | ||||
| 				  old_data_start - data_end); | ||||
| 
 | ||||
| 		offset = btrfs_disk_key_offset(&disk_key); | ||||
| 		btrfs_set_disk_key_offset(&disk_key, offset + size_diff); | ||||
|  | @ -3964,9 +4055,8 @@ void btrfs_extend_item(struct btrfs_path *path, u32 data_size) | |||
| 	} | ||||
| 
 | ||||
| 	/* shift the data */ | ||||
| 	memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET + | ||||
| 		      data_end - data_size, BTRFS_LEAF_DATA_OFFSET + | ||||
| 		      data_end, old_data - data_end); | ||||
| 	memmove_leaf_data(leaf, data_end - data_size, data_end, | ||||
| 			  old_data - data_end); | ||||
| 
 | ||||
| 	data_end = old_data; | ||||
| 	old_size = btrfs_item_size(leaf, slot); | ||||
|  | @ -3979,14 +4069,15 @@ void btrfs_extend_item(struct btrfs_path *path, u32 data_size) | |||
| 	} | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * setup_items_for_insert - Helper called before inserting one or more items | ||||
|  * to a leaf. Main purpose is to save stack depth by doing the bulk of the work | ||||
|  * in a function that doesn't call btrfs_search_slot | ||||
| /*
 | ||||
|  * Make space in the node before inserting one or more items. | ||||
|  * | ||||
|  * @root:	root we are inserting items to | ||||
|  * @path:	points to the leaf/slot where we are going to insert new items | ||||
|  * @batch:      information about the batch of items to insert | ||||
|  * | ||||
|  * Main purpose is to save stack depth by doing the bulk of the work in a | ||||
|  * function that doesn't call btrfs_search_slot | ||||
|  */ | ||||
| static void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path, | ||||
| 				   const struct btrfs_item_batch *batch) | ||||
|  | @ -4049,15 +4140,11 @@ static void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *p | |||
| 						       ioff - batch->total_data_size); | ||||
| 		} | ||||
| 		/* shift the items */ | ||||
| 		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + batch->nr), | ||||
| 			      btrfs_item_nr_offset(slot), | ||||
| 			      (nritems - slot) * sizeof(struct btrfs_item)); | ||||
| 		memmove_leaf_items(leaf, slot + batch->nr, slot, nritems - slot); | ||||
| 
 | ||||
| 		/* shift the data */ | ||||
| 		memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET + | ||||
| 				      data_end - batch->total_data_size, | ||||
| 				      BTRFS_LEAF_DATA_OFFSET + data_end, | ||||
| 				      old_data - data_end); | ||||
| 		memmove_leaf_data(leaf, data_end - batch->total_data_size, | ||||
| 				  data_end, old_data - data_end); | ||||
| 		data_end = old_data; | ||||
| 	} | ||||
| 
 | ||||
|  | @ -4211,13 +4298,13 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path, | |||
| 			BUG_ON(ret < 0); | ||||
| 		} | ||||
| 		memmove_extent_buffer(parent, | ||||
| 			      btrfs_node_key_ptr_offset(slot), | ||||
| 			      btrfs_node_key_ptr_offset(slot + 1), | ||||
| 			      btrfs_node_key_ptr_offset(parent, slot), | ||||
| 			      btrfs_node_key_ptr_offset(parent, slot + 1), | ||||
| 			      sizeof(struct btrfs_key_ptr) * | ||||
| 			      (nritems - slot - 1)); | ||||
| 	} else if (level) { | ||||
| 		ret = btrfs_tree_mod_log_insert_key(parent, slot, | ||||
| 				BTRFS_MOD_LOG_KEY_REMOVE, GFP_NOFS); | ||||
| 						    BTRFS_MOD_LOG_KEY_REMOVE); | ||||
| 		BUG_ON(ret < 0); | ||||
| 	} | ||||
| 
 | ||||
|  | @ -4292,10 +4379,8 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
| 		for (i = 0; i < nr; i++) | ||||
| 			dsize += btrfs_item_size(leaf, slot + i); | ||||
| 
 | ||||
| 		memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET + | ||||
| 			      data_end + dsize, | ||||
| 			      BTRFS_LEAF_DATA_OFFSET + data_end, | ||||
| 			      last_off - data_end); | ||||
| 		memmove_leaf_data(leaf, data_end + dsize, data_end, | ||||
| 				  last_off - data_end); | ||||
| 
 | ||||
| 		btrfs_init_map_token(&token, leaf); | ||||
| 		for (i = slot + nr; i < nritems; i++) { | ||||
|  | @ -4305,10 +4390,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
| 			btrfs_set_token_item_offset(&token, i, ioff + dsize); | ||||
| 		} | ||||
| 
 | ||||
| 		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot), | ||||
| 			      btrfs_item_nr_offset(slot + nr), | ||||
| 			      sizeof(struct btrfs_item) * | ||||
| 			      (nritems - slot - nr)); | ||||
| 		memmove_leaf_items(leaf, slot, slot + nr, nritems - slot - nr); | ||||
| 	} | ||||
| 	btrfs_set_header_nritems(leaf, nritems - nr); | ||||
| 	nritems -= nr; | ||||
|  | @ -4850,6 +4932,14 @@ done: | |||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| int btrfs_next_old_item(struct btrfs_root *root, struct btrfs_path *path, u64 time_seq) | ||||
| { | ||||
| 	path->slots[0]++; | ||||
| 	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) | ||||
| 		return btrfs_next_old_leaf(root, path, time_seq); | ||||
| 	return 0; | ||||
| } | ||||
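
The new helper folds the common "advance the slot, step to the next leaf when we fall off the end" idiom into one call. A hypothetical iteration using it:

	/* 0: positioned on another item; >0: past the last item; <0: error. */
	do {
		/* ... process path->nodes[0] at path->slots[0] ... */
		ret = btrfs_next_old_item(root, path, time_seq);
	} while (ret == 0);
	if (ret < 0)
		goto out;	/* hypothetical error label */
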
| 
 | ||||
| /*
 | ||||
|  * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps | ||||
|  * searching until it gets past min_objectid or finds an item of 'type' | ||||
|  | @ -4933,3 +5023,18 @@ int btrfs_previous_extent_item(struct btrfs_root *root, | |||
| 	} | ||||
| 	return 1; | ||||
| } | ||||
| 
 | ||||
| int __init btrfs_ctree_init(void) | ||||
| { | ||||
| 	btrfs_path_cachep = kmem_cache_create("btrfs_path", | ||||
| 			sizeof(struct btrfs_path), 0, | ||||
| 			SLAB_MEM_SPREAD, NULL); | ||||
| 	if (!btrfs_path_cachep) | ||||
| 		return -ENOMEM; | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| void __cold btrfs_ctree_exit(void) | ||||
| { | ||||
| 	kmem_cache_destroy(btrfs_path_cachep); | ||||
| } | ||||
|  |  | |||
fs/btrfs/ctree.h (3465 changed lines; file diff suppressed because it is too large)
fs/btrfs/defrag.c (1376 changed lines, new file; file diff suppressed because it is too large)
fs/btrfs/defrag.h (22 changed lines, new file)
|  | @ -0,0 +1,22 @@ | |||
| /* SPDX-License-Identifier: GPL-2.0 */ | ||||
| 
 | ||||
| #ifndef BTRFS_DEFRAG_H | ||||
| #define BTRFS_DEFRAG_H | ||||
| 
 | ||||
| int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra, | ||||
| 		      struct btrfs_ioctl_defrag_range_args *range, | ||||
| 		      u64 newer_than, unsigned long max_to_defrag); | ||||
| int __init btrfs_auto_defrag_init(void); | ||||
| void __cold btrfs_auto_defrag_exit(void); | ||||
| int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, | ||||
| 			   struct btrfs_inode *inode, u32 extent_thresh); | ||||
| int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info); | ||||
| void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info); | ||||
| int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, struct btrfs_root *root); | ||||
| 
 | ||||
| static inline int btrfs_defrag_cancelled(struct btrfs_fs_info *fs_info) | ||||
| { | ||||
| 	return signal_pending(current); | ||||
| } | ||||
| 
 | ||||
| #endif | ||||
|  | @ -1,5 +1,6 @@ | |||
| // SPDX-License-Identifier: GPL-2.0
 | ||||
| 
 | ||||
| #include "messages.h" | ||||
| #include "ctree.h" | ||||
| #include "delalloc-space.h" | ||||
| #include "block-rsv.h" | ||||
|  | @ -8,6 +9,7 @@ | |||
| #include "transaction.h" | ||||
| #include "qgroup.h" | ||||
| #include "block-group.h" | ||||
| #include "fs.h" | ||||
| 
 | ||||
| /*
 | ||||
|  * HOW DOES THIS WORK | ||||
|  | @ -200,8 +202,8 @@ void btrfs_free_reserved_data_space(struct btrfs_inode *inode, | |||
| 	btrfs_qgroup_free_data(inode, reserved, start, len); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * Release any excessive reservation | ||||
| /*
 | ||||
|  * Release any excessive reservations for an inode. | ||||
|  * | ||||
|  * @inode:       the inode we need to release from | ||||
|  * @qgroup_free: free or convert qgroup meta. Unlike normal operation, qgroup | ||||
|  | @ -375,12 +377,12 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes, | |||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * Release a metadata reservation for an inode | ||||
| /*
 | ||||
|  * Release a metadata reservation for an inode. | ||||
|  * | ||||
|  * @inode: the inode to release the reservation for. | ||||
|  * @num_bytes: the number of bytes we are releasing. | ||||
|  * @qgroup_free: free qgroup reservation or convert it to per-trans reservation | ||||
|  * @inode:        the inode to release the reservation for. | ||||
|  * @num_bytes:    the number of bytes we are releasing. | ||||
|  * @qgroup_free:  free qgroup reservation or convert it to per-trans reservation | ||||
|  * | ||||
|  * This will release the metadata reservation for an inode.  This can be called | ||||
|  * once we complete IO for a given set of bytes to release their metadata | ||||
|  | @ -403,10 +405,11 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes, | |||
| 	btrfs_inode_rsv_release(inode, qgroup_free); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * btrfs_delalloc_release_extents - release our outstanding_extents | ||||
|  * @inode: the inode to balance the reservation for. | ||||
|  * @num_bytes: the number of bytes we originally reserved with | ||||
| /*
 | ||||
|  * Release our outstanding_extents for an inode. | ||||
|  * | ||||
|  * @inode:      the inode to balance the reservation for. | ||||
|  * @num_bytes:  the number of bytes we originally reserved with | ||||
|  * | ||||
|  * When we reserve space we increase outstanding_extents for the extents we may | ||||
|  * add.  Once we've set the range as delalloc or created our ordered extents we | ||||
|  | @ -431,30 +434,30 @@ void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes) | |||
| 	btrfs_inode_rsv_release(inode, true); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * btrfs_delalloc_reserve_space - reserve data and metadata space for | ||||
|  * delalloc | ||||
|  * @inode: inode we're writing to | ||||
|  * @start: start range we are writing to | ||||
|  * @len: how long the range we are writing to | ||||
|  * @reserved: mandatory parameter, record actually reserved qgroup ranges of | ||||
|  * 	      current reservation. | ||||
| /*
 | ||||
|  * Reserve data and metadata space for delalloc | ||||
|  * | ||||
|  * @inode:     inode we're writing to | ||||
|  * @start:     start range we are writing to | ||||
|  * @len:       how long the range we are writing to | ||||
|  * @reserved:  mandatory parameter, record actually reserved qgroup ranges of | ||||
|  * 	       current reservation. | ||||
|  * | ||||
|  * This will do the following things | ||||
|  * | ||||
|  * - reserve space in data space info for num bytes | ||||
|  *   and reserve precious corresponding qgroup space | ||||
|  * - reserve space in data space info for num bytes and reserve precious | ||||
|  *   corresponding qgroup space | ||||
|  *   (Done in check_data_free_space) | ||||
|  * | ||||
|  * - reserve space for metadata space, based on the number of outstanding | ||||
|  *   extents and how much csums will be needed | ||||
|  *   also reserve metadata space in a per root over-reserve method. | ||||
|  *   extents and how much csums will be needed also reserve metadata space in a | ||||
|  *   per root over-reserve method. | ||||
|  * - add to the inodes->delalloc_bytes | ||||
|  * - add it to the fs_info's delalloc inodes list. | ||||
|  *   (Above 3 all done in delalloc_reserve_metadata) | ||||
|  * | ||||
|  * Return 0 for success | ||||
|  * Return <0 for error(-ENOSPC or -EQUOT) | ||||
|  * Return <0 for error(-ENOSPC or -EDQUOT) | ||||
|  */ | ||||
| int btrfs_delalloc_reserve_space(struct btrfs_inode *inode, | ||||
| 			struct extent_changeset **reserved, u64 start, u64 len) | ||||
|  | @ -473,7 +476,7 @@ int btrfs_delalloc_reserve_space(struct btrfs_inode *inode, | |||
| 	return ret; | ||||
| } | ||||
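
A typical write-path pairing of these reservation helpers, sketched with hypothetical surrounding context:

	struct extent_changeset *reserved = NULL;
	int ret;

	ret = btrfs_delalloc_reserve_space(inode, &reserved, start, len);
	if (ret < 0)
		return ret;		/* -ENOSPC or -EDQUOT */

	/* ... mark the range delalloc / create the ordered extents ... */

	/* Drop the outstanding_extents reference taken for this operation. */
	btrfs_delalloc_release_extents(inode, len);
	extent_changeset_free(reserved);	/* assumed cleanup helper */
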
| 
 | ||||
| /**
 | ||||
| /*
 | ||||
|  * Release data and metadata space for delalloc | ||||
|  * | ||||
|  * @inode:       inode we're releasing space for | ||||
|  | @ -482,10 +485,10 @@ int btrfs_delalloc_reserve_space(struct btrfs_inode *inode, | |||
|  * @len:         length of the space already reserved | ||||
|  * @qgroup_free: should qgroup reserved-space also be freed | ||||
|  * | ||||
|  * This function will release the metadata space that was not used and will | ||||
|  * decrement ->delalloc_bytes and remove it from the fs_info delalloc_inodes | ||||
|  * list if there are no delalloc bytes left. | ||||
|  * Also it will handle the qgroup reserved space. | ||||
|  * Release the metadata space that was not used and will decrement | ||||
|  * ->delalloc_bytes and remove it from the fs_info->delalloc_inodes list if | ||||
|  * there are no delalloc bytes left.  Also it will handle the qgroup reserved | ||||
|  * space. | ||||
|  */ | ||||
| void btrfs_delalloc_release_space(struct btrfs_inode *inode, | ||||
| 				  struct extent_changeset *reserved, | ||||
|  |  | |||
|  | @ -20,5 +20,8 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes, | |||
| 				     bool qgroup_free); | ||||
| int btrfs_delalloc_reserve_space(struct btrfs_inode *inode, | ||||
| 			struct extent_changeset **reserved, u64 start, u64 len); | ||||
| int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes, | ||||
| 				    u64 disk_num_bytes, bool noflush); | ||||
| void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes); | ||||
| 
 | ||||
| #endif /* BTRFS_DELALLOC_SPACE_H */ | ||||
|  |  | |||
|  | @ -6,14 +6,19 @@ | |||
| 
 | ||||
| #include <linux/slab.h> | ||||
| #include <linux/iversion.h> | ||||
| #include "ctree.h" | ||||
| #include "fs.h" | ||||
| #include "messages.h" | ||||
| #include "misc.h" | ||||
| #include "delayed-inode.h" | ||||
| #include "disk-io.h" | ||||
| #include "transaction.h" | ||||
| #include "ctree.h" | ||||
| #include "qgroup.h" | ||||
| #include "locking.h" | ||||
| #include "inode-item.h" | ||||
| #include "space-info.h" | ||||
| #include "accessors.h" | ||||
| #include "file-item.h" | ||||
| 
 | ||||
| #define BTRFS_DELAYED_WRITEBACK		512 | ||||
| #define BTRFS_DELAYED_BACKGROUND	128 | ||||
|  | @ -1412,7 +1417,7 @@ void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info) | |||
| int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, | ||||
| 				   const char *name, int name_len, | ||||
| 				   struct btrfs_inode *dir, | ||||
| 				   struct btrfs_disk_key *disk_key, u8 type, | ||||
| 				   struct btrfs_disk_key *disk_key, u8 flags, | ||||
| 				   u64 index) | ||||
| { | ||||
| 	struct btrfs_fs_info *fs_info = trans->fs_info; | ||||
|  | @ -1443,7 +1448,7 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, | |||
| 	btrfs_set_stack_dir_transid(dir_item, trans->transid); | ||||
| 	btrfs_set_stack_dir_data_len(dir_item, 0); | ||||
| 	btrfs_set_stack_dir_name_len(dir_item, name_len); | ||||
| 	btrfs_set_stack_dir_type(dir_item, type); | ||||
| 	btrfs_set_stack_dir_flags(dir_item, flags); | ||||
| 	memcpy((char *)(dir_item + 1), name, name_len); | ||||
| 
 | ||||
| 	data_len = delayed_item->data_len + sizeof(struct btrfs_item); | ||||
|  | @ -1641,8 +1646,8 @@ bool btrfs_readdir_get_delayed_items(struct inode *inode, | |||
| 	 * We can only do one readdir with delayed items at a time because of | ||||
| 	 * item->readdir_list. | ||||
| 	 */ | ||||
| 	btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED); | ||||
| 	btrfs_inode_lock(inode, 0); | ||||
| 	btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_SHARED); | ||||
| 	btrfs_inode_lock(BTRFS_I(inode), 0); | ||||
| 
 | ||||
| 	mutex_lock(&delayed_node->mutex); | ||||
| 	item = __btrfs_first_delayed_insertion_item(delayed_node); | ||||
|  | @ -1753,7 +1758,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, | |||
| 		name = (char *)(di + 1); | ||||
| 		name_len = btrfs_stack_dir_name_len(di); | ||||
| 
 | ||||
| 		d_type = fs_ftype_to_dtype(di->type); | ||||
| 		d_type = fs_ftype_to_dtype(btrfs_dir_flags_to_ftype(di->type)); | ||||
| 		btrfs_disk_key_to_cpu(&location, &di->location); | ||||
| 
 | ||||
| 		over = !dir_emit(ctx, name, name_len, | ||||
|  |  | |||
|  | @ -113,7 +113,7 @@ static inline void btrfs_init_delayed_root( | |||
| int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, | ||||
| 				   const char *name, int name_len, | ||||
| 				   struct btrfs_inode *dir, | ||||
| 				   struct btrfs_disk_key *disk_key, u8 type, | ||||
| 				   struct btrfs_disk_key *disk_key, u8 flags, | ||||
| 				   u64 index); | ||||
| 
 | ||||
| int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans, | ||||
|  |  | |||
|  | @ -6,12 +6,14 @@ | |||
| #include <linux/sched.h> | ||||
| #include <linux/slab.h> | ||||
| #include <linux/sort.h> | ||||
| #include "messages.h" | ||||
| #include "ctree.h" | ||||
| #include "delayed-ref.h" | ||||
| #include "transaction.h" | ||||
| #include "qgroup.h" | ||||
| #include "space-info.h" | ||||
| #include "tree-mod-log.h" | ||||
| #include "fs.h" | ||||
| 
 | ||||
| struct kmem_cache *btrfs_delayed_ref_head_cachep; | ||||
| struct kmem_cache *btrfs_delayed_tree_ref_cachep; | ||||
|  | @ -69,14 +71,14 @@ int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans) | |||
| 	return btrfs_check_space_for_delayed_refs(trans->fs_info); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * Release a ref head's reservation | ||||
| /*
 | ||||
|  * Release a ref head's reservation. | ||||
|  * | ||||
|  * @fs_info:  the filesystem | ||||
|  * @nr:       number of items to drop | ||||
|  * | ||||
|  * This drops the delayed ref head's count from the delayed refs rsv and frees | ||||
|  * any excess reservation we had. | ||||
|  * Drop the delayed ref head's count from the delayed refs rsv and free any | ||||
|  * excess reservation we had. | ||||
|  */ | ||||
| void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr) | ||||
| { | ||||
|  | @ -102,8 +104,7 @@ void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr) | |||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * btrfs_update_delayed_refs_rsv - adjust the size of the delayed refs rsv | ||||
|  * @trans - the trans that may have generated delayed refs | ||||
|  * Adjust the size of the delayed refs rsv. | ||||
|  * | ||||
|  * This is to be called any time we may have adjusted trans->delayed_ref_updates; | ||||
|  * it will calculate the additional size and add it to the delayed_refs_rsv. | ||||
|  | @ -137,8 +138,8 @@ void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans) | |||
| 	trans->delayed_ref_updates = 0; | ||||
| } | ||||
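A rough sketch of that sizing rule follows; it assumes btrfs_calc_insert_metadata_size() as the per-item cost and simplifies the flushing side (illustrative, not the exact kernel function):

	/* Illustrative: grow the rsv by one insertion worth per tracked update. */
	static void example_update_delayed_refs_rsv(struct btrfs_trans_handle *trans)
	{
		struct btrfs_fs_info *fs_info = trans->fs_info;
		struct btrfs_block_rsv *rsv = &fs_info->delayed_refs_rsv;
		u64 num_bytes;

		if (!trans->delayed_ref_updates)
			return;

		num_bytes = btrfs_calc_insert_metadata_size(fs_info,
							    trans->delayed_ref_updates);
		spin_lock(&rsv->lock);
		rsv->size += num_bytes;
		rsv->full = false;
		spin_unlock(&rsv->lock);
		/* Reset so the same updates are not accounted twice. */
		trans->delayed_ref_updates = 0;
	}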
| 
 | ||||
| /**
 | ||||
|  * Transfer bytes to our delayed refs rsv | ||||
| /*
 | ||||
|  * Transfer bytes to our delayed refs rsv. | ||||
|  * | ||||
|  * @fs_info:   the filesystem | ||||
|  * @src:       source block rsv to transfer from | ||||
|  | @ -186,8 +187,8 @@ void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info, | |||
| 				delayed_refs_rsv->space_info, to_free); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * Refill based on our delayed refs usage | ||||
| /*
 | ||||
|  * Refill based on our delayed refs usage. | ||||
|  * | ||||
|  * @fs_info: the filesystem | ||||
|  * @flush:   control how we can flush for this reservation. | ||||
|  |  | |||
|  | @ -18,11 +18,13 @@ | |||
| #include "volumes.h" | ||||
| #include "async-thread.h" | ||||
| #include "check-integrity.h" | ||||
| #include "rcu-string.h" | ||||
| #include "dev-replace.h" | ||||
| #include "sysfs.h" | ||||
| #include "zoned.h" | ||||
| #include "block-group.h" | ||||
| #include "fs.h" | ||||
| #include "accessors.h" | ||||
| #include "scrub.h" | ||||
| 
 | ||||
| /*
 | ||||
|  * Device replace overview | ||||
|  | @ -246,7 +248,6 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, | |||
| 	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; | ||||
| 	struct btrfs_device *device; | ||||
| 	struct block_device *bdev; | ||||
| 	struct rcu_string *name; | ||||
| 	u64 devid = BTRFS_DEV_REPLACE_DEVID; | ||||
| 	int ret = 0; | ||||
| 
 | ||||
|  | @ -290,19 +291,12 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, | |||
| 	} | ||||
| 
 | ||||
| 
 | ||||
| 	device = btrfs_alloc_device(NULL, &devid, NULL); | ||||
| 	device = btrfs_alloc_device(NULL, &devid, NULL, device_path); | ||||
| 	if (IS_ERR(device)) { | ||||
| 		ret = PTR_ERR(device); | ||||
| 		goto error; | ||||
| 	} | ||||
| 
 | ||||
| 	name = rcu_string_strdup(device_path, GFP_KERNEL); | ||||
| 	if (!name) { | ||||
| 		btrfs_free_device(device); | ||||
| 		ret = -ENOMEM; | ||||
| 		goto error; | ||||
| 	} | ||||
| 	rcu_assign_pointer(device->name, name); | ||||
| 	ret = lookup_bdev(device_path, &device->devt); | ||||
| 	if (ret) | ||||
| 		goto error; | ||||
|  | @ -456,14 +450,6 @@ out: | |||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| static char* btrfs_dev_name(struct btrfs_device *device) | ||||
| { | ||||
| 	if (!device || test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) | ||||
| 		return "<missing disk>"; | ||||
| 	else | ||||
| 		return rcu_str_deref(device->name); | ||||
| } | ||||
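The removed local helper is not lost: an equivalent now lives in a shared header. A sketch, assuming it keeps the RCU-string logic deleted above:

	/* Sketch of the shared replacement, mirroring the removed logic. */
	static inline const char *example_btrfs_dev_name(const struct btrfs_device *device)
	{
		if (!device || test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
			return "<missing disk>";
		return rcu_str_deref(device->name);
	}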
| 
 | ||||
| static int mark_block_group_to_copy(struct btrfs_fs_info *fs_info, | ||||
| 				    struct btrfs_device *src_dev) | ||||
| { | ||||
|  | @ -679,7 +665,7 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info, | |||
| 		      "dev_replace from %s (devid %llu) to %s started", | ||||
| 		      btrfs_dev_name(src_device), | ||||
| 		      src_device->devid, | ||||
| 		      rcu_str_deref(tgt_device->name)); | ||||
| 		      btrfs_dev_name(tgt_device)); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * from now on, the writes to the srcdev are all duplicated to | ||||
|  | @ -938,7 +924,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
| 				 "btrfs_scrub_dev(%s, %llu, %s) failed %d", | ||||
| 				 btrfs_dev_name(src_device), | ||||
| 				 src_device->devid, | ||||
| 				 rcu_str_deref(tgt_device->name), scrub_ret); | ||||
| 				 btrfs_dev_name(tgt_device), scrub_ret); | ||||
| error: | ||||
| 		up_write(&dev_replace->rwsem); | ||||
| 		mutex_unlock(&fs_info->chunk_mutex); | ||||
|  | @ -956,7 +942,7 @@ error: | |||
| 			  "dev_replace from %s (devid %llu) to %s finished", | ||||
| 			  btrfs_dev_name(src_device), | ||||
| 			  src_device->devid, | ||||
| 			  rcu_str_deref(tgt_device->name)); | ||||
| 			  btrfs_dev_name(tgt_device)); | ||||
| 	clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &tgt_device->dev_state); | ||||
| 	tgt_device->devid = src_device->devid; | ||||
| 	src_device->devid = BTRFS_DEV_REPLACE_DEVID; | ||||
|  |  | |||
|  | @ -25,5 +25,13 @@ int __pure btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace); | |||
| bool btrfs_finish_block_group_to_copy(struct btrfs_device *srcdev, | ||||
| 				      struct btrfs_block_group *cache, | ||||
| 				      u64 physical); | ||||
| void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info); | ||||
| void btrfs_bio_counter_sub(struct btrfs_fs_info *fs_info, s64 amount); | ||||
| 
 | ||||
| static inline void btrfs_bio_counter_dec(struct btrfs_fs_info *fs_info) | ||||
| { | ||||
| 	btrfs_bio_counter_sub(fs_info, 1); | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| #endif | ||||
|  |  | |||
|  | @ -3,9 +3,12 @@ | |||
|  * Copyright (C) 2007 Oracle.  All rights reserved. | ||||
|  */ | ||||
| 
 | ||||
| #include "messages.h" | ||||
| #include "ctree.h" | ||||
| #include "disk-io.h" | ||||
| #include "transaction.h" | ||||
| #include "accessors.h" | ||||
| #include "dir-item.h" | ||||
| 
 | ||||
| /*
 | ||||
|  * insert a name into a directory, doing overflow properly if there is a hash | ||||
|  | @ -81,7 +84,7 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, | |||
| 	leaf = path->nodes[0]; | ||||
| 	btrfs_cpu_key_to_disk(&disk_key, &location); | ||||
| 	btrfs_set_dir_item_key(leaf, dir_item, &disk_key); | ||||
| 	btrfs_set_dir_type(leaf, dir_item, BTRFS_FT_XATTR); | ||||
| 	btrfs_set_dir_flags(leaf, dir_item, BTRFS_FT_XATTR); | ||||
| 	btrfs_set_dir_name_len(leaf, dir_item, name_len); | ||||
| 	btrfs_set_dir_transid(leaf, dir_item, trans->transid); | ||||
| 	btrfs_set_dir_data_len(leaf, dir_item, data_len); | ||||
|  | @ -103,8 +106,8 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, | |||
|  * to use for the second index (if one is created). | ||||
|  * Will return 0 or -ENOMEM | ||||
|  */ | ||||
| int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, const char *name, | ||||
| 			  int name_len, struct btrfs_inode *dir, | ||||
| int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, | ||||
| 			  const struct fscrypt_str *name, struct btrfs_inode *dir, | ||||
| 			  struct btrfs_key *location, u8 type, u64 index) | ||||
| { | ||||
| 	int ret = 0; | ||||
|  | @ -120,7 +123,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, const char *name, | |||
| 
 | ||||
| 	key.objectid = btrfs_ino(dir); | ||||
| 	key.type = BTRFS_DIR_ITEM_KEY; | ||||
| 	key.offset = btrfs_name_hash(name, name_len); | ||||
| 	key.offset = btrfs_name_hash(name->name, name->len); | ||||
| 
 | ||||
| 	path = btrfs_alloc_path(); | ||||
| 	if (!path) | ||||
|  | @ -128,9 +131,9 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, const char *name, | |||
| 
 | ||||
| 	btrfs_cpu_key_to_disk(&disk_key, location); | ||||
| 
 | ||||
| 	data_size = sizeof(*dir_item) + name_len; | ||||
| 	data_size = sizeof(*dir_item) + name->len; | ||||
| 	dir_item = insert_with_overflow(trans, root, path, &key, data_size, | ||||
| 					name, name_len); | ||||
| 					name->name, name->len); | ||||
| 	if (IS_ERR(dir_item)) { | ||||
| 		ret = PTR_ERR(dir_item); | ||||
| 		if (ret == -EEXIST) | ||||
|  | @ -138,15 +141,18 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, const char *name, | |||
| 		goto out_free; | ||||
| 	} | ||||
| 
 | ||||
| 	if (IS_ENCRYPTED(&dir->vfs_inode)) | ||||
| 		type |= BTRFS_FT_ENCRYPTED; | ||||
| 
 | ||||
| 	leaf = path->nodes[0]; | ||||
| 	btrfs_set_dir_item_key(leaf, dir_item, &disk_key); | ||||
| 	btrfs_set_dir_type(leaf, dir_item, type); | ||||
| 	btrfs_set_dir_flags(leaf, dir_item, type); | ||||
| 	btrfs_set_dir_data_len(leaf, dir_item, 0); | ||||
| 	btrfs_set_dir_name_len(leaf, dir_item, name_len); | ||||
| 	btrfs_set_dir_name_len(leaf, dir_item, name->len); | ||||
| 	btrfs_set_dir_transid(leaf, dir_item, trans->transid); | ||||
| 	name_ptr = (unsigned long)(dir_item + 1); | ||||
| 
 | ||||
| 	write_extent_buffer(leaf, name, name_ptr, name_len); | ||||
| 	write_extent_buffer(leaf, name->name, name_ptr, name->len); | ||||
| 	btrfs_mark_buffer_dirty(leaf); | ||||
| 
 | ||||
| second_insert: | ||||
|  | @ -157,7 +163,7 @@ second_insert: | |||
| 	} | ||||
| 	btrfs_release_path(path); | ||||
| 
 | ||||
| 	ret2 = btrfs_insert_delayed_dir_index(trans, name, name_len, dir, | ||||
| 	ret2 = btrfs_insert_delayed_dir_index(trans, name->name, name->len, dir, | ||||
| 					      &disk_key, type, index); | ||||
| out_free: | ||||
| 	btrfs_free_path(path); | ||||
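The rename from "type" to "flags" reflects that the on-disk byte now carries more than the file type: the classic BTRFS_FT_* value sits in the low bits and feature bits such as BTRFS_FT_ENCRYPTED are OR-ed on top, as the IS_ENCRYPTED() branch above shows. A hedged sketch of the unpacking (the example_ helpers are illustrative; only BTRFS_FT_ENCRYPTED is taken from the patch):

	/* Illustrative unpacking of the dir item flags byte. */
	static inline u8 example_dir_flags_to_ftype(u8 flags)
	{
		return flags & ~BTRFS_FT_ENCRYPTED;	/* low bits: file type */
	}

	static inline bool example_dir_entry_encrypted(u8 flags)
	{
		return (flags & BTRFS_FT_ENCRYPTED) != 0;	/* high bit: fscrypt */
	}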
|  | @ -206,7 +212,7 @@ static struct btrfs_dir_item *btrfs_lookup_match_dir( | |||
| struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, | ||||
| 					     struct btrfs_root *root, | ||||
| 					     struct btrfs_path *path, u64 dir, | ||||
| 					     const char *name, int name_len, | ||||
| 					     const struct fscrypt_str *name, | ||||
| 					     int mod) | ||||
| { | ||||
| 	struct btrfs_key key; | ||||
|  | @ -214,9 +220,10 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, | |||
| 
 | ||||
| 	key.objectid = dir; | ||||
| 	key.type = BTRFS_DIR_ITEM_KEY; | ||||
| 	key.offset = btrfs_name_hash(name, name_len); | ||||
| 	key.offset = btrfs_name_hash(name->name, name->len); | ||||
| 
 | ||||
| 	di = btrfs_lookup_match_dir(trans, root, path, &key, name, name_len, mod); | ||||
| 	di = btrfs_lookup_match_dir(trans, root, path, &key, name->name, | ||||
| 				    name->len, mod); | ||||
| 	if (IS_ERR(di) && PTR_ERR(di) == -ENOENT) | ||||
| 		return NULL; | ||||
| 
 | ||||
|  | @ -224,7 +231,7 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, | |||
| } | ||||
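With the (name, name_len) pairs replaced by struct fscrypt_str, callers pack the name once. A hypothetical lookup, using the standard FSTR_INIT() initializer from <linux/fscrypt.h>:

	/* Hypothetical caller of the fscrypt_str based lookup API. */
	unsigned char raw_name[] = "subdir";
	struct fscrypt_str name = FSTR_INIT(raw_name, sizeof(raw_name) - 1);
	struct btrfs_dir_item *di;

	di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir), &name, 0);
	if (IS_ERR(di))
		return PTR_ERR(di);	/* lookup error */
	if (!di)
		return -ENOENT;		/* no matching entry */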
| 
 | ||||
| int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir, | ||||
| 				   const char *name, int name_len) | ||||
| 				   const struct fscrypt_str *name) | ||||
| { | ||||
| 	int ret; | ||||
| 	struct btrfs_key key; | ||||
|  | @ -240,9 +247,10 @@ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir, | |||
| 
 | ||||
| 	key.objectid = dir; | ||||
| 	key.type = BTRFS_DIR_ITEM_KEY; | ||||
| 	key.offset = btrfs_name_hash(name, name_len); | ||||
| 	key.offset = btrfs_name_hash(name->name, name->len); | ||||
| 
 | ||||
| 	di = btrfs_lookup_match_dir(NULL, root, path, &key, name, name_len, 0); | ||||
| 	di = btrfs_lookup_match_dir(NULL, root, path, &key, name->name, | ||||
| 				    name->len, 0); | ||||
| 	if (IS_ERR(di)) { | ||||
| 		ret = PTR_ERR(di); | ||||
| 		/* Nothing found, we're safe */ | ||||
|  | @ -262,11 +270,8 @@ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir, | |||
| 		goto out; | ||||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * see if there is room in the item to insert this | ||||
| 	 * name | ||||
| 	 */ | ||||
| 	data_size = sizeof(*di) + name_len; | ||||
| 	/* See if there is room in the item to insert this name. */ | ||||
| 	data_size = sizeof(*di) + name->len; | ||||
| 	leaf = path->nodes[0]; | ||||
| 	slot = path->slots[0]; | ||||
| 	if (data_size + btrfs_item_size(leaf, slot) + | ||||
|  | @ -303,8 +308,7 @@ struct btrfs_dir_item * | |||
| btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, | ||||
| 			    struct btrfs_root *root, | ||||
| 			    struct btrfs_path *path, u64 dir, | ||||
| 			    u64 index, const char *name, int name_len, | ||||
| 			    int mod) | ||||
| 			    u64 index, const struct fscrypt_str *name, int mod) | ||||
| { | ||||
| 	struct btrfs_dir_item *di; | ||||
| 	struct btrfs_key key; | ||||
|  | @ -313,7 +317,8 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, | |||
| 	key.type = BTRFS_DIR_INDEX_KEY; | ||||
| 	key.offset = index; | ||||
| 
 | ||||
| 	di = btrfs_lookup_match_dir(trans, root, path, &key, name, name_len, mod); | ||||
| 	di = btrfs_lookup_match_dir(trans, root, path, &key, name->name, | ||||
| 				    name->len, mod); | ||||
| 	if (di == ERR_PTR(-ENOENT)) | ||||
| 		return NULL; | ||||
| 
 | ||||
|  | @ -321,9 +326,8 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, | |||
| } | ||||
| 
 | ||||
| struct btrfs_dir_item * | ||||
| btrfs_search_dir_index_item(struct btrfs_root *root, | ||||
| 			    struct btrfs_path *path, u64 dirid, | ||||
| 			    const char *name, int name_len) | ||||
| btrfs_search_dir_index_item(struct btrfs_root *root, struct btrfs_path *path, | ||||
| 			    u64 dirid, const struct fscrypt_str *name) | ||||
| { | ||||
| 	struct btrfs_dir_item *di; | ||||
| 	struct btrfs_key key; | ||||
|  | @ -338,7 +342,7 @@ btrfs_search_dir_index_item(struct btrfs_root *root, | |||
| 			break; | ||||
| 
 | ||||
| 		di = btrfs_match_dir_item_name(root->fs_info, path, | ||||
| 					       name, name_len); | ||||
| 					       name->name, name->len); | ||||
| 		if (di) | ||||
| 			return di; | ||||
| 	} | ||||
|  |  | |||
							
								
								
									
fs/btrfs/dir-item.h (new file, 42 lines added)
							|  | @ -0,0 +1,42 @@ | |||
| /* SPDX-License-Identifier: GPL-2.0 */ | ||||
| 
 | ||||
| #ifndef BTRFS_DIR_ITEM_H | ||||
| #define BTRFS_DIR_ITEM_H | ||||
| 
 | ||||
| int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir, | ||||
| 			  const struct fscrypt_str *name); | ||||
| int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, | ||||
| 			  const struct fscrypt_str *name, struct btrfs_inode *dir, | ||||
| 			  struct btrfs_key *location, u8 type, u64 index); | ||||
| struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, | ||||
| 					     struct btrfs_root *root, | ||||
| 					     struct btrfs_path *path, u64 dir, | ||||
| 					     const struct fscrypt_str *name, int mod); | ||||
| struct btrfs_dir_item *btrfs_lookup_dir_index_item( | ||||
| 			struct btrfs_trans_handle *trans, | ||||
| 			struct btrfs_root *root, | ||||
| 			struct btrfs_path *path, u64 dir, | ||||
| 			u64 index, const struct fscrypt_str *name, int mod); | ||||
| struct btrfs_dir_item *btrfs_search_dir_index_item(struct btrfs_root *root, | ||||
| 			    struct btrfs_path *path, u64 dirid, | ||||
| 			    const struct fscrypt_str *name); | ||||
| int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, | ||||
| 			      struct btrfs_root *root, | ||||
| 			      struct btrfs_path *path, | ||||
| 			      struct btrfs_dir_item *di); | ||||
| int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, | ||||
| 			    struct btrfs_root *root, | ||||
| 			    struct btrfs_path *path, u64 objectid, | ||||
| 			    const char *name, u16 name_len, | ||||
| 			    const void *data, u16 data_len); | ||||
| struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, | ||||
| 					  struct btrfs_root *root, | ||||
| 					  struct btrfs_path *path, u64 dir, | ||||
| 					  const char *name, u16 name_len, | ||||
| 					  int mod); | ||||
| struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_fs_info *fs_info, | ||||
| 						 struct btrfs_path *path, | ||||
| 						 const char *name, | ||||
| 						 int name_len); | ||||
| 
 | ||||
| #endif | ||||
|  | @ -11,6 +11,7 @@ | |||
| #include "block-group.h" | ||||
| #include "discard.h" | ||||
| #include "free-space-cache.h" | ||||
| #include "fs.h" | ||||
| 
 | ||||
| /*
 | ||||
|  * This contains the logic to handle async discard. | ||||
|  | @ -61,7 +62,7 @@ | |||
| #define BTRFS_DISCARD_MAX_DELAY_MSEC	(1000UL) | ||||
| #define BTRFS_DISCARD_MAX_IOPS		(10U) | ||||
| 
 | ||||
| /* Montonically decreasing minimum length filters after index 0 */ | ||||
| /* Monotonically decreasing minimum length filters after index 0 */ | ||||
| static int discard_minlen[BTRFS_NR_DISCARD_LISTS] = { | ||||
| 	0, | ||||
| 	BTRFS_ASYNC_DISCARD_MAX_FILTER, | ||||
|  | @ -146,10 +147,11 @@ static bool remove_from_discard_list(struct btrfs_discard_ctl *discard_ctl, | |||
| 	return running; | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * find_next_block_group - find block_group that's up next for discarding | ||||
|  * @discard_ctl: discard control | ||||
|  * @now: current time | ||||
| /*
 | ||||
|  * Find block_group that's up next for discarding. | ||||
|  * | ||||
|  * @discard_ctl:  discard control | ||||
|  * @now:          current time | ||||
|  * | ||||
|  * Iterate over the discard lists to find the next block_group up for | ||||
|  * discarding, checking the discard_eligible_time of the block_group. | ||||
|  | @ -184,17 +186,17 @@ static struct btrfs_block_group *find_next_block_group( | |||
| 	return ret_block_group; | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * Wrap find_next_block_group() | ||||
| /*
 | ||||
|  * Look up next block group and set it for use. | ||||
|  * | ||||
|  * @discard_ctl:   discard control | ||||
|  * @discard_state: the discard_state of the block_group after state management | ||||
|  * @discard_index: the discard_index of the block_group after state management | ||||
|  * @now:           time when discard was invoked, in ns | ||||
|  * | ||||
|  * This wraps find_next_block_group() and sets the block_group to be in use. | ||||
|  * discard_state's control flow is managed here.  Variables related to | ||||
|  * discard_state are reset here as needed (eg discard_cursor).  @discard_state | ||||
|  * Wrap find_next_block_group() and set the block_group to be in use. | ||||
|  * @discard_state's control flow is managed here.  Variables related to | ||||
|  * @discard_state are reset here as needed (e.g. @discard_cursor).  @discard_state | ||||
|  * and @discard_index are remembered as it may change while we're discarding, | ||||
|  * but we want the discard to execute in the context determined here. | ||||
|  */ | ||||
|  | @ -233,10 +235,11 @@ again: | |||
| 	return block_group; | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * btrfs_discard_check_filter - updates a block groups filters | ||||
|  * @block_group: block group of interest | ||||
|  * @bytes: recently freed region size after coalescing | ||||
| /*
 | ||||
|  * Update a block group's filters. | ||||
|  * | ||||
|  * @block_group:  block group of interest | ||||
|  * @bytes:        recently freed region size after coalescing | ||||
|  * | ||||
|  * Async discard maintains multiple lists with progressively smaller filters | ||||
|  * to prioritize discarding based on size.  Should a free space that matches | ||||
|  | @ -271,8 +274,9 @@ void btrfs_discard_check_filter(struct btrfs_block_group *block_group, | |||
| 	} | ||||
| } | ||||
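The promotion rule implied by the discard_minlen table above can be sketched as follows (illustrative only: index 0 is reserved for unused block groups, and the real code also re-queues the block group on its new list):

	/* Illustrative: pick the smallest-index filtered list @bytes satisfies. */
	static int example_pick_discard_index(int cur_index, u64 bytes)
	{
		int i = cur_index;

		while (i > 1 && bytes >= discard_minlen[i - 1])
			i--;
		return i;
	}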
| 
 | ||||
| /**
 | ||||
|  * btrfs_update_discard_index - moves a block group along the discard lists | ||||
| /*
 | ||||
|  * Move a block group along the discard lists. | ||||
|  * | ||||
|  * @discard_ctl: discard control | ||||
|  * @block_group: block_group of interest | ||||
|  * | ||||
|  | @ -291,13 +295,14 @@ static void btrfs_update_discard_index(struct btrfs_discard_ctl *discard_ctl, | |||
| 	add_to_discard_list(discard_ctl, block_group); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * btrfs_discard_cancel_work - remove a block_group from the discard lists | ||||
| /*
 | ||||
|  * Remove a block_group from the discard lists. | ||||
|  * | ||||
|  * @discard_ctl: discard control | ||||
|  * @block_group: block_group of interest | ||||
|  * | ||||
|  * This removes @block_group from the discard lists.  If necessary, it waits on | ||||
|  * the current work and then reschedules the delayed work. | ||||
|  * Remove @block_group from the discard lists.  If necessary, wait on the | ||||
|  * current work and then reschedule the delayed work. | ||||
|  */ | ||||
| void btrfs_discard_cancel_work(struct btrfs_discard_ctl *discard_ctl, | ||||
| 			       struct btrfs_block_group *block_group) | ||||
|  | @ -308,12 +313,13 @@ void btrfs_discard_cancel_work(struct btrfs_discard_ctl *discard_ctl, | |||
| 	} | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * btrfs_discard_queue_work - handles queuing the block_groups | ||||
| /*
 | ||||
|  * Handle queuing of the block_groups. | ||||
|  * | ||||
|  * @discard_ctl: discard control | ||||
|  * @block_group: block_group of interest | ||||
|  * | ||||
|  * This maintains the LRU order of the discard lists. | ||||
|  * Maintain the LRU order of the discard lists. | ||||
|  */ | ||||
| void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl, | ||||
| 			      struct btrfs_block_group *block_group) | ||||
|  | @ -383,7 +389,8 @@ static void __btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl, | |||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * btrfs_discard_schedule_work - responsible for scheduling the discard work | ||||
|  * Responsible for scheduling the discard work. | ||||
|  * | ||||
|  * @discard_ctl:  discard control | ||||
|  * @override:     override the current timer | ||||
|  * | ||||
|  | @ -401,15 +408,16 @@ void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl, | |||
| 	spin_unlock(&discard_ctl->lock); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * btrfs_finish_discard_pass - determine next step of a block_group | ||||
| /*
 | ||||
|  * Determine next step of a block_group. | ||||
|  * | ||||
|  * @discard_ctl: discard control | ||||
|  * @block_group: block_group of interest | ||||
|  * | ||||
|  * This determines the next step for a block group after it's finished going | ||||
|  * through a pass on a discard list.  If it is unused and fully trimmed, we can | ||||
|  * mark it unused and send it to the unused_bgs path.  Otherwise, pass it onto | ||||
|  * the appropriate filter list or let it fall off. | ||||
|  * Determine the next step for a block group after it's finished going through | ||||
|  * a pass on a discard list.  If it is unused and fully trimmed, we can mark it | ||||
|  * unused and send it to the unused_bgs path.  Otherwise, pass it onto the | ||||
|  * appropriate filter list or let it fall off. | ||||
|  */ | ||||
| static void btrfs_finish_discard_pass(struct btrfs_discard_ctl *discard_ctl, | ||||
| 				      struct btrfs_block_group *block_group) | ||||
|  | @ -426,12 +434,13 @@ static void btrfs_finish_discard_pass(struct btrfs_discard_ctl *discard_ctl, | |||
| 	} | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * btrfs_discard_workfn - discard work function | ||||
| /*
 | ||||
|  * Discard work queue callback | ||||
|  * | ||||
|  * @work: work | ||||
|  * | ||||
|  * This finds the next block_group to start discarding and then discards a | ||||
|  * single region.  It does this in a two-pass fashion: first extents and second | ||||
|  * Find the next block_group to start discarding and then discard a single | ||||
|  * region.  It does this in a two-pass fashion: first extents and second | ||||
|  * bitmaps.  Completely discarded block groups are sent to the unused_bgs path. | ||||
|  */ | ||||
| static void btrfs_discard_workfn(struct work_struct *work) | ||||
|  | @ -507,11 +516,12 @@ static void btrfs_discard_workfn(struct work_struct *work) | |||
| 	spin_unlock(&discard_ctl->lock); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * btrfs_run_discard_work - determines if async discard should be running | ||||
| /*
 | ||||
|  * Determine if async discard should be running. | ||||
|  * | ||||
|  * @discard_ctl: discard control | ||||
|  * | ||||
|  * Checks if the file system is writeable and BTRFS_FS_DISCARD_RUNNING is set. | ||||
|  * Check if the file system is writeable and BTRFS_FS_DISCARD_RUNNING is set. | ||||
|  */ | ||||
| bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl) | ||||
| { | ||||
|  | @ -523,8 +533,9 @@ bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl) | |||
| 		test_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags)); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * btrfs_discard_calc_delay - recalculate the base delay | ||||
| /*
 | ||||
|  * Recalculate the base delay. | ||||
|  * | ||||
|  * @discard_ctl: discard control | ||||
|  * | ||||
|  * Recalculate the base delay which is based off the total number of | ||||
|  | @ -545,7 +556,7 @@ void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl) | |||
| 	spin_lock(&discard_ctl->lock); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * The following is to fix a potential -1 discrepenancy that we're not | ||||
| 	 * The following is to fix a potential -1 discrepancy that we're not | ||||
| 	 * sure how to reproduce. But given that this is the only place that | ||||
| 	 * utilizes these numbers and this is only called from | ||||
| 	 * btrfs_finish_extent_commit() which is synchronized, we can correct | ||||
|  | @ -578,13 +589,14 @@ void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl) | |||
| 	spin_unlock(&discard_ctl->lock); | ||||
| } | ||||
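Put together, the delay works out roughly as below (a sketch: BTRFS_DISCARD_MAX_DELAY_MSEC and BTRFS_DISCARD_MAX_IOPS appear at the top of this file; the TARGET and MIN constants are assumed counterparts):

	/* Illustrative: pace by an IOPS limit, else spread one full pass. */
	static u32 example_discard_delay_ms(u32 iops_limit, s32 discardable_extents)
	{
		u32 delay;

		if (iops_limit)
			delay = MSEC_PER_SEC / iops_limit;
		else
			delay = BTRFS_DISCARD_TARGET_MSEC /
				max_t(s32, discardable_extents, 1);

		return clamp_t(u32, delay, BTRFS_DISCARD_MIN_DELAY_MSEC,
			       BTRFS_DISCARD_MAX_DELAY_MSEC);
	}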
| 
 | ||||
| /**
 | ||||
|  * btrfs_discard_update_discardable - propagate discard counters | ||||
| /*
 | ||||
|  * Propagate discard counters. | ||||
|  * | ||||
|  * @block_group: block_group of interest | ||||
|  * | ||||
|  * This propagates deltas of counters up to the discard_ctl.  It maintains a | ||||
|  * current counter and a previous counter passing the delta up to the global | ||||
|  * stat.  Then the current counter value becomes the previous counter value. | ||||
|  * Propagate deltas of counters up to the discard_ctl.  It maintains a current | ||||
|  * counter and a previous counter, passing the delta up to the global stat. | ||||
|  * Then the current counter value becomes the previous counter value. | ||||
|  */ | ||||
| void btrfs_discard_update_discardable(struct btrfs_block_group *block_group) | ||||
| { | ||||
|  | @ -619,8 +631,9 @@ void btrfs_discard_update_discardable(struct btrfs_block_group *block_group) | |||
| 	} | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * btrfs_discard_punt_unused_bgs_list - punt unused_bgs list to discard lists | ||||
| /*
 | ||||
|  * Punt unused_bgs list to discard lists. | ||||
|  * | ||||
|  * @fs_info: fs_info of interest | ||||
|  * | ||||
|  * The unused_bgs list needs to be punted to the discard lists because the | ||||
|  | @ -644,8 +657,9 @@ void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info) | |||
| 	spin_unlock(&fs_info->unused_bgs_lock); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * btrfs_discard_purge_list - purge discard lists | ||||
| /*
 | ||||
|  * Purge discard lists. | ||||
|  * | ||||
|  * @discard_ctl: discard control | ||||
|  * | ||||
|  * If we are disabling async discard, we may have intercepted block groups that | ||||
|  |  | |||
|  | @ -23,7 +23,7 @@ | |||
| #include "disk-io.h" | ||||
| #include "transaction.h" | ||||
| #include "btrfs_inode.h" | ||||
| #include "volumes.h" | ||||
| #include "bio.h" | ||||
| #include "print-tree.h" | ||||
| #include "locking.h" | ||||
| #include "tree-log.h" | ||||
|  | @ -43,6 +43,15 @@ | |||
| #include "space-info.h" | ||||
| #include "zoned.h" | ||||
| #include "subpage.h" | ||||
| #include "fs.h" | ||||
| #include "accessors.h" | ||||
| #include "extent-tree.h" | ||||
| #include "root-tree.h" | ||||
| #include "defrag.h" | ||||
| #include "uuid-tree.h" | ||||
| #include "relocation.h" | ||||
| #include "scrub.h" | ||||
| #include "super.h" | ||||
| 
 | ||||
| #define BTRFS_SUPER_FLAG_SUPP	(BTRFS_HEADER_FLAG_WRITTEN |\ | ||||
| 				 BTRFS_HEADER_FLAG_RELOC |\ | ||||
|  | @ -75,12 +84,12 @@ static void btrfs_free_csum_hash(struct btrfs_fs_info *fs_info) | |||
|  * just before they are sent down the IO stack. | ||||
|  */ | ||||
| struct async_submit_bio { | ||||
| 	struct inode *inode; | ||||
| 	struct btrfs_inode *inode; | ||||
| 	struct bio *bio; | ||||
| 	extent_submit_bio_start_t *submit_bio_start; | ||||
| 	enum btrfs_wq_submit_cmd submit_cmd; | ||||
| 	int mirror_num; | ||||
| 
 | ||||
| 	/* Optional parameter for submit_bio_start used by direct io */ | ||||
| 	/* Optional parameter used by direct io */ | ||||
| 	u64 dio_file_offset; | ||||
| 	struct btrfs_work work; | ||||
| 	blk_status_t status; | ||||
|  | @ -246,40 +255,54 @@ int btrfs_verify_level_key(struct extent_buffer *eb, int level, | |||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| static int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, | ||||
| 				      int mirror_num) | ||||
| { | ||||
| 	struct btrfs_fs_info *fs_info = eb->fs_info; | ||||
| 	u64 start = eb->start; | ||||
| 	int i, num_pages = num_extent_pages(eb); | ||||
| 	int ret = 0; | ||||
| 
 | ||||
| 	if (sb_rdonly(fs_info->sb)) | ||||
| 		return -EROFS; | ||||
| 
 | ||||
| 	for (i = 0; i < num_pages; i++) { | ||||
| 		struct page *p = eb->pages[i]; | ||||
| 
 | ||||
| 		ret = btrfs_repair_io_failure(fs_info, 0, start, PAGE_SIZE, | ||||
| 				start, p, start - page_offset(p), mirror_num); | ||||
| 		if (ret) | ||||
| 			break; | ||||
| 		start += PAGE_SIZE; | ||||
| 	} | ||||
| 
 | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Helper to read a given tree block, doing retries as required when the | ||||
|  * checksums don't match and we have alternate mirrors to try. | ||||
|  * | ||||
|  * @parent_transid:	expected transid, skip check if 0 | ||||
|  * @level:		expected level, mandatory check | ||||
|  * @first_key:		expected key of first slot, skip check if NULL | ||||
|  * @check:		expected tree parentness check, see the comments of the | ||||
|  *			structure for details. | ||||
|  */ | ||||
| int btrfs_read_extent_buffer(struct extent_buffer *eb, | ||||
| 			     u64 parent_transid, int level, | ||||
| 			     struct btrfs_key *first_key) | ||||
| 			     struct btrfs_tree_parent_check *check) | ||||
| { | ||||
| 	struct btrfs_fs_info *fs_info = eb->fs_info; | ||||
| 	struct extent_io_tree *io_tree; | ||||
| 	int failed = 0; | ||||
| 	int ret; | ||||
| 	int num_copies = 0; | ||||
| 	int mirror_num = 0; | ||||
| 	int failed_mirror = 0; | ||||
| 
 | ||||
| 	io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree; | ||||
| 	ASSERT(check); | ||||
| 
 | ||||
| 	while (1) { | ||||
| 		clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); | ||||
| 		ret = read_extent_buffer_pages(eb, WAIT_COMPLETE, mirror_num); | ||||
| 		if (!ret) { | ||||
| 			if (verify_parent_transid(io_tree, eb, | ||||
| 						   parent_transid, 0)) | ||||
| 				ret = -EIO; | ||||
| 			else if (btrfs_verify_level_key(eb, level, | ||||
| 						first_key, parent_transid)) | ||||
| 				ret = -EUCLEAN; | ||||
| 			else | ||||
| 				break; | ||||
| 		} | ||||
| 		ret = read_extent_buffer_pages(eb, WAIT_COMPLETE, mirror_num, check); | ||||
| 		if (!ret) | ||||
| 			break; | ||||
| 
 | ||||
| 		num_copies = btrfs_num_copies(fs_info, | ||||
| 					      eb->start, eb->len); | ||||
|  | @ -455,7 +478,8 @@ static int check_tree_block_fsid(struct extent_buffer *eb) | |||
| } | ||||
| 
 | ||||
| /* Do basic extent buffer checks at read time */ | ||||
| static int validate_extent_buffer(struct extent_buffer *eb) | ||||
| static int validate_extent_buffer(struct extent_buffer *eb, | ||||
| 				  struct btrfs_tree_parent_check *check) | ||||
| { | ||||
| 	struct btrfs_fs_info *fs_info = eb->fs_info; | ||||
| 	u64 found_start; | ||||
|  | @ -465,6 +489,8 @@ static int validate_extent_buffer(struct extent_buffer *eb) | |||
| 	const u8 *header_csum; | ||||
| 	int ret = 0; | ||||
| 
 | ||||
| 	ASSERT(check); | ||||
| 
 | ||||
| 	found_start = btrfs_header_bytenr(eb); | ||||
| 	if (found_start != eb->start) { | ||||
| 		btrfs_err_rl(fs_info, | ||||
|  | @ -503,6 +529,45 @@ static int validate_extent_buffer(struct extent_buffer *eb) | |||
| 		goto out; | ||||
| 	} | ||||
| 
 | ||||
| 	if (found_level != check->level) { | ||||
| 		ret = -EIO; | ||||
| 		goto out; | ||||
| 	} | ||||
| 	if (unlikely(check->transid && | ||||
| 		     btrfs_header_generation(eb) != check->transid)) { | ||||
| 		btrfs_err_rl(eb->fs_info, | ||||
| "parent transid verify failed on logical %llu mirror %u wanted %llu found %llu", | ||||
| 				eb->start, eb->read_mirror, check->transid, | ||||
| 				btrfs_header_generation(eb)); | ||||
| 		ret = -EIO; | ||||
| 		goto out; | ||||
| 	} | ||||
| 	if (check->has_first_key) { | ||||
| 		struct btrfs_key *expect_key = &check->first_key; | ||||
| 		struct btrfs_key found_key; | ||||
| 
 | ||||
| 		if (found_level) | ||||
| 			btrfs_node_key_to_cpu(eb, &found_key, 0); | ||||
| 		else | ||||
| 			btrfs_item_key_to_cpu(eb, &found_key, 0); | ||||
| 		if (unlikely(btrfs_comp_cpu_keys(expect_key, &found_key))) { | ||||
| 			btrfs_err(fs_info, | ||||
| "tree first key mismatch detected, bytenr=%llu parent_transid=%llu key expected=(%llu,%u,%llu) has=(%llu,%u,%llu)", | ||||
| 				  eb->start, check->transid, | ||||
| 				  expect_key->objectid, | ||||
| 				  expect_key->type, expect_key->offset, | ||||
| 				  found_key.objectid, found_key.type, | ||||
| 				  found_key.offset); | ||||
| 			ret = -EUCLEAN; | ||||
| 			goto out; | ||||
| 		} | ||||
| 	} | ||||
| 	if (check->owner_root) { | ||||
| 		ret = btrfs_check_eb_owner(eb, check->owner_root); | ||||
| 		if (ret < 0) | ||||
| 			goto out; | ||||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * If this is a leaf block and it is corrupt, set the corrupt bit so | ||||
| 	 * that we don't try and read the other copies of this block, just | ||||
|  | @ -527,13 +592,15 @@ out: | |||
| } | ||||
| 
 | ||||
| static int validate_subpage_buffer(struct page *page, u64 start, u64 end, | ||||
| 				   int mirror) | ||||
| 				   int mirror, struct btrfs_tree_parent_check *check) | ||||
| { | ||||
| 	struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb); | ||||
| 	struct extent_buffer *eb; | ||||
| 	bool reads_done; | ||||
| 	int ret = 0; | ||||
| 
 | ||||
| 	ASSERT(check); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * We don't allow bio merge for subpage metadata read, so we should | ||||
| 	 * only get one eb for each endio hook. | ||||
|  | @ -557,7 +624,7 @@ static int validate_subpage_buffer(struct page *page, u64 start, u64 end, | |||
| 		ret = -EIO; | ||||
| 		goto err; | ||||
| 	} | ||||
| 	ret = validate_extent_buffer(eb); | ||||
| 	ret = validate_extent_buffer(eb, check); | ||||
| 	if (ret < 0) | ||||
| 		goto err; | ||||
| 
 | ||||
|  | @ -587,7 +654,8 @@ int btrfs_validate_metadata_buffer(struct btrfs_bio *bbio, | |||
| 	ASSERT(page->private); | ||||
| 
 | ||||
| 	if (btrfs_sb(page->mapping->host->i_sb)->nodesize < PAGE_SIZE) | ||||
| 		return validate_subpage_buffer(page, start, end, mirror); | ||||
| 		return validate_subpage_buffer(page, start, end, mirror, | ||||
| 					       &bbio->parent_check); | ||||
| 
 | ||||
| 	eb = (struct extent_buffer *)page->private; | ||||
| 
 | ||||
|  | @ -606,7 +674,7 @@ int btrfs_validate_metadata_buffer(struct btrfs_bio *bbio, | |||
| 		ret = -EIO; | ||||
| 		goto err; | ||||
| 	} | ||||
| 	ret = validate_extent_buffer(eb); | ||||
| 	ret = validate_extent_buffer(eb, &bbio->parent_check); | ||||
| err: | ||||
| 	if (ret) { | ||||
| 		/*
 | ||||
|  | @ -628,8 +696,18 @@ static void run_one_async_start(struct btrfs_work *work) | |||
| 	blk_status_t ret; | ||||
| 
 | ||||
| 	async = container_of(work, struct  async_submit_bio, work); | ||||
| 	ret = async->submit_bio_start(async->inode, async->bio, | ||||
| 				      async->dio_file_offset); | ||||
| 	switch (async->submit_cmd) { | ||||
| 	case WQ_SUBMIT_METADATA: | ||||
| 		ret = btree_submit_bio_start(async->bio); | ||||
| 		break; | ||||
| 	case WQ_SUBMIT_DATA: | ||||
| 		ret = btrfs_submit_bio_start(async->inode, async->bio); | ||||
| 		break; | ||||
| 	case WQ_SUBMIT_DATA_DIO: | ||||
| 		ret = btrfs_submit_bio_start_direct_io(async->inode, | ||||
| 				async->bio, async->dio_file_offset); | ||||
| 		break; | ||||
| 	} | ||||
| 	if (ret) | ||||
| 		async->status = ret; | ||||
| } | ||||
|  | @ -646,7 +724,7 @@ static void run_one_async_done(struct btrfs_work *work) | |||
| { | ||||
| 	struct async_submit_bio *async = | ||||
| 		container_of(work, struct  async_submit_bio, work); | ||||
| 	struct inode *inode = async->inode; | ||||
| 	struct btrfs_inode *inode = async->inode; | ||||
| 	struct btrfs_bio *bbio = btrfs_bio(async->bio); | ||||
| 
 | ||||
| 	/* If an error occurred we just want to clean up the bio and move on */ | ||||
|  | @ -661,7 +739,7 @@ static void run_one_async_done(struct btrfs_work *work) | |||
| 	 * This changes nothing when cgroups aren't in use. | ||||
| 	 */ | ||||
| 	async->bio->bi_opf |= REQ_CGROUP_PUNT; | ||||
| 	btrfs_submit_bio(btrfs_sb(inode->i_sb), async->bio, async->mirror_num); | ||||
| 	btrfs_submit_bio(inode->root->fs_info, async->bio, async->mirror_num); | ||||
| } | ||||
| 
 | ||||
| static void run_one_async_free(struct btrfs_work *work) | ||||
|  | @ -679,11 +757,10 @@ static void run_one_async_free(struct btrfs_work *work) | |||
|  * - true if the work has been successfully submitted | ||||
|  * - false in case of error | ||||
|  */ | ||||
| bool btrfs_wq_submit_bio(struct inode *inode, struct bio *bio, int mirror_num, | ||||
| 			 u64 dio_file_offset, | ||||
| 			 extent_submit_bio_start_t *submit_bio_start) | ||||
| bool btrfs_wq_submit_bio(struct btrfs_inode *inode, struct bio *bio, int mirror_num, | ||||
| 			 u64 dio_file_offset, enum btrfs_wq_submit_cmd cmd) | ||||
| { | ||||
| 	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; | ||||
| 	struct btrfs_fs_info *fs_info = inode->root->fs_info; | ||||
| 	struct async_submit_bio *async; | ||||
| 
 | ||||
| 	async = kmalloc(sizeof(*async), GFP_NOFS); | ||||
|  | @ -693,7 +770,7 @@ bool btrfs_wq_submit_bio(struct inode *inode, struct bio *bio, int mirror_num, | |||
| 	async->inode = inode; | ||||
| 	async->bio = bio; | ||||
| 	async->mirror_num = mirror_num; | ||||
| 	async->submit_bio_start = submit_bio_start; | ||||
| 	async->submit_cmd = cmd; | ||||
| 
 | ||||
| 	btrfs_init_work(&async->work, run_one_async_start, run_one_async_done, | ||||
| 			run_one_async_free); | ||||
|  | @ -727,8 +804,7 @@ static blk_status_t btree_csum_one_bio(struct bio *bio) | |||
| 	return errno_to_blk_status(ret); | ||||
| } | ||||
| 
 | ||||
| static blk_status_t btree_submit_bio_start(struct inode *inode, struct bio *bio, | ||||
| 					   u64 dio_file_offset) | ||||
| blk_status_t btree_submit_bio_start(struct bio *bio) | ||||
| { | ||||
| 	/*
 | ||||
| 	 * when we're called for a write, we're already in the async | ||||
|  | @ -749,13 +825,14 @@ static bool should_async_write(struct btrfs_fs_info *fs_info, | |||
| 	return true; | ||||
| } | ||||
| 
 | ||||
| void btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio, int mirror_num) | ||||
| void btrfs_submit_metadata_bio(struct btrfs_inode *inode, struct bio *bio, int mirror_num) | ||||
| { | ||||
| 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); | ||||
| 	struct btrfs_fs_info *fs_info = inode->root->fs_info; | ||||
| 	struct btrfs_bio *bbio = btrfs_bio(bio); | ||||
| 	blk_status_t ret; | ||||
| 
 | ||||
| 	bio->bi_opf |= REQ_META; | ||||
| 	bbio->is_metadata = 1; | ||||
| 
 | ||||
| 	if (btrfs_op(bio) != BTRFS_MAP_WRITE) { | ||||
| 		btrfs_submit_bio(fs_info, bio, mirror_num); | ||||
|  | @ -766,8 +843,8 @@ void btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio, int mirror_ | |||
| 	 * Kthread helpers are used to submit writes so that checksumming can | ||||
| 	 * happen in parallel across all CPUs. | ||||
| 	 */ | ||||
| 	if (should_async_write(fs_info, BTRFS_I(inode)) && | ||||
| 	    btrfs_wq_submit_bio(inode, bio, mirror_num, 0, btree_submit_bio_start)) | ||||
| 	if (should_async_write(fs_info, inode) && | ||||
| 	    btrfs_wq_submit_bio(inode, bio, mirror_num, 0, WQ_SUBMIT_METADATA)) | ||||
| 		return; | ||||
| 
 | ||||
| 	ret = btree_csum_one_bio(bio); | ||||
|  | @ -919,28 +996,28 @@ struct extent_buffer *btrfs_find_create_tree_block( | |||
|  * Read tree block at logical address @bytenr and do various basic but critical | ||||
|  * verifications. | ||||
|  * | ||||
|  * @owner_root:		the objectid of the root owner for this block. | ||||
|  * @parent_transid:	expected transid of this tree block, skip check if 0 | ||||
|  * @level:		expected level, mandatory check | ||||
|  * @first_key:		expected key in slot 0, skip check if NULL | ||||
|  * @check:		expected tree parentness check, see comments of the | ||||
|  *			structure for details. | ||||
|  */ | ||||
| struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr, | ||||
| 				      u64 owner_root, u64 parent_transid, | ||||
| 				      int level, struct btrfs_key *first_key) | ||||
| 				      struct btrfs_tree_parent_check *check) | ||||
| { | ||||
| 	struct extent_buffer *buf = NULL; | ||||
| 	int ret; | ||||
| 
 | ||||
| 	buf = btrfs_find_create_tree_block(fs_info, bytenr, owner_root, level); | ||||
| 	ASSERT(check); | ||||
| 
 | ||||
| 	buf = btrfs_find_create_tree_block(fs_info, bytenr, check->owner_root, | ||||
| 					   check->level); | ||||
| 	if (IS_ERR(buf)) | ||||
| 		return buf; | ||||
| 
 | ||||
| 	ret = btrfs_read_extent_buffer(buf, parent_transid, level, first_key); | ||||
| 	ret = btrfs_read_extent_buffer(buf, check); | ||||
| 	if (ret) { | ||||
| 		free_extent_buffer_stale(buf); | ||||
| 		return ERR_PTR(ret); | ||||
| 	} | ||||
| 	if (btrfs_check_eb_owner(buf, owner_root)) { | ||||
| 	if (btrfs_check_eb_owner(buf, check->owner_root)) { | ||||
| 		free_extent_buffer_stale(buf); | ||||
| 		return ERR_PTR(-EUCLEAN); | ||||
| 	} | ||||
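A hypothetical call site now fills the check structure instead of passing loose arguments (expected_gen and expected_level are placeholders; the field names match the hunks below):

	/* Hypothetical caller of the consolidated parentness checks. */
	struct btrfs_tree_parent_check check = { 0 };
	struct extent_buffer *eb;

	check.owner_root = root->root_key.objectid;
	check.transid = expected_gen;		/* 0 skips the transid check */
	check.level = expected_level;		/* mandatory */
	eb = read_tree_block(fs_info, bytenr, &check);
	if (IS_ERR(eb))
		return PTR_ERR(eb);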
|  | @ -1027,9 +1104,9 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info, | |||
| 	root->anon_dev = 0; | ||||
| 	if (!dummy) { | ||||
| 		extent_io_tree_init(fs_info, &root->dirty_log_pages, | ||||
| 				    IO_TREE_ROOT_DIRTY_LOG_PAGES, NULL); | ||||
| 				    IO_TREE_ROOT_DIRTY_LOG_PAGES); | ||||
| 		extent_io_tree_init(fs_info, &root->log_csum_range, | ||||
| 				    IO_TREE_LOG_CSUM_RANGE, NULL); | ||||
| 				    IO_TREE_LOG_CSUM_RANGE); | ||||
| 	} | ||||
| 
 | ||||
| 	spin_lock_init(&root->root_item_lock); | ||||
|  | @ -1167,6 +1244,13 @@ struct btrfs_root *btrfs_extent_root(struct btrfs_fs_info *fs_info, u64 bytenr) | |||
| 	return btrfs_global_root(fs_info, &key); | ||||
| } | ||||
| 
 | ||||
| struct btrfs_root *btrfs_block_group_root(struct btrfs_fs_info *fs_info) | ||||
| { | ||||
| 	if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE)) | ||||
| 		return fs_info->block_group_root; | ||||
| 	return btrfs_extent_root(fs_info, 0); | ||||
| } | ||||
| 
 | ||||
| struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, | ||||
| 				     u64 objectid) | ||||
| { | ||||
|  | @ -1197,7 +1281,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, | |||
| 	if (IS_ERR(leaf)) { | ||||
| 		ret = PTR_ERR(leaf); | ||||
| 		leaf = NULL; | ||||
| 		goto fail_unlock; | ||||
| 		goto fail; | ||||
| 	} | ||||
| 
 | ||||
| 	root->node = leaf; | ||||
|  | @ -1232,9 +1316,6 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, | |||
| 
 | ||||
| 	return root; | ||||
| 
 | ||||
| fail_unlock: | ||||
| 	if (leaf) | ||||
| 		btrfs_tree_unlock(leaf); | ||||
| fail: | ||||
| 	btrfs_put_root(root); | ||||
| 
 | ||||
|  | @ -1352,6 +1433,7 @@ static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root, | |||
| 					      struct btrfs_key *key) | ||||
| { | ||||
| 	struct btrfs_root *root; | ||||
| 	struct btrfs_tree_parent_check check = { 0 }; | ||||
| 	struct btrfs_fs_info *fs_info = tree_root->fs_info; | ||||
| 	u64 generation; | ||||
| 	int ret; | ||||
|  | @ -1371,9 +1453,11 @@ static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root, | |||
| 
 | ||||
| 	generation = btrfs_root_generation(&root->root_item); | ||||
| 	level = btrfs_root_level(&root->root_item); | ||||
| 	root->node = read_tree_block(fs_info, | ||||
| 				     btrfs_root_bytenr(&root->root_item), | ||||
| 				     key->objectid, generation, level, NULL); | ||||
| 	check.level = level; | ||||
| 	check.transid = generation; | ||||
| 	check.owner_root = key->objectid; | ||||
| 	root->node = read_tree_block(fs_info, btrfs_root_bytenr(&root->root_item), | ||||
| 				     &check); | ||||
| 	if (IS_ERR(root->node)) { | ||||
| 		ret = PTR_ERR(root->node); | ||||
| 		root->node = NULL; | ||||
|  | @ -2084,8 +2168,6 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) | |||
| 	btrfs_destroy_workqueue(fs_info->workers); | ||||
| 	if (fs_info->endio_workers) | ||||
| 		destroy_workqueue(fs_info->endio_workers); | ||||
| 	if (fs_info->endio_raid56_workers) | ||||
| 		destroy_workqueue(fs_info->endio_raid56_workers); | ||||
| 	if (fs_info->rmw_workers) | ||||
| 		destroy_workqueue(fs_info->rmw_workers); | ||||
| 	if (fs_info->compressed_write_workers) | ||||
|  | @ -2231,7 +2313,7 @@ static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info) | |||
| 
 | ||||
| 	RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); | ||||
| 	extent_io_tree_init(fs_info, &BTRFS_I(inode)->io_tree, | ||||
| 			    IO_TREE_BTREE_INODE_IO, NULL); | ||||
| 			    IO_TREE_BTREE_INODE_IO); | ||||
| 	extent_map_tree_init(&BTRFS_I(inode)->extent_tree); | ||||
| 
 | ||||
| 	BTRFS_I(inode)->root = btrfs_grab_root(fs_info->tree_root); | ||||
|  | @ -2291,8 +2373,6 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info) | |||
| 		alloc_workqueue("btrfs-endio", flags, max_active); | ||||
| 	fs_info->endio_meta_workers = | ||||
| 		alloc_workqueue("btrfs-endio-meta", flags, max_active); | ||||
| 	fs_info->endio_raid56_workers = | ||||
| 		alloc_workqueue("btrfs-endio-raid56", flags, max_active); | ||||
| 	fs_info->rmw_workers = alloc_workqueue("btrfs-rmw", flags, max_active); | ||||
| 	fs_info->endio_write_workers = | ||||
| 		btrfs_alloc_workqueue(fs_info, "endio-write", flags, | ||||
|  | @ -2314,7 +2394,7 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info) | |||
| 	      fs_info->delalloc_workers && fs_info->flush_workers && | ||||
| 	      fs_info->endio_workers && fs_info->endio_meta_workers && | ||||
| 	      fs_info->compressed_write_workers && | ||||
| 	      fs_info->endio_write_workers && fs_info->endio_raid56_workers && | ||||
| 	      fs_info->endio_write_workers && | ||||
| 	      fs_info->endio_freespace_worker && fs_info->rmw_workers && | ||||
| 	      fs_info->caching_workers && fs_info->fixup_workers && | ||||
| 	      fs_info->delayed_workers && fs_info->qgroup_rescan_workers && | ||||
|  | @ -2350,6 +2430,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info, | |||
| 			    struct btrfs_fs_devices *fs_devices) | ||||
| { | ||||
| 	int ret; | ||||
| 	struct btrfs_tree_parent_check check = { 0 }; | ||||
| 	struct btrfs_root *log_tree_root; | ||||
| 	struct btrfs_super_block *disk_super = fs_info->super_copy; | ||||
| 	u64 bytenr = btrfs_super_log_root(disk_super); | ||||
|  | @ -2365,10 +2446,10 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info, | |||
| 	if (!log_tree_root) | ||||
| 		return -ENOMEM; | ||||
| 
 | ||||
| 	log_tree_root->node = read_tree_block(fs_info, bytenr, | ||||
| 					      BTRFS_TREE_LOG_OBJECTID, | ||||
| 					      fs_info->generation + 1, level, | ||||
| 					      NULL); | ||||
| 	check.level = level; | ||||
| 	check.transid = fs_info->generation + 1; | ||||
| 	check.owner_root = BTRFS_TREE_LOG_OBJECTID; | ||||
| 	log_tree_root->node = read_tree_block(fs_info, bytenr, &check); | ||||
| 	if (IS_ERR(log_tree_root->node)) { | ||||
| 		btrfs_warn(fs_info, "failed to read log tree"); | ||||
| 		ret = PTR_ERR(log_tree_root->node); | ||||
|  | @ -2846,10 +2927,14 @@ out: | |||
| 
 | ||||
| static int load_super_root(struct btrfs_root *root, u64 bytenr, u64 gen, int level) | ||||
| { | ||||
| 	struct btrfs_tree_parent_check check = { | ||||
| 		.level = level, | ||||
| 		.transid = gen, | ||||
| 		.owner_root = root->root_key.objectid | ||||
| 	}; | ||||
| 	int ret = 0; | ||||
| 
 | ||||
| 	root->node = read_tree_block(root->fs_info, bytenr, | ||||
| 				     root->root_key.objectid, gen, level, NULL); | ||||
| 	root->node = read_tree_block(root->fs_info, bytenr, &check); | ||||
| 	if (IS_ERR(root->node)) { | ||||
| 		ret = PTR_ERR(root->node); | ||||
| 		root->node = NULL; | ||||
|  | @ -3057,7 +3142,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info) | |||
| 	fs_info->block_group_cache_tree = RB_ROOT_CACHED; | ||||
| 
 | ||||
| 	extent_io_tree_init(fs_info, &fs_info->excluded_extents, | ||||
| 			    IO_TREE_FS_EXCLUDED_EXTENTS, NULL); | ||||
| 			    IO_TREE_FS_EXCLUDED_EXTENTS); | ||||
| 
 | ||||
| 	mutex_init(&fs_info->ordered_operations_mutex); | ||||
| 	mutex_init(&fs_info->tree_log_mutex); | ||||
|  | @ -3743,10 +3828,18 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device | |||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Mount does not set all options immediately, we can do it now and do | ||||
| 	 * not have to wait for transaction commit | ||||
| 	 * For devices supporting discard, turn on discard=async automatically, | ||||
| 	 * unless it's already set or disabled. This can be turned off with | ||||
| 	 * nodiscard for the same mount. | ||||
| 	 */ | ||||
| 	btrfs_apply_pending_changes(fs_info); | ||||
| 	if (!(btrfs_test_opt(fs_info, DISCARD_SYNC) || | ||||
| 	      btrfs_test_opt(fs_info, DISCARD_ASYNC) || | ||||
| 	      btrfs_test_opt(fs_info, NODISCARD)) && | ||||
| 	    fs_info->fs_devices->discardable) { | ||||
| 		btrfs_set_and_info(fs_info, DISCARD_ASYNC, | ||||
| 				   "auto enabling async discard"); | ||||
| 		btrfs_clear_opt(fs_info->mount_opt, NODISCARD); | ||||
| 	} | ||||
| 
 | ||||
| #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY | ||||
| 	if (btrfs_test_opt(fs_info, CHECK_INTEGRITY)) { | ||||
|  | @ -3875,7 +3968,7 @@ static void btrfs_end_super_write(struct bio *bio) | |||
| 		if (bio->bi_status) { | ||||
| 			btrfs_warn_rl_in_rcu(device->fs_info, | ||||
| 				"lost page write due to IO error on %s (%d)", | ||||
| 				rcu_str_deref(device->name), | ||||
| 				btrfs_dev_name(device), | ||||
| 				blk_status_to_errno(bio->bi_status)); | ||||
| 			ClearPageUptodate(page); | ||||
| 			SetPageError(page); | ||||
|  |  | |||
|  | @ -27,14 +27,14 @@ static inline u64 btrfs_sb_offset(int mirror) | |||
| 
 | ||||
| struct btrfs_device; | ||||
| struct btrfs_fs_devices; | ||||
| struct btrfs_tree_parent_check; | ||||
| 
 | ||||
| void btrfs_check_leaked_roots(struct btrfs_fs_info *fs_info); | ||||
| void btrfs_init_fs_info(struct btrfs_fs_info *fs_info); | ||||
| int btrfs_verify_level_key(struct extent_buffer *eb, int level, | ||||
| 			   struct btrfs_key *first_key, u64 parent_transid); | ||||
| struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr, | ||||
| 				      u64 owner_root, u64 parent_transid, | ||||
| 				      int level, struct btrfs_key *first_key); | ||||
| 				      struct btrfs_tree_parent_check *check); | ||||
| struct extent_buffer *btrfs_find_create_tree_block( | ||||
| 						struct btrfs_fs_info *fs_info, | ||||
| 						u64 bytenr, u64 owner_root, | ||||
|  | @ -75,6 +75,7 @@ struct btrfs_root *btrfs_global_root(struct btrfs_fs_info *fs_info, | |||
| 				     struct btrfs_key *key); | ||||
| struct btrfs_root *btrfs_csum_root(struct btrfs_fs_info *fs_info, u64 bytenr); | ||||
| struct btrfs_root *btrfs_extent_root(struct btrfs_fs_info *fs_info, u64 bytenr); | ||||
| struct btrfs_root *btrfs_block_group_root(struct btrfs_fs_info *fs_info); | ||||
| 
 | ||||
| void btrfs_free_fs_info(struct btrfs_fs_info *fs_info); | ||||
| int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info); | ||||
|  | @ -85,7 +86,7 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, | |||
| int btrfs_validate_metadata_buffer(struct btrfs_bio *bbio, | ||||
| 				   struct page *page, u64 start, u64 end, | ||||
| 				   int mirror); | ||||
| void btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio, int mirror_num); | ||||
| void btrfs_submit_metadata_bio(struct btrfs_inode *inode, struct bio *bio, int mirror_num); | ||||
| #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | ||||
| struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info); | ||||
| #endif | ||||
|  | @ -106,24 +107,22 @@ static inline struct btrfs_root *btrfs_grab_root(struct btrfs_root *root) | |||
| 	return NULL; | ||||
| } | ||||
| 
 | ||||
| static inline struct btrfs_root *btrfs_block_group_root(struct btrfs_fs_info *fs_info) | ||||
| { | ||||
| 	if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE)) | ||||
| 		return fs_info->block_group_root; | ||||
| 	return btrfs_extent_root(fs_info, 0); | ||||
| } | ||||
| 
 | ||||
| void btrfs_put_root(struct btrfs_root *root); | ||||
| void btrfs_mark_buffer_dirty(struct extent_buffer *buf); | ||||
| int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, | ||||
| 			  int atomic); | ||||
| int btrfs_read_extent_buffer(struct extent_buffer *buf, u64 parent_transid, | ||||
| 			     int level, struct btrfs_key *first_key); | ||||
| bool btrfs_wq_submit_bio(struct inode *inode, struct bio *bio, int mirror_num, | ||||
| 			 u64 dio_file_offset, | ||||
| 			 extent_submit_bio_start_t *submit_bio_start); | ||||
| blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio, | ||||
| 			  int mirror_num); | ||||
| int btrfs_read_extent_buffer(struct extent_buffer *buf, | ||||
| 			     struct btrfs_tree_parent_check *check); | ||||
| 
 | ||||
| enum btrfs_wq_submit_cmd { | ||||
| 	WQ_SUBMIT_METADATA, | ||||
| 	WQ_SUBMIT_DATA, | ||||
| 	WQ_SUBMIT_DATA_DIO, | ||||
| }; | ||||
| 
 | ||||
| bool btrfs_wq_submit_bio(struct btrfs_inode *inode, struct bio *bio, int mirror_num, | ||||
| 			 u64 dio_file_offset, enum btrfs_wq_submit_cmd cmd); | ||||
| blk_status_t btree_submit_bio_start(struct bio *bio); | ||||
| int btrfs_alloc_log_tree_node(struct btrfs_trans_handle *trans, | ||||
| 			      struct btrfs_root *root); | ||||
| int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, | ||||
|  | @ -136,8 +135,6 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans, | |||
| 				  struct btrfs_fs_info *fs_info); | ||||
| struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, | ||||
| 				     u64 objectid); | ||||
| int btree_lock_page_hook(struct page *page, void *data, | ||||
| 				void (*flush_fn)(void *)); | ||||
| int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags); | ||||
| int btrfs_get_free_objectid(struct btrfs_root *root, u64 *objectid); | ||||
| int btrfs_init_root_free_objectid(struct btrfs_root *root); | ||||
|  |  | |||
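read_tree_block() now takes the expected-parentness parameters bundled in struct btrfs_tree_parent_check instead of loose level/transid/first_key arguments. The calling pattern, as the do_walk_down() hunk later in this diff shows (the variables come from that surrounding kernel context; error handling trimmed):

struct btrfs_tree_parent_check check = { 0 };

check.level = level - 1;
check.transid = generation;
check.owner_root = root->root_key.objectid;
check.has_first_key = true;
btrfs_node_key_to_cpu(path->nodes[level], &check.first_key,
		      path->slots[level]);

next = read_tree_block(fs_info, bytenr, &check);
if (IS_ERR(next))
	return PTR_ERR(next);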
|  | @ -7,6 +7,8 @@ | |||
| #include "btrfs_inode.h" | ||||
| #include "print-tree.h" | ||||
| #include "export.h" | ||||
| #include "accessors.h" | ||||
| #include "super.h" | ||||
| 
 | ||||
| #define BTRFS_FID_SIZE_NON_CONNECTABLE (offsetof(struct btrfs_fid, \ | ||||
| 						 parent_objectid) / 4) | ||||
|  | @ -57,9 +59,20 @@ static int btrfs_encode_fh(struct inode *inode, u32 *fh, int *max_len, | |||
| 	return type; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Read dentry of inode with @objectid from filesystem root @root_objectid. | ||||
|  * | ||||
|  * @sb:             the filesystem super block | ||||
|  * @objectid:       inode objectid | ||||
|  * @root_objectid:  object id of the subvolume root where to look up the inode | ||||
|  * @generation:     optional, if not zero, verify that the found inode | ||||
|  *                  generation matches | ||||
|  * | ||||
|  * Return a dentry alias for the inode, otherwise an error. If the | ||||
|  * generation does not match, return ESTALE. | ||||
|  */ | ||||
| struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, | ||||
| 				u64 root_objectid, u64 generation, | ||||
| 				int check_generation) | ||||
| 				u64 root_objectid, u64 generation) | ||||
| { | ||||
| 	struct btrfs_fs_info *fs_info = btrfs_sb(sb); | ||||
| 	struct btrfs_root *root; | ||||
|  | @ -77,7 +90,7 @@ struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, | |||
| 	if (IS_ERR(inode)) | ||||
| 		return ERR_CAST(inode); | ||||
| 
 | ||||
| 	if (check_generation && generation != inode->i_generation) { | ||||
| 	if (generation != 0 && generation != inode->i_generation) { | ||||
| 		iput(inode); | ||||
| 		return ERR_PTR(-ESTALE); | ||||
| 	} | ||||
|  | @ -106,7 +119,7 @@ static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh, | |||
| 	objectid = fid->parent_objectid; | ||||
| 	generation = fid->parent_gen; | ||||
| 
 | ||||
| 	return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1); | ||||
| 	return btrfs_get_dentry(sb, objectid, root_objectid, generation); | ||||
| } | ||||
| 
 | ||||
| static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh, | ||||
|  | @ -128,7 +141,7 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh, | |||
| 	root_objectid = fid->root_objectid; | ||||
| 	generation = fid->gen; | ||||
| 
 | ||||
| 	return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1); | ||||
| 	return btrfs_get_dentry(sb, objectid, root_objectid, generation); | ||||
| } | ||||
| 
 | ||||
| struct dentry *btrfs_get_parent(struct dentry *child) | ||||
|  | @ -188,7 +201,7 @@ struct dentry *btrfs_get_parent(struct dentry *child) | |||
| 
 | ||||
| 	if (found_key.type == BTRFS_ROOT_BACKREF_KEY) { | ||||
| 		return btrfs_get_dentry(fs_info->sb, key.objectid, | ||||
| 					found_key.offset, 0, 0); | ||||
| 					found_key.offset, 0); | ||||
| 	} | ||||
| 
 | ||||
| 	return d_obtain_alias(btrfs_iget(fs_info->sb, key.objectid, root)); | ||||
|  |  | |||
|  | @ -19,8 +19,7 @@ struct btrfs_fid { | |||
| } __attribute__ ((packed)); | ||||
| 
 | ||||
| struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, | ||||
| 				u64 root_objectid, u64 generation, | ||||
| 				int check_generation); | ||||
| 				u64 root_objectid, u64 generation); | ||||
| struct dentry *btrfs_get_parent(struct dentry *child); | ||||
| 
 | ||||
| #endif | ||||
|  |  | |||
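With the check_generation flag folded away, a zero generation is now what skips the verification. The two call styles, taken from the hunks above (sb, objectid and the keys come from the surrounding context):

/* Verify: returns ERR_PTR(-ESTALE) if the inode generation differs. */
dentry = btrfs_get_dentry(sb, objectid, root_objectid, generation);

/* Skip verification by passing 0, as btrfs_get_parent() does. */
dentry = btrfs_get_dentry(fs_info->sb, key.objectid, found_key.offset, 0);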
|  | @ -2,6 +2,7 @@ | |||
| 
 | ||||
| #include <linux/slab.h> | ||||
| #include <trace/events/btrfs.h> | ||||
| #include "messages.h" | ||||
| #include "ctree.h" | ||||
| #include "extent-io-tree.h" | ||||
| #include "btrfs_inode.h" | ||||
|  | @ -57,17 +58,17 @@ static inline void __btrfs_debug_check_extent_io_range(const char *caller, | |||
| 						       struct extent_io_tree *tree, | ||||
| 						       u64 start, u64 end) | ||||
| { | ||||
| 	struct inode *inode = tree->private_data; | ||||
| 	struct btrfs_inode *inode = tree->inode; | ||||
| 	u64 isize; | ||||
| 
 | ||||
| 	if (!inode) | ||||
| 		return; | ||||
| 
 | ||||
| 	isize = i_size_read(inode); | ||||
| 	isize = i_size_read(&inode->vfs_inode); | ||||
| 	if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) { | ||||
| 		btrfs_debug_rl(BTRFS_I(inode)->root->fs_info, | ||||
| 		btrfs_debug_rl(inode->root->fs_info, | ||||
| 		    "%s: ino %llu isize %llu odd range [%llu,%llu]", | ||||
| 			caller, btrfs_ino(BTRFS_I(inode)), isize, start, end); | ||||
| 			caller, btrfs_ino(inode), isize, start, end); | ||||
| 	} | ||||
| } | ||||
| #else | ||||
|  | @ -93,13 +94,12 @@ struct tree_entry { | |||
| }; | ||||
| 
 | ||||
| void extent_io_tree_init(struct btrfs_fs_info *fs_info, | ||||
| 			 struct extent_io_tree *tree, unsigned int owner, | ||||
| 			 void *private_data) | ||||
| 			 struct extent_io_tree *tree, unsigned int owner) | ||||
| { | ||||
| 	tree->fs_info = fs_info; | ||||
| 	tree->state = RB_ROOT; | ||||
| 	spin_lock_init(&tree->lock); | ||||
| 	tree->private_data = private_data; | ||||
| 	tree->inode = NULL; | ||||
| 	tree->owner = owner; | ||||
| 	if (owner == IO_TREE_INODE_FILE_EXTENT) | ||||
| 		lockdep_set_class(&tree->lock, &file_extent_tree_class); | ||||
|  | @ -346,9 +346,8 @@ static void merge_state(struct extent_io_tree *tree, struct extent_state *state) | |||
| 	other = prev_state(state); | ||||
| 	if (other && other->end == state->start - 1 && | ||||
| 	    other->state == state->state) { | ||||
| 		if (tree->private_data) | ||||
| 			btrfs_merge_delalloc_extent(tree->private_data, | ||||
| 						    state, other); | ||||
| 		if (tree->inode) | ||||
| 			btrfs_merge_delalloc_extent(tree->inode, state, other); | ||||
| 		state->start = other->start; | ||||
| 		rb_erase(&other->rb_node, &tree->state); | ||||
| 		RB_CLEAR_NODE(&other->rb_node); | ||||
|  | @ -357,9 +356,8 @@ static void merge_state(struct extent_io_tree *tree, struct extent_state *state) | |||
| 	other = next_state(state); | ||||
| 	if (other && other->start == state->end + 1 && | ||||
| 	    other->state == state->state) { | ||||
| 		if (tree->private_data) | ||||
| 			btrfs_merge_delalloc_extent(tree->private_data, state, | ||||
| 						    other); | ||||
| 		if (tree->inode) | ||||
| 			btrfs_merge_delalloc_extent(tree->inode, state, other); | ||||
| 		state->end = other->end; | ||||
| 		rb_erase(&other->rb_node, &tree->state); | ||||
| 		RB_CLEAR_NODE(&other->rb_node); | ||||
|  | @ -374,8 +372,8 @@ static void set_state_bits(struct extent_io_tree *tree, | |||
| 	u32 bits_to_set = bits & ~EXTENT_CTLBITS; | ||||
| 	int ret; | ||||
| 
 | ||||
| 	if (tree->private_data) | ||||
| 		btrfs_set_delalloc_extent(tree->private_data, state, bits); | ||||
| 	if (tree->inode) | ||||
| 		btrfs_set_delalloc_extent(tree->inode, state, bits); | ||||
| 
 | ||||
| 	ret = add_extent_changeset(state, bits_to_set, changeset, 1); | ||||
| 	BUG_ON(ret < 0); | ||||
|  | @ -397,7 +395,7 @@ static int insert_state(struct extent_io_tree *tree, | |||
| 			u32 bits, struct extent_changeset *changeset) | ||||
| { | ||||
| 	struct rb_node **node; | ||||
| 	struct rb_node *parent; | ||||
| 	struct rb_node *parent = NULL; | ||||
| 	const u64 end = state->end; | ||||
| 
 | ||||
| 	set_state_bits(tree, state, bits, changeset); | ||||
|  | @ -462,8 +460,8 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig, | |||
| 	struct rb_node *parent = NULL; | ||||
| 	struct rb_node **node; | ||||
| 
 | ||||
| 	if (tree->private_data) | ||||
| 		btrfs_split_delalloc_extent(tree->private_data, orig, split); | ||||
| 	if (tree->inode) | ||||
| 		btrfs_split_delalloc_extent(tree->inode, orig, split); | ||||
| 
 | ||||
| 	prealloc->start = orig->start; | ||||
| 	prealloc->end = split - 1; | ||||
|  | @ -510,8 +508,8 @@ static struct extent_state *clear_state_bit(struct extent_io_tree *tree, | |||
| 	u32 bits_to_clear = bits & ~EXTENT_CTLBITS; | ||||
| 	int ret; | ||||
| 
 | ||||
| 	if (tree->private_data) | ||||
| 		btrfs_clear_delalloc_extent(tree->private_data, state, bits); | ||||
| 	if (tree->inode) | ||||
| 		btrfs_clear_delalloc_extent(tree->inode, state, bits); | ||||
| 
 | ||||
| 	ret = add_extent_changeset(state, bits_to_clear, changeset, 0); | ||||
| 	BUG_ON(ret < 0); | ||||
|  | @ -572,7 +570,7 @@ int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
| 	if (bits & (EXTENT_LOCKED | EXTENT_BOUNDARY)) | ||||
| 		clear = 1; | ||||
| again: | ||||
| 	if (!prealloc && gfpflags_allow_blocking(mask)) { | ||||
| 	if (!prealloc) { | ||||
| 		/*
 | ||||
| 		 * Don't care for allocation failure here because we might end | ||||
| 		 * up not needing the pre-allocated extent state at all, which | ||||
|  | @ -636,7 +634,8 @@ hit_next: | |||
| 
 | ||||
| 	if (state->start < start) { | ||||
| 		prealloc = alloc_extent_state_atomic(prealloc); | ||||
| 		BUG_ON(!prealloc); | ||||
| 		if (!prealloc) | ||||
| 			goto search_again; | ||||
| 		err = split_state(tree, state, prealloc, start); | ||||
| 		if (err) | ||||
| 			extent_io_tree_panic(tree, err); | ||||
|  | @ -657,7 +656,8 @@ hit_next: | |||
| 	 */ | ||||
| 	if (state->start <= end && state->end > end) { | ||||
| 		prealloc = alloc_extent_state_atomic(prealloc); | ||||
| 		BUG_ON(!prealloc); | ||||
| 		if (!prealloc) | ||||
| 			goto search_again; | ||||
| 		err = split_state(tree, state, prealloc, end + 1); | ||||
| 		if (err) | ||||
| 			extent_io_tree_panic(tree, err); | ||||
|  | @ -714,7 +714,8 @@ static void wait_on_state(struct extent_io_tree *tree, | |||
|  * The range [start, end] is inclusive. | ||||
|  * The tree lock is taken by this function | ||||
|  */ | ||||
| void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bits) | ||||
| void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bits, | ||||
| 		     struct extent_state **cached_state) | ||||
| { | ||||
| 	struct extent_state *state; | ||||
| 
 | ||||
|  | @ -722,6 +723,16 @@ void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bits) | |||
| 
 | ||||
| 	spin_lock(&tree->lock); | ||||
| again: | ||||
| 	/*
 | ||||
| 	 * Maintain cached_state, as we may not remove it from the tree if there | ||||
| 	 * are more bits than the bits we're waiting on set on this state. | ||||
| 	 */ | ||||
| 	if (cached_state && *cached_state) { | ||||
| 		state = *cached_state; | ||||
| 		if (extent_state_in_tree(state) && | ||||
| 		    state->start <= start && start < state->end) | ||||
| 			goto process_node; | ||||
| 	} | ||||
| 	while (1) { | ||||
| 		/*
 | ||||
| 		 * This search will find all the extents that end after our | ||||
|  | @ -752,6 +763,12 @@ process_node: | |||
| 		} | ||||
| 	} | ||||
| out: | ||||
| 	/* This state is no longer useful, clear it and free it up. */ | ||||
| 	if (cached_state && *cached_state) { | ||||
| 		state = *cached_state; | ||||
| 		*cached_state = NULL; | ||||
| 		free_extent_state(state); | ||||
| 	} | ||||
| 	spin_unlock(&tree->lock); | ||||
| } | ||||
| 
 | ||||
|  | @ -939,13 +956,17 @@ out: | |||
|  * sleeping, so the gfp mask is used to indicate what is allowed. | ||||
|  * | ||||
|  * If any of the exclusive bits are set, this will fail with -EEXIST if some | ||||
|  * part of the range already has the desired bits set.  The start of the | ||||
|  * existing range is returned in failed_start in this case. | ||||
|  * part of the range already has the desired bits set.  The extent_state of the | ||||
|  * existing range is returned in failed_state in this case, and the start of the | ||||
|  * existing range is returned in failed_start.  failed_state is used as an | ||||
|  * optimization for wait_extent_bit; failed_start must be used as the source of | ||||
|  * truth, as failed_state may have changed since we returned. | ||||
|  * | ||||
|  * [start, end] is inclusive. This takes the tree lock. | ||||
|  */ | ||||
| static int __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | ||||
| 			    u32 bits, u64 *failed_start, | ||||
| 			    struct extent_state **failed_state, | ||||
| 			    struct extent_state **cached_state, | ||||
| 			    struct extent_changeset *changeset, gfp_t mask) | ||||
| { | ||||
|  | @ -964,9 +985,9 @@ static int __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
| 	if (exclusive_bits) | ||||
| 		ASSERT(failed_start); | ||||
| 	else | ||||
| 		ASSERT(failed_start == NULL); | ||||
| 		ASSERT(failed_start == NULL && failed_state == NULL); | ||||
| again: | ||||
| 	if (!prealloc && gfpflags_allow_blocking(mask)) { | ||||
| 	if (!prealloc) { | ||||
| 		/*
 | ||||
| 		 * Don't care for allocation failure here because we might end | ||||
| 		 * up not needing the pre-allocated extent state at all, which | ||||
|  | @ -991,7 +1012,8 @@ again: | |||
| 	state = tree_search_for_insert(tree, start, &p, &parent); | ||||
| 	if (!state) { | ||||
| 		prealloc = alloc_extent_state_atomic(prealloc); | ||||
| 		BUG_ON(!prealloc); | ||||
| 		if (!prealloc) | ||||
| 			goto search_again; | ||||
| 		prealloc->start = start; | ||||
| 		prealloc->end = end; | ||||
| 		insert_state_fast(tree, prealloc, p, parent, bits, changeset); | ||||
|  | @ -1012,6 +1034,7 @@ hit_next: | |||
| 	if (state->start == start && state->end <= end) { | ||||
| 		if (state->state & exclusive_bits) { | ||||
| 			*failed_start = state->start; | ||||
| 			cache_state(state, failed_state); | ||||
| 			err = -EEXIST; | ||||
| 			goto out; | ||||
| 		} | ||||
|  | @ -1047,6 +1070,7 @@ hit_next: | |||
| 	if (state->start < start) { | ||||
| 		if (state->state & exclusive_bits) { | ||||
| 			*failed_start = start; | ||||
| 			cache_state(state, failed_state); | ||||
| 			err = -EEXIST; | ||||
| 			goto out; | ||||
| 		} | ||||
|  | @ -1062,7 +1086,8 @@ hit_next: | |||
| 		} | ||||
| 
 | ||||
| 		prealloc = alloc_extent_state_atomic(prealloc); | ||||
| 		BUG_ON(!prealloc); | ||||
| 		if (!prealloc) | ||||
| 			goto search_again; | ||||
| 		err = split_state(tree, state, prealloc, start); | ||||
| 		if (err) | ||||
| 			extent_io_tree_panic(tree, err); | ||||
|  | @ -1099,7 +1124,8 @@ hit_next: | |||
| 			this_end = last_start - 1; | ||||
| 
 | ||||
| 		prealloc = alloc_extent_state_atomic(prealloc); | ||||
| 		BUG_ON(!prealloc); | ||||
| 		if (!prealloc) | ||||
| 			goto search_again; | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * Avoid freeing 'prealloc' if it can be merged with the later | ||||
|  | @ -1125,12 +1151,14 @@ hit_next: | |||
| 	if (state->start <= end && state->end > end) { | ||||
| 		if (state->state & exclusive_bits) { | ||||
| 			*failed_start = start; | ||||
| 			cache_state(state, failed_state); | ||||
| 			err = -EEXIST; | ||||
| 			goto out; | ||||
| 		} | ||||
| 
 | ||||
| 		prealloc = alloc_extent_state_atomic(prealloc); | ||||
| 		BUG_ON(!prealloc); | ||||
| 		if (!prealloc) | ||||
| 			goto search_again; | ||||
| 		err = split_state(tree, state, prealloc, end + 1); | ||||
| 		if (err) | ||||
| 			extent_io_tree_panic(tree, err); | ||||
|  | @ -1162,8 +1190,8 @@ out: | |||
| int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | ||||
| 		   u32 bits, struct extent_state **cached_state, gfp_t mask) | ||||
| { | ||||
| 	return __set_extent_bit(tree, start, end, bits, NULL, cached_state, | ||||
| 				NULL, mask); | ||||
| 	return __set_extent_bit(tree, start, end, bits, NULL, NULL, | ||||
| 				cached_state, NULL, mask); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  | @ -1397,7 +1425,7 @@ void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start, | |||
| 				 u64 *start_ret, u64 *end_ret, u32 bits) | ||||
| { | ||||
| 	struct extent_state *state; | ||||
| 	struct extent_state *prev = NULL, *next; | ||||
| 	struct extent_state *prev = NULL, *next = NULL; | ||||
| 
 | ||||
| 	spin_lock(&tree->lock); | ||||
| 
 | ||||
|  | @ -1487,15 +1515,37 @@ out: | |||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Count the number of bytes in the tree that have a given bit(s) set.  This | ||||
|  * can be fairly slow, except for EXTENT_DIRTY which is cached.  The total | ||||
|  * number found is returned. | ||||
|  * Count the number of bytes in the tree that have a given bit(s) set for a | ||||
|  * given range. | ||||
|  * | ||||
|  * @tree:         The io tree to search. | ||||
|  * @start:        The start offset of the range. This value is updated to the | ||||
|  *                offset of the first byte found with the given bit(s), so it | ||||
|  *                can end up being bigger than the initial value. | ||||
|  * @search_end:   The end offset (inclusive value) of the search range. | ||||
|  * @max_bytes:    The maximum byte count we are interested in. The search stops | ||||
|  *                once it reaches this count. | ||||
|  * @bits:         The bits the range must have in order to be accounted for. | ||||
|  *                If multiple bits are set, then only subranges that have all | ||||
|  *                the bits set are accounted for. | ||||
|  * @contig:       Indicate whether the counted range must be contiguous. If | ||||
|  *                this is true, then stop once we find a hole. | ||||
|  * @cached_state: A cached state to be used across multiple calls to this | ||||
|  *                function in order to speedup searches. Use NULL if this is | ||||
|  *                called only once or if each call does not start where the | ||||
|  *                previous one ended. | ||||
|  * | ||||
|  * Returns the total number of bytes found within the given range that have | ||||
|  * all given bits set. If the returned number of bytes is greater than zero | ||||
|  * then @start is updated with the offset of the first byte with the bits set. | ||||
|  */ | ||||
| u64 count_range_bits(struct extent_io_tree *tree, | ||||
| 		     u64 *start, u64 search_end, u64 max_bytes, | ||||
| 		     u32 bits, int contig) | ||||
| 		     u32 bits, int contig, | ||||
| 		     struct extent_state **cached_state) | ||||
| { | ||||
| 	struct extent_state *state; | ||||
| 	struct extent_state *state = NULL; | ||||
| 	struct extent_state *cached; | ||||
| 	u64 cur_start = *start; | ||||
| 	u64 total_bytes = 0; | ||||
| 	u64 last = 0; | ||||
|  | @ -1506,11 +1556,41 @@ u64 count_range_bits(struct extent_io_tree *tree, | |||
| 
 | ||||
| 	spin_lock(&tree->lock); | ||||
| 
 | ||||
| 	if (!cached_state || !*cached_state) | ||||
| 		goto search; | ||||
| 
 | ||||
| 	cached = *cached_state; | ||||
| 
 | ||||
| 	if (!extent_state_in_tree(cached)) | ||||
| 		goto search; | ||||
| 
 | ||||
| 	if (cached->start <= cur_start && cur_start <= cached->end) { | ||||
| 		state = cached; | ||||
| 	} else if (cached->start > cur_start) { | ||||
| 		struct extent_state *prev; | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * The cached state starts after our search range's start. Check | ||||
| 		 * if the previous state record starts at or before the range we | ||||
| 		 * are looking for, and if so, use it - this is a common case | ||||
| 		 * when there are holes between records in the tree. If there is | ||||
| 		 * no previous state record, we can start from our cached state. | ||||
| 		 */ | ||||
| 		prev = prev_state(cached); | ||||
| 		if (!prev) | ||||
| 			state = cached; | ||||
| 		else if (prev->start <= cur_start && cur_start <= prev->end) | ||||
| 			state = prev; | ||||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * This search will find all the extents that end after our range | ||||
| 	 * starts. | ||||
| 	 */ | ||||
| 	state = tree_search(tree, cur_start); | ||||
| search: | ||||
| 	if (!state) | ||||
| 		state = tree_search(tree, cur_start); | ||||
| 
 | ||||
| 	while (state) { | ||||
| 		if (state->start > search_end) | ||||
| 			break; | ||||
|  | @ -1531,7 +1611,16 @@ u64 count_range_bits(struct extent_io_tree *tree, | |||
| 		} | ||||
| 		state = next_state(state); | ||||
| 	} | ||||
| 
 | ||||
| 	if (cached_state) { | ||||
| 		free_extent_state(*cached_state); | ||||
| 		*cached_state = state; | ||||
| 		if (state) | ||||
| 			refcount_inc(&state->refs); | ||||
| 	} | ||||
| 
 | ||||
| 	spin_unlock(&tree->lock); | ||||
| 
 | ||||
| 	return total_bytes; | ||||
| } | ||||
| 
 | ||||
|  | @ -1598,8 +1687,8 @@ int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | |||
| 	 */ | ||||
| 	ASSERT(!(bits & EXTENT_LOCKED)); | ||||
| 
 | ||||
| 	return __set_extent_bit(tree, start, end, bits, NULL, NULL, changeset, | ||||
| 				GFP_NOFS); | ||||
| 	return __set_extent_bit(tree, start, end, bits, NULL, NULL, NULL, | ||||
| 				changeset, GFP_NOFS); | ||||
| } | ||||
| 
 | ||||
| int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | ||||
|  | @ -1615,17 +1704,18 @@ int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | |||
| 				  changeset); | ||||
| } | ||||
| 
 | ||||
| int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end) | ||||
| int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, | ||||
| 		    struct extent_state **cached) | ||||
| { | ||||
| 	int err; | ||||
| 	u64 failed_start; | ||||
| 
 | ||||
| 	err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start, | ||||
| 			       NULL, NULL, GFP_NOFS); | ||||
| 			       NULL, cached, NULL, GFP_NOFS); | ||||
| 	if (err == -EEXIST) { | ||||
| 		if (failed_start > start) | ||||
| 			clear_extent_bit(tree, start, failed_start - 1, | ||||
| 					 EXTENT_LOCKED, NULL); | ||||
| 					 EXTENT_LOCKED, cached); | ||||
| 		return 0; | ||||
| 	} | ||||
| 	return 1; | ||||
|  | @ -1638,20 +1728,22 @@ int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end) | |||
| int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, | ||||
| 		struct extent_state **cached_state) | ||||
| { | ||||
| 	struct extent_state *failed_state = NULL; | ||||
| 	int err; | ||||
| 	u64 failed_start; | ||||
| 
 | ||||
| 	err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start, | ||||
| 			       cached_state, NULL, GFP_NOFS); | ||||
| 			       &failed_state, cached_state, NULL, GFP_NOFS); | ||||
| 	while (err == -EEXIST) { | ||||
| 		if (failed_start != start) | ||||
| 			clear_extent_bit(tree, start, failed_start - 1, | ||||
| 					 EXTENT_LOCKED, cached_state); | ||||
| 
 | ||||
| 		wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); | ||||
| 		wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED, | ||||
| 				&failed_state); | ||||
| 		err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, | ||||
| 				       &failed_start, cached_state, NULL, | ||||
| 				       GFP_NOFS); | ||||
| 				       &failed_start, &failed_state, | ||||
| 				       cached_state, NULL, GFP_NOFS); | ||||
| 	} | ||||
| 	return err; | ||||
| } | ||||
|  |  | |||
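The new cached_state argument to count_range_bits() is meant for callers that issue a sequence of counts where each one starts at or after the point the previous one reached. A hedged usage sketch (the loop and variables are illustrative, not a real call site; tree and isize come from the imagined caller):

struct extent_state *cached = NULL;
u64 cur = 0;
u64 total = 0;

while (cur < isize) {
	u64 found_start = cur;

	/* Resumes near the previous position thanks to 'cached'. */
	total += count_range_bits(tree, &found_start, cur + PAGE_SIZE - 1,
				  PAGE_SIZE, EXTENT_DELALLOC, 0, &cached);
	cur += PAGE_SIZE;
}
free_extent_state(cached);	/* drop the reference the last call took */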
|  | @ -3,43 +3,48 @@ | |||
| #ifndef BTRFS_EXTENT_IO_TREE_H | ||||
| #define BTRFS_EXTENT_IO_TREE_H | ||||
| 
 | ||||
| #include "misc.h" | ||||
| 
 | ||||
| struct extent_changeset; | ||||
| struct io_failure_record; | ||||
| 
 | ||||
| /* Bits for the extent state */ | ||||
| #define EXTENT_DIRTY		(1U << 0) | ||||
| #define EXTENT_UPTODATE		(1U << 1) | ||||
| #define EXTENT_LOCKED		(1U << 2) | ||||
| #define EXTENT_NEW		(1U << 3) | ||||
| #define EXTENT_DELALLOC		(1U << 4) | ||||
| #define EXTENT_DEFRAG		(1U << 5) | ||||
| #define EXTENT_BOUNDARY		(1U << 6) | ||||
| #define EXTENT_NODATASUM	(1U << 7) | ||||
| #define EXTENT_CLEAR_META_RESV	(1U << 8) | ||||
| #define EXTENT_NEED_WAIT	(1U << 9) | ||||
| #define EXTENT_NORESERVE	(1U << 11) | ||||
| #define EXTENT_QGROUP_RESERVED	(1U << 12) | ||||
| #define EXTENT_CLEAR_DATA_RESV	(1U << 13) | ||||
| /*
 | ||||
|  * Must be cleared only during ordered extent completion or on error paths if we | ||||
|  * did not manage to submit bios and create the ordered extents for the range. | ||||
|  * Should not be cleared during page release and page invalidation (if there is | ||||
|  * an ordered extent in flight), that is left for the ordered extent completion. | ||||
|  */ | ||||
| #define EXTENT_DELALLOC_NEW	(1U << 14) | ||||
| /*
 | ||||
|  * When an ordered extent successfully completes for a region marked as a new | ||||
|  * delalloc range, use this flag when clearing a new delalloc range to indicate | ||||
|  * that the VFS' inode number of bytes should be incremented and the inode's new | ||||
|  * delalloc bytes decremented, in an atomic way to prevent races with stat(2). | ||||
|  */ | ||||
| #define EXTENT_ADD_INODE_BYTES  (1U << 15) | ||||
| 
 | ||||
| /*
 | ||||
|  * Set during truncate when we're clearing an entire range and we just want the | ||||
|  * extent states to go away. | ||||
|  */ | ||||
| #define EXTENT_CLEAR_ALL_BITS	(1U << 16) | ||||
| enum { | ||||
| 	ENUM_BIT(EXTENT_DIRTY), | ||||
| 	ENUM_BIT(EXTENT_UPTODATE), | ||||
| 	ENUM_BIT(EXTENT_LOCKED), | ||||
| 	ENUM_BIT(EXTENT_NEW), | ||||
| 	ENUM_BIT(EXTENT_DELALLOC), | ||||
| 	ENUM_BIT(EXTENT_DEFRAG), | ||||
| 	ENUM_BIT(EXTENT_BOUNDARY), | ||||
| 	ENUM_BIT(EXTENT_NODATASUM), | ||||
| 	ENUM_BIT(EXTENT_CLEAR_META_RESV), | ||||
| 	ENUM_BIT(EXTENT_NEED_WAIT), | ||||
| 	ENUM_BIT(EXTENT_NORESERVE), | ||||
| 	ENUM_BIT(EXTENT_QGROUP_RESERVED), | ||||
| 	ENUM_BIT(EXTENT_CLEAR_DATA_RESV), | ||||
| 	/*
 | ||||
| 	 * Must be cleared only during ordered extent completion or on error | ||||
| 	 * paths if we did not manage to submit bios and create the ordered | ||||
| 	 * extents for the range.  Should not be cleared during page release | ||||
| 	 * and page invalidation (if there is an ordered extent in flight), | ||||
| 	 * that is left for the ordered extent completion. | ||||
| 	 */ | ||||
| 	ENUM_BIT(EXTENT_DELALLOC_NEW), | ||||
| 	/*
 | ||||
| 	 * When an ordered extent successfully completes for a region marked as | ||||
| 	 * a new delalloc range, use this flag when clearing a new delalloc | ||||
| 	 * range to indicate that the VFS' inode number of bytes should be | ||||
| 	 * incremented and the inode's new delalloc bytes decremented, in an | ||||
| 	 * atomic way to prevent races with stat(2). | ||||
| 	 */ | ||||
| 	ENUM_BIT(EXTENT_ADD_INODE_BYTES), | ||||
| 	/*
 | ||||
| 	 * Set during truncate when we're clearing an entire range and we just | ||||
| 	 * want the extent states to go away. | ||||
| 	 */ | ||||
| 	ENUM_BIT(EXTENT_CLEAR_ALL_BITS), | ||||
| }; | ||||
| 
 | ||||
| #define EXTENT_DO_ACCOUNTING    (EXTENT_CLEAR_META_RESV | \ | ||||
| 				 EXTENT_CLEAR_DATA_RESV) | ||||
|  | @ -75,7 +80,8 @@ enum { | |||
| struct extent_io_tree { | ||||
| 	struct rb_root state; | ||||
| 	struct btrfs_fs_info *fs_info; | ||||
| 	void *private_data; | ||||
| 	/* Inode associated with this tree, or NULL. */ | ||||
| 	struct btrfs_inode *inode; | ||||
| 
 | ||||
| 	/* Who owns this io tree, should be one of IO_TREE_* */ | ||||
| 	u8 owner; | ||||
|  | @ -99,21 +105,22 @@ struct extent_state { | |||
| }; | ||||
| 
 | ||||
| void extent_io_tree_init(struct btrfs_fs_info *fs_info, | ||||
| 			 struct extent_io_tree *tree, unsigned int owner, | ||||
| 			 void *private_data); | ||||
| 			 struct extent_io_tree *tree, unsigned int owner); | ||||
| void extent_io_tree_release(struct extent_io_tree *tree); | ||||
| 
 | ||||
| int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, | ||||
| 		struct extent_state **cached); | ||||
| 
 | ||||
| int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end); | ||||
| int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, | ||||
| 		    struct extent_state **cached); | ||||
| 
 | ||||
| int __init extent_state_init_cachep(void); | ||||
| void __cold extent_state_free_cachep(void); | ||||
| 
 | ||||
| u64 count_range_bits(struct extent_io_tree *tree, | ||||
| 		     u64 *start, u64 search_end, | ||||
| 		     u64 max_bytes, u32 bits, int contig); | ||||
| 		     u64 max_bytes, u32 bits, int contig, | ||||
| 		     struct extent_state **cached_state); | ||||
| 
 | ||||
| void free_extent_state(struct extent_state *state); | ||||
| int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, | ||||
|  | @ -139,13 +146,6 @@ static inline int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, | |||
| 				  GFP_NOFS, NULL); | ||||
| } | ||||
| 
 | ||||
| static inline int unlock_extent_atomic(struct extent_io_tree *tree, u64 start, | ||||
| 				       u64 end, struct extent_state **cached) | ||||
| { | ||||
| 	return __clear_extent_bit(tree, start, end, EXTENT_LOCKED, cached, | ||||
| 				  GFP_ATOMIC, NULL); | ||||
| } | ||||
| 
 | ||||
| static inline int clear_extent_bits(struct extent_io_tree *tree, u64 start, | ||||
| 				    u64 end, u32 bits) | ||||
| { | ||||
|  | @ -217,13 +217,6 @@ static inline int set_extent_new(struct extent_io_tree *tree, u64 start, | |||
| 	return set_extent_bit(tree, start, end, EXTENT_NEW, NULL, GFP_NOFS); | ||||
| } | ||||
| 
 | ||||
| static inline int set_extent_uptodate(struct extent_io_tree *tree, u64 start, | ||||
| 		u64 end, struct extent_state **cached_state, gfp_t mask) | ||||
| { | ||||
| 	return set_extent_bit(tree, start, end, EXTENT_UPTODATE, | ||||
| 			      cached_state, mask); | ||||
| } | ||||
| 
 | ||||
| int find_first_extent_bit(struct extent_io_tree *tree, u64 start, | ||||
| 			  u64 *start_ret, u64 *end_ret, u32 bits, | ||||
| 			  struct extent_state **cached_state); | ||||
|  | @ -234,6 +227,7 @@ int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start, | |||
| bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start, | ||||
| 			       u64 *end, u64 max_bytes, | ||||
| 			       struct extent_state **cached_state); | ||||
| void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bits); | ||||
| void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bits, | ||||
| 		     struct extent_state **cached_state); | ||||
| 
 | ||||
| #endif /* BTRFS_EXTENT_IO_TREE_H */ | ||||
|  |  | |||
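ENUM_BIT comes from misc.h, which this header now includes; it lets the enum's implicit counter drive the bit positions. A sketch of how such a macro can be built (an assumption about the helper's shape, not a quote of misc.h):

/* Assumed shape: burn one counter slot for the bit index, define the
 * name as that single bit, then reset the counter so the next entry's
 * bit index is simply previous + 1. */
#define ENUM_BIT(name)					\
	__ ## name ## _BIT,				\
	name = (1U << __ ## name ## _BIT),		\
	__ ## name ## _SEQ = __ ## name ## _BIT

enum {
	ENUM_BIT(EXTENT_DIRTY),		/* 1U << 0 */
	ENUM_BIT(EXTENT_UPTODATE),	/* 1U << 1 */
	ENUM_BIT(EXTENT_LOCKED),	/* 1U << 2 */
};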
|  | @ -36,6 +36,13 @@ | |||
| #include "rcu-string.h" | ||||
| #include "zoned.h" | ||||
| #include "dev-replace.h" | ||||
| #include "fs.h" | ||||
| #include "accessors.h" | ||||
| #include "extent-tree.h" | ||||
| #include "root-tree.h" | ||||
| #include "file-item.h" | ||||
| #include "orphan.h" | ||||
| #include "tree-checker.h" | ||||
| 
 | ||||
| #undef SCRAMBLE_DELAYED_REFS | ||||
| 
 | ||||
|  | @ -5255,8 +5262,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, | |||
| 	u64 bytenr; | ||||
| 	u64 generation; | ||||
| 	u64 parent; | ||||
| 	struct btrfs_tree_parent_check check = { 0 }; | ||||
| 	struct btrfs_key key; | ||||
| 	struct btrfs_key first_key; | ||||
| 	struct btrfs_ref ref = { 0 }; | ||||
| 	struct extent_buffer *next; | ||||
| 	int level = wc->level; | ||||
|  | @ -5278,7 +5285,12 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, | |||
| 	} | ||||
| 
 | ||||
| 	bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]); | ||||
| 	btrfs_node_key_to_cpu(path->nodes[level], &first_key, | ||||
| 
 | ||||
| 	check.level = level - 1; | ||||
| 	check.transid = generation; | ||||
| 	check.owner_root = root->root_key.objectid; | ||||
| 	check.has_first_key = true; | ||||
| 	btrfs_node_key_to_cpu(path->nodes[level], &check.first_key, | ||||
| 			      path->slots[level]); | ||||
| 
 | ||||
| 	next = find_extent_buffer(fs_info, bytenr); | ||||
|  | @ -5340,8 +5352,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, | |||
| 	if (!next) { | ||||
| 		if (reada && level == 1) | ||||
| 			reada_walk_down(trans, root, wc, path); | ||||
| 		next = read_tree_block(fs_info, bytenr, root->root_key.objectid, | ||||
| 				       generation, level - 1, &first_key); | ||||
| 		next = read_tree_block(fs_info, bytenr, &check); | ||||
| 		if (IS_ERR(next)) { | ||||
| 			return PTR_ERR(next); | ||||
| 		} else if (!extent_buffer_uptodate(next)) { | ||||
|  | @ -5973,40 +5984,6 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | |||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * helper to account the unused space of all the readonly block group in the | ||||
|  * space_info. takes mirrors into account. | ||||
|  */ | ||||
| u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo) | ||||
| { | ||||
| 	struct btrfs_block_group *block_group; | ||||
| 	u64 free_bytes = 0; | ||||
| 	int factor; | ||||
| 
 | ||||
| 	/* It's df, we don't care if it's racy */ | ||||
| 	if (list_empty(&sinfo->ro_bgs)) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	spin_lock(&sinfo->lock); | ||||
| 	list_for_each_entry(block_group, &sinfo->ro_bgs, ro_list) { | ||||
| 		spin_lock(&block_group->lock); | ||||
| 
 | ||||
| 		if (!block_group->ro) { | ||||
| 			spin_unlock(&block_group->lock); | ||||
| 			continue; | ||||
| 		} | ||||
| 
 | ||||
| 		factor = btrfs_bg_type_to_factor(block_group->flags); | ||||
| 		free_bytes += (block_group->length - | ||||
| 			       block_group->used) * factor; | ||||
| 
 | ||||
| 		spin_unlock(&block_group->lock); | ||||
| 	} | ||||
| 	spin_unlock(&sinfo->lock); | ||||
| 
 | ||||
| 	return free_bytes; | ||||
| } | ||||
| 
 | ||||
| int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info, | ||||
| 				   u64 start, u64 end) | ||||
| { | ||||
|  | @ -6072,7 +6049,7 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed) | |||
| 			btrfs_warn_in_rcu(fs_info, | ||||
| "ignoring attempt to trim beyond device size: offset %llu length %llu device %s device size %llu", | ||||
| 					  start, end - start + 1, | ||||
| 					  rcu_str_deref(device->name), | ||||
| 					  btrfs_dev_name(device), | ||||
| 					  device->total_bytes); | ||||
| 			mutex_unlock(&fs_info->chunk_mutex); | ||||
| 			ret = 0; | ||||
|  |  | |||
fs/btrfs/extent-tree.h (new file, 78 lines)
							|  | @ -0,0 +1,78 @@ | |||
| /* SPDX-License-Identifier: GPL-2.0 */ | ||||
| 
 | ||||
| #ifndef BTRFS_EXTENT_TREE_H | ||||
| #define BTRFS_EXTENT_TREE_H | ||||
| 
 | ||||
| enum btrfs_inline_ref_type { | ||||
| 	BTRFS_REF_TYPE_INVALID, | ||||
| 	BTRFS_REF_TYPE_BLOCK, | ||||
| 	BTRFS_REF_TYPE_DATA, | ||||
| 	BTRFS_REF_TYPE_ANY, | ||||
| }; | ||||
| 
 | ||||
| int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb, | ||||
| 				     struct btrfs_extent_inline_ref *iref, | ||||
| 				     enum btrfs_inline_ref_type is_data); | ||||
| u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset); | ||||
| 
 | ||||
| int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info, | ||||
| 			      u64 start, u64 num_bytes); | ||||
| void btrfs_free_excluded_extents(struct btrfs_block_group *cache); | ||||
| int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, unsigned long count); | ||||
| void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info, | ||||
| 				  struct btrfs_delayed_ref_root *delayed_refs, | ||||
| 				  struct btrfs_delayed_ref_head *head); | ||||
| int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len); | ||||
| int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, | ||||
| 			     struct btrfs_fs_info *fs_info, u64 bytenr, | ||||
| 			     u64 offset, int metadata, u64 *refs, u64 *flags); | ||||
| int btrfs_pin_extent(struct btrfs_trans_handle *trans, u64 bytenr, u64 num, | ||||
| 		     int reserved); | ||||
| int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, | ||||
| 				    u64 bytenr, u64 num_bytes); | ||||
| int btrfs_exclude_logged_extents(struct extent_buffer *eb); | ||||
| int btrfs_cross_ref_exist(struct btrfs_root *root, | ||||
| 			  u64 objectid, u64 offset, u64 bytenr, bool strict, | ||||
| 			  struct btrfs_path *path); | ||||
| struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, | ||||
| 					     struct btrfs_root *root, | ||||
| 					     u64 parent, u64 root_objectid, | ||||
| 					     const struct btrfs_disk_key *key, | ||||
| 					     int level, u64 hint, | ||||
| 					     u64 empty_size, | ||||
| 					     enum btrfs_lock_nesting nest); | ||||
| void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | ||||
| 			   u64 root_id, | ||||
| 			   struct extent_buffer *buf, | ||||
| 			   u64 parent, int last_ref); | ||||
| int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, | ||||
| 				     struct btrfs_root *root, u64 owner, | ||||
| 				     u64 offset, u64 ram_bytes, | ||||
| 				     struct btrfs_key *ins); | ||||
| int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | ||||
| 				   u64 root_objectid, u64 owner, u64 offset, | ||||
| 				   struct btrfs_key *ins); | ||||
| int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, u64 num_bytes, | ||||
| 			 u64 min_alloc_size, u64 empty_size, u64 hint_byte, | ||||
| 			 struct btrfs_key *ins, int is_data, int delalloc); | ||||
| int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | ||||
| 		  struct extent_buffer *buf, int full_backref); | ||||
| int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | ||||
| 		  struct extent_buffer *buf, int full_backref); | ||||
| int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, | ||||
| 				struct extent_buffer *eb, u64 flags, int level); | ||||
| int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref); | ||||
| 
 | ||||
| int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info, | ||||
| 			       u64 start, u64 len, int delalloc); | ||||
| int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans, u64 start, u64 len); | ||||
| int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans); | ||||
| int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_ref *generic_ref); | ||||
| int __must_check btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, | ||||
| 				     int for_reloc); | ||||
| int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | ||||
| 			struct btrfs_root *root, | ||||
| 			struct extent_buffer *node, | ||||
| 			struct extent_buffer *parent); | ||||
| 
 | ||||
| #endif | ||||
(File diff suppressed because it is too large)
							|  | @ -9,6 +9,7 @@ | |||
| #include <linux/btrfs_tree.h> | ||||
| #include "compression.h" | ||||
| #include "ulist.h" | ||||
| #include "misc.h" | ||||
| 
 | ||||
| enum { | ||||
| 	EXTENT_BUFFER_UPTODATE, | ||||
|  | @ -29,13 +30,15 @@ enum { | |||
| }; | ||||
| 
 | ||||
| /* these are flags for __process_pages_contig */ | ||||
| #define PAGE_UNLOCK		(1 << 0) | ||||
| /* Page starts writeback, clear dirty bit and set writeback bit */ | ||||
| #define PAGE_START_WRITEBACK	(1 << 1) | ||||
| #define PAGE_END_WRITEBACK	(1 << 2) | ||||
| #define PAGE_SET_ORDERED	(1 << 3) | ||||
| #define PAGE_SET_ERROR		(1 << 4) | ||||
| #define PAGE_LOCK		(1 << 5) | ||||
| enum { | ||||
| 	ENUM_BIT(PAGE_UNLOCK), | ||||
| 	/* Page starts writeback, clear dirty bit and set writeback bit */ | ||||
| 	ENUM_BIT(PAGE_START_WRITEBACK), | ||||
| 	ENUM_BIT(PAGE_END_WRITEBACK), | ||||
| 	ENUM_BIT(PAGE_SET_ORDERED), | ||||
| 	ENUM_BIT(PAGE_SET_ERROR), | ||||
| 	ENUM_BIT(PAGE_LOCK), | ||||
| }; | ||||
| 
 | ||||
| /*
 | ||||
|  * page->private values.  Every page that is controlled by the extent | ||||
|  | @ -63,17 +66,11 @@ struct btrfs_inode; | |||
| struct btrfs_fs_info; | ||||
| struct io_failure_record; | ||||
| struct extent_io_tree; | ||||
| struct btrfs_tree_parent_check; | ||||
| 
 | ||||
| int __init extent_buffer_init_cachep(void); | ||||
| void __cold extent_buffer_free_cachep(void); | ||||
| 
 | ||||
| typedef void (submit_bio_hook_t)(struct inode *inode, struct bio *bio, | ||||
| 					 int mirror_num, | ||||
| 					 enum btrfs_compression_type compress_type); | ||||
| 
 | ||||
| typedef blk_status_t (extent_submit_bio_start_t)(struct inode *inode, | ||||
| 		struct bio *bio, u64 dio_file_offset); | ||||
| 
 | ||||
| #define INLINE_EXTENT_BUFFER_PAGES     (BTRFS_MAX_METADATA_BLOCKSIZE / PAGE_SIZE) | ||||
| struct extent_buffer { | ||||
| 	u64 start; | ||||
|  | @ -98,6 +95,39 @@ struct extent_buffer { | |||
| #endif | ||||
| }; | ||||
| 
 | ||||
| /*
 | ||||
|  * Get the correct offset inside the page of extent buffer. | ||||
|  * | ||||
|  * @eb:		target extent buffer | ||||
|  * @start:	offset inside the extent buffer | ||||
|  * | ||||
|  * Will handle both sectorsize == PAGE_SIZE and sectorsize < PAGE_SIZE cases. | ||||
|  */ | ||||
| static inline size_t get_eb_offset_in_page(const struct extent_buffer *eb, | ||||
| 					   unsigned long offset) | ||||
| { | ||||
| 	/*
 | ||||
| 	 * For sectorsize == PAGE_SIZE case, eb->start will always be aligned | ||||
| 	 * to PAGE_SIZE, thus adding it won't cause any difference. | ||||
| 	 * | ||||
| 	 * For sectorsize < PAGE_SIZE, we must only read the data that belongs | ||||
| 	 * to the eb, thus we have to take the eb->start into consideration. | ||||
| 	 */ | ||||
| 	return offset_in_page(offset + eb->start); | ||||
| } | ||||
| 
 | ||||
| static inline unsigned long get_eb_page_index(unsigned long offset) | ||||
| { | ||||
| 	/*
 | ||||
| 	 * For sectorsize == PAGE_SIZE case, plain >> PAGE_SHIFT is enough. | ||||
| 	 * | ||||
| 	 * For sectorsize < PAGE_SIZE case, we only support 64K PAGE_SIZE, | ||||
| 	 * and have ensured that all tree blocks are contained in one page, | ||||
| 	 * thus we always get index == 0. | ||||
| 	 */ | ||||
| 	return offset >> PAGE_SHIFT; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Structure to record how many bytes and which ranges are set/cleared | ||||
|  */ | ||||
|  | @ -174,8 +204,8 @@ void free_extent_buffer_stale(struct extent_buffer *eb); | |||
| #define WAIT_NONE	0 | ||||
| #define WAIT_COMPLETE	1 | ||||
| #define WAIT_PAGE_LOCK	2 | ||||
| int read_extent_buffer_pages(struct extent_buffer *eb, int wait, | ||||
| 			     int mirror_num); | ||||
| int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num, | ||||
| 			     struct btrfs_tree_parent_check *parent_check); | ||||
| void wait_on_extent_buffer_writeback(struct extent_buffer *eb); | ||||
| void btrfs_readahead_tree_block(struct btrfs_fs_info *fs_info, | ||||
| 				u64 bytenr, u64 owner_root, u64 gen, int level); | ||||
|  | @ -248,7 +278,6 @@ int extent_invalidate_folio(struct extent_io_tree *tree, | |||
| int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array); | ||||
| 
 | ||||
| void end_extent_writepage(struct page *page, int err, u64 start, u64 end); | ||||
| int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num); | ||||
| 
 | ||||
| /*
 | ||||
|  * When IO fails, either with EIO or csum verification fails, we | ||||
|  | @ -272,9 +301,9 @@ struct io_failure_record { | |||
| 	int num_copies; | ||||
| }; | ||||
| 
 | ||||
| int btrfs_repair_one_sector(struct inode *inode, struct btrfs_bio *failed_bbio, | ||||
| int btrfs_repair_one_sector(struct btrfs_inode *inode, struct btrfs_bio *failed_bbio, | ||||
| 			    u32 bio_offset, struct page *page, unsigned int pgoff, | ||||
| 			    submit_bio_hook_t *submit_bio_hook); | ||||
| 			    bool submit_buffered); | ||||
| void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end); | ||||
| int btrfs_clean_io_failure(struct btrfs_inode *inode, u64 start, | ||||
| 			   struct page *page, unsigned int pg_offset); | ||||
|  |  | |||
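A worked example for the two page-mapping helpers above (a sketch assuming the subpage case the comments describe: 64K pages, 16K tree blocks, eb->start == 16384, and the eb's pages[] array):

/* Byte at logical offset 100 inside the extent buffer:
 *
 *   get_eb_page_index(100)         == 100 >> PAGE_SHIFT == 0
 *   get_eb_offset_in_page(eb, 100) == offset_in_page(100 + 16384) == 16484
 *
 * With sectorsize == PAGE_SIZE (e.g. 4K pages), eb->start is page
 * aligned, so adding it changes nothing: the result is 100 % PAGE_SIZE.
 */
unsigned long idx = get_eb_page_index(100);
size_t off = get_eb_offset_in_page(eb, 100);
char *kaddr = page_address(eb->pages[idx]) + off;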
|  | @ -3,6 +3,7 @@ | |||
| #include <linux/err.h> | ||||
| #include <linux/slab.h> | ||||
| #include <linux/spinlock.h> | ||||
| #include "messages.h" | ||||
| #include "ctree.h" | ||||
| #include "volumes.h" | ||||
| #include "extent_map.h" | ||||
|  | @ -27,12 +28,9 @@ void __cold extent_map_exit(void) | |||
| 	kmem_cache_destroy(extent_map_cache); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * extent_map_tree_init - initialize extent map tree | ||||
|  * @tree:		tree to initialize | ||||
|  * | ||||
|  * Initialize the extent tree @tree.  Should be called for each new inode | ||||
|  * or other user of the extent_map interface. | ||||
| /*
 | ||||
|  * Initialize the extent tree @tree.  Should be called for each new inode or | ||||
|  * other user of the extent_map interface. | ||||
|  */ | ||||
| void extent_map_tree_init(struct extent_map_tree *tree) | ||||
| { | ||||
|  | @ -41,12 +39,9 @@ void extent_map_tree_init(struct extent_map_tree *tree) | |||
| 	rwlock_init(&tree->lock); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * alloc_extent_map - allocate new extent map structure | ||||
|  * | ||||
|  * Allocate a new extent_map structure.  The new structure is | ||||
|  * returned with a reference count of one and needs to be | ||||
|  * freed using free_extent_map() | ||||
| /*
 | ||||
|  * Allocate a new extent_map structure.  The new structure is returned with a | ||||
|  * reference count of one and needs to be freed using free_extent_map() | ||||
|  */ | ||||
| struct extent_map *alloc_extent_map(void) | ||||
| { | ||||
|  | @ -61,12 +56,9 @@ struct extent_map *alloc_extent_map(void) | |||
| 	return em; | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * free_extent_map - drop reference count of an extent_map | ||||
|  * @em:		extent map being released | ||||
|  * | ||||
|  * Drops the reference out on @em by one and free the structure | ||||
|  * if the reference count hits zero. | ||||
| /*
 | ||||
|  * Drop the reference on @em by one and free the structure if the reference | ||||
|  * count hits zero. | ||||
|  */ | ||||
| void free_extent_map(struct extent_map *em) | ||||
| { | ||||
|  | @ -81,7 +73,7 @@ void free_extent_map(struct extent_map *em) | |||
| 	} | ||||
| } | ||||
| 
 | ||||
| /* simple helper to do math around the end of an extent, handling wrap */ | ||||
| /* Do the math around the end of an extent, handling wrapping. */ | ||||
| static u64 range_end(u64 start, u64 len) | ||||
| { | ||||
| 	if (start + len < start) | ||||
|  | @ -137,8 +129,8 @@ static int tree_insert(struct rb_root_cached *root, struct extent_map *em) | |||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * search through the tree for an extent_map with a given offset.  If | ||||
|  * it can't be found, try to find some neighboring extents | ||||
|  * Search through the tree for an extent_map with a given offset.  If it can't | ||||
|  * be found, try to find some neighboring extents | ||||
|  */ | ||||
| static struct rb_node *__tree_search(struct rb_root *root, u64 offset, | ||||
| 				     struct rb_node **prev_or_next_ret) | ||||
|  | @ -190,7 +182,7 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset, | |||
| 	return NULL; | ||||
| } | ||||
| 
 | ||||
| /* check to see if two extent_map structs are adjacent and safe to merge */ | ||||
| /* Check to see if two extent_map structs are adjacent and safe to merge. */ | ||||
| static int mergable_maps(struct extent_map *prev, struct extent_map *next) | ||||
| { | ||||
| 	if (test_bit(EXTENT_FLAG_PINNED, &prev->flags)) | ||||
|  | @ -288,8 +280,9 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) | |||
| 	} | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * unpin_extent_cache - unpin an extent from the cache | ||||
| /*
 | ||||
|  * Unpin an extent from the cache. | ||||
|  * | ||||
|  * @tree:	tree to unpin the extent in | ||||
|  * @start:	logical offset in the file | ||||
|  * @len:	length of the extent | ||||
|  | @ -392,7 +385,7 @@ static void extent_map_device_clear_bits(struct extent_map *em, unsigned bits) | |||
| 	} | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
| /*
 | ||||
|  * Add new extent map to the extent tree | ||||
|  * | ||||
|  * @tree:	tree to insert new map in | ||||
|  | @ -451,8 +444,9 @@ __lookup_extent_mapping(struct extent_map_tree *tree, | |||
| 	return em; | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * lookup_extent_mapping - lookup extent_map | ||||
| /*
 | ||||
|  * Lookup extent_map that intersects @start + @len range. | ||||
|  * | ||||
|  * @tree:	tree to lookup in | ||||
|  * @start:	byte offset to start the search | ||||
|  * @len:	length of the lookup range | ||||
|  | @ -468,8 +462,9 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, | |||
| 	return __lookup_extent_mapping(tree, start, len, 1); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * search_extent_mapping - find a nearby extent map | ||||
| /*
 | ||||
|  * Find a nearby extent map intersecting @start + @len (not an exact search). | ||||
|  * | ||||
|  * @tree:	tree to lookup in | ||||
|  * @start:	byte offset to start the search | ||||
|  * @len:	length of the lookup range | ||||
|  | @ -485,13 +480,14 @@ struct extent_map *search_extent_mapping(struct extent_map_tree *tree, | |||
| 	return __lookup_extent_mapping(tree, start, len, 0); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * remove_extent_mapping - removes an extent_map from the extent tree | ||||
| /*
 | ||||
|  * Remove an extent_map from the extent tree. | ||||
|  * | ||||
|  * @tree:	extent tree to remove from | ||||
|  * @em:		extent map being removed | ||||
|  * | ||||
|  * Removes @em from @tree.  No reference counts are dropped, and no checks | ||||
|  * are done to see if the range is in use | ||||
|  * Remove @em from @tree.  No reference counts are dropped, and no checks | ||||
|  * are done to see if the range is in use. | ||||
|  */ | ||||
| void remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) | ||||
| { | ||||
|  | @ -523,7 +519,7 @@ void replace_extent_mapping(struct extent_map_tree *tree, | |||
| 	setup_extent_mapping(tree, new, modified); | ||||
| } | ||||
| 
 | ||||
| static struct extent_map *next_extent_map(struct extent_map *em) | ||||
| static struct extent_map *next_extent_map(const struct extent_map *em) | ||||
| { | ||||
| 	struct rb_node *next; | ||||
| 
 | ||||
|  | @ -585,8 +581,8 @@ static noinline int merge_extent_mapping(struct extent_map_tree *em_tree, | |||
| 	return add_extent_mapping(em_tree, em, 0); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * Add extent mapping into em_tree | ||||
| /*
 | ||||
|  * Add extent mapping into em_tree. | ||||
|  * | ||||
|  * @fs_info:  the filesystem | ||||
|  * @em_tree:  extent tree into which we want to insert the extent mapping | ||||
|  | @ -613,6 +609,13 @@ int btrfs_add_extent_mapping(struct btrfs_fs_info *fs_info, | |||
| 	int ret; | ||||
| 	struct extent_map *em = *em_in; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Tree-checker should have rejected any inline extent with non-zero | ||||
| 	 * file offset. Here just do a sanity check. | ||||
| 	 */ | ||||
| 	if (em->block_start == EXTENT_MAP_INLINE) | ||||
| 		ASSERT(em->start == 0); | ||||
| 
 | ||||
| 	ret = add_extent_mapping(em_tree, em, 0); | ||||
| 	/* it is possible that someone inserted the extent into the tree
 | ||||
| 	 * while we had the lock dropped.  It is also possible that | ||||
|  |  | |||
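The reflowed comments above describe the extent_map lookup/insert API; a hedged sketch of the typical pattern under the tree's rwlock (illustrative only, error handling elided):

struct extent_map *em;

write_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, start, len);
if (!em) {
	em = alloc_extent_map();	/* returned with refcount == 1 */
	/* ... fill in em->start, em->len, em->block_start ... */
	ret = add_extent_mapping(em_tree, em, 0);
}
write_unlock(&em_tree->lock);
free_extent_map(em);	/* drop the lookup/alloc reference */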
|  | @ -9,13 +9,18 @@ | |||
| #include <linux/highmem.h> | ||||
| #include <linux/sched/mm.h> | ||||
| #include <crypto/hash.h> | ||||
| #include "messages.h" | ||||
| #include "misc.h" | ||||
| #include "ctree.h" | ||||
| #include "disk-io.h" | ||||
| #include "transaction.h" | ||||
| #include "volumes.h" | ||||
| #include "bio.h" | ||||
| #include "print-tree.h" | ||||
| #include "compression.h" | ||||
| #include "fs.h" | ||||
| #include "accessors.h" | ||||
| #include "file-item.h" | ||||
| #include "super.h" | ||||
| 
 | ||||
| #define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \ | ||||
| 				   sizeof(struct btrfs_item) * 2) / \ | ||||
|  | @ -24,8 +29,8 @@ | |||
| #define MAX_CSUM_ITEMS(r, size) (min_t(u32, __MAX_CSUM_ITEMS(r, size), \ | ||||
| 				       PAGE_SIZE)) | ||||
| 
 | ||||
| /**
 | ||||
|  * Set inode's size according to filesystem options | ||||
| /*
 | ||||
|  * Set inode's size according to filesystem options. | ||||
|  * | ||||
|  * @inode:      inode we want to update the disk_i_size for | ||||
|  * @new_i_size: i_size we want to set to, 0 if we use i_size | ||||
|  | @ -64,8 +69,8 @@ void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_siz | |||
| 	spin_unlock(&inode->lock); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * Mark range within a file as having a new extent inserted | ||||
| /*
 | ||||
|  * Mark range within a file as having a new extent inserted. | ||||
|  * | ||||
|  * @inode: inode being modified | ||||
|  * @start: start file offset of the file extent we've inserted | ||||
|  | @ -92,8 +97,8 @@ int btrfs_inode_set_file_extent_range(struct btrfs_inode *inode, u64 start, | |||
| 			       EXTENT_DIRTY); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * Marks an inode range as not having a backing extent | ||||
| /*
 | ||||
|  * Mark an inode range as not having a backing extent. | ||||
|  * | ||||
|  * @inode: inode being modified | ||||
|  * @start: start file offset of the file extent we've inserted | ||||
|  | @ -121,12 +126,26 @@ int btrfs_inode_clear_file_extent_range(struct btrfs_inode *inode, u64 start, | |||
| 				start + len - 1, EXTENT_DIRTY, NULL); | ||||
| } | ||||
| 
 | ||||
| static inline u32 max_ordered_sum_bytes(struct btrfs_fs_info *fs_info, | ||||
| 					u16 csum_size) | ||||
| static size_t bytes_to_csum_size(const struct btrfs_fs_info *fs_info, u32 bytes) | ||||
| { | ||||
| 	u32 ncsums = (PAGE_SIZE - sizeof(struct btrfs_ordered_sum)) / csum_size; | ||||
| 	ASSERT(IS_ALIGNED(bytes, fs_info->sectorsize)); | ||||
| 
 | ||||
| 	return ncsums * fs_info->sectorsize; | ||||
| 	return (bytes >> fs_info->sectorsize_bits) * fs_info->csum_size; | ||||
| } | ||||
| 
 | ||||
| static size_t csum_size_to_bytes(const struct btrfs_fs_info *fs_info, u32 csum_size) | ||||
| { | ||||
| 	ASSERT(IS_ALIGNED(csum_size, fs_info->csum_size)); | ||||
| 
 | ||||
| 	return (csum_size / fs_info->csum_size) << fs_info->sectorsize_bits; | ||||
| } | ||||
| 
 | ||||
| static inline u32 max_ordered_sum_bytes(const struct btrfs_fs_info *fs_info) | ||||
| { | ||||
| 	u32 max_csum_size = round_down(PAGE_SIZE - sizeof(struct btrfs_ordered_sum), | ||||
| 				       fs_info->csum_size); | ||||
| 
 | ||||
| 	return csum_size_to_bytes(fs_info, max_csum_size); | ||||
| } | ||||
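A quick worked example of the three helpers above (illustrative only, not part of the patch; it assumes the common configuration of 4 KiB sectors, i.e. sectorsize_bits == 12, and crc32c checksums with fs_info->csum_size == 4):

	/* 64 KiB of data spans 16 sectors, so it needs 16 * 4 = 64 bytes of csums. */
	u32 csum_bytes = bytes_to_csum_size(fs_info, SZ_64K);      /* 64 */
	/* The conversion round-trips: 64 bytes of csums cover 64 KiB of data. */
	u64 data_bytes = csum_size_to_bytes(fs_info, csum_bytes);  /* 65536 */

Under the same assumptions, max_ordered_sum_bytes() caps one btrfs_ordered_sum at roughly (PAGE_SIZE - header) / 4 checksums, i.e. about 4 MiB of covered data per allocation.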
| 
 | ||||
| /*
 | ||||
|  | @ -135,9 +154,7 @@ static inline u32 max_ordered_sum_bytes(struct btrfs_fs_info *fs_info, | |||
|  */ | ||||
| static int btrfs_ordered_sum_size(struct btrfs_fs_info *fs_info, unsigned long bytes) | ||||
| { | ||||
| 	int num_sectors = (int)DIV_ROUND_UP(bytes, fs_info->sectorsize); | ||||
| 
 | ||||
| 	return sizeof(struct btrfs_ordered_sum) + num_sectors * fs_info->csum_size; | ||||
| 	return sizeof(struct btrfs_ordered_sum) + bytes_to_csum_size(fs_info, bytes); | ||||
| } | ||||
| 
 | ||||
| int btrfs_insert_hole_extent(struct btrfs_trans_handle *trans, | ||||
|  | @ -254,7 +271,7 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, | |||
| 
 | ||||
| /*
 | ||||
|  * Find checksums for logical bytenr range [disk_bytenr, disk_bytenr + len) and | ||||
|  * estore the result to @dst. | ||||
|  * store the result to @dst. | ||||
|  * | ||||
|  * Return >0 for the number of sectors we found. | ||||
|  * Return 0 for the range [disk_bytenr, disk_bytenr + sectorsize) has no csum | ||||
|  | @ -360,15 +377,15 @@ static int search_file_offset_in_bio(struct bio *bio, struct inode *inode, | |||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
| /*
 | ||||
|  * Lookup the checksum for the read bio in csum tree. | ||||
|  * | ||||
|  * @inode: inode that the bio is for. | ||||
|  * @bio: bio to look up. | ||||
|  * @dst: Buffer of size nblocks * btrfs_super_csum_size() used to return | ||||
|  *       checksum (nblocks = bio->bi_iter.bi_size / fs_info->sectorsize). If | ||||
|  *       NULL, the checksum buffer is allocated and returned in | ||||
|  *       btrfs_bio(bio)->csum instead. | ||||
|  * @inode:  inode that the bio is for. | ||||
|  * @bio:    bio to look up. | ||||
|  * @dst:    Buffer of size nblocks * btrfs_super_csum_size() used to return | ||||
|  *          checksum (nblocks = bio->bi_iter.bi_size / fs_info->sectorsize). If | ||||
|  *          NULL, the checksum buffer is allocated and returned in | ||||
|  *          btrfs_bio(bio)->csum instead. | ||||
|  * | ||||
|  * Return: BLK_STS_RESOURCE if allocating memory fails, BLK_STS_OK otherwise. | ||||
|  */ | ||||
|  | @ -510,9 +527,9 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst | |||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, | ||||
| 			     struct list_head *list, int search_commit, | ||||
| 			     bool nowait) | ||||
| int btrfs_lookup_csums_list(struct btrfs_root *root, u64 start, u64 end, | ||||
| 			    struct list_head *list, int search_commit, | ||||
| 			    bool nowait) | ||||
| { | ||||
| 	struct btrfs_fs_info *fs_info = root->fs_info; | ||||
| 	struct btrfs_key key; | ||||
|  | @ -521,11 +538,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, | |||
| 	struct btrfs_ordered_sum *sums; | ||||
| 	struct btrfs_csum_item *item; | ||||
| 	LIST_HEAD(tmplist); | ||||
| 	unsigned long offset; | ||||
| 	int ret; | ||||
| 	size_t size; | ||||
| 	u64 csum_end; | ||||
| 	const u32 csum_size = fs_info->csum_size; | ||||
| 
 | ||||
| 	ASSERT(IS_ALIGNED(start, fs_info->sectorsize) && | ||||
| 	       IS_ALIGNED(end + 1, fs_info->sectorsize)); | ||||
|  | @ -551,16 +564,33 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, | |||
| 	if (ret > 0 && path->slots[0] > 0) { | ||||
| 		leaf = path->nodes[0]; | ||||
| 		btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1); | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * There are two cases we can hit here for the previous csum | ||||
| 		 * item: | ||||
| 		 * | ||||
| 		 *		|<- search range ->| | ||||
| 		 *	|<- csum item ->| | ||||
| 		 * | ||||
| 		 * Or | ||||
| 		 *				|<- search range ->| | ||||
| 		 *	|<- csum item ->| | ||||
| 		 * | ||||
| 		 * Check if the previous csum item covers the leading part of | ||||
| 		 * the search range.  If so, we have to start from the previous | ||||
| 		 * csum item. | ||||
| 		 */ | ||||
| 		if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID && | ||||
| 		    key.type == BTRFS_EXTENT_CSUM_KEY) { | ||||
| 			offset = (start - key.offset) >> fs_info->sectorsize_bits; | ||||
| 			if (offset * csum_size < | ||||
| 			if (bytes_to_csum_size(fs_info, start - key.offset) < | ||||
| 			    btrfs_item_size(leaf, path->slots[0] - 1)) | ||||
| 				path->slots[0]--; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	while (start <= end) { | ||||
| 		u64 csum_end; | ||||
| 
 | ||||
| 		leaf = path->nodes[0]; | ||||
| 		if (path->slots[0] >= btrfs_header_nritems(leaf)) { | ||||
| 			ret = btrfs_next_leaf(root, path); | ||||
|  | @ -580,8 +610,8 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, | |||
| 		if (key.offset > start) | ||||
| 			start = key.offset; | ||||
| 
 | ||||
| 		size = btrfs_item_size(leaf, path->slots[0]); | ||||
| 		csum_end = key.offset + (size / csum_size) * fs_info->sectorsize; | ||||
| 		csum_end = key.offset + csum_size_to_bytes(fs_info, | ||||
| 					btrfs_item_size(leaf, path->slots[0])); | ||||
| 		if (csum_end <= start) { | ||||
| 			path->slots[0]++; | ||||
| 			continue; | ||||
|  | @ -591,8 +621,11 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, | |||
| 		item = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||||
| 				      struct btrfs_csum_item); | ||||
| 		while (start < csum_end) { | ||||
| 			unsigned long offset; | ||||
| 			size_t size; | ||||
| 
 | ||||
| 			size = min_t(size_t, csum_end - start, | ||||
| 				     max_ordered_sum_bytes(fs_info, csum_size)); | ||||
| 				     max_ordered_sum_bytes(fs_info)); | ||||
| 			sums = kzalloc(btrfs_ordered_sum_size(fs_info, size), | ||||
| 				       GFP_NOFS); | ||||
| 			if (!sums) { | ||||
|  | @ -603,16 +636,14 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, | |||
| 			sums->bytenr = start; | ||||
| 			sums->len = (int)size; | ||||
| 
 | ||||
| 			offset = (start - key.offset) >> fs_info->sectorsize_bits; | ||||
| 			offset *= csum_size; | ||||
| 			size >>= fs_info->sectorsize_bits; | ||||
| 			offset = bytes_to_csum_size(fs_info, start - key.offset); | ||||
| 
 | ||||
| 			read_extent_buffer(path->nodes[0], | ||||
| 					   sums->sums, | ||||
| 					   ((unsigned long)item) + offset, | ||||
| 					   csum_size * size); | ||||
| 					   bytes_to_csum_size(fs_info, size)); | ||||
| 
 | ||||
| 			start += fs_info->sectorsize * size; | ||||
| 			start += size; | ||||
| 			list_add_tail(&sums->list, &tmplist); | ||||
| 		} | ||||
| 		path->slots[0]++; | ||||
|  | @ -630,8 +661,129 @@ fail: | |||
| 	return ret; | ||||
| } | ||||
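For orientation, a hedged caller sketch for the renamed btrfs_lookup_csums_list() (the root and range variables are invented for illustration; the function returns 0 on success and fills @list with btrfs_ordered_sum entries the caller must free):

	LIST_HEAD(csum_list);
	struct btrfs_ordered_sum *sums, *tmp;
	int ret;

	/* The end offset is inclusive. */
	ret = btrfs_lookup_csums_list(csum_root, disk_bytenr,
				      disk_bytenr + len - 1, &csum_list,
				      0 /* search_commit */, false /* nowait */);
	if (!ret) {
		list_for_each_entry_safe(sums, tmp, &csum_list, list) {
			/* ... consume sums->bytenr, sums->len, sums->sums ... */
			list_del(&sums->list);
			kfree(sums);
		}
	}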
| 
 | ||||
| /**
 | ||||
|  * Calculate checksums of the data contained inside a bio | ||||
| /*
 | ||||
|  * Do the same work as btrfs_lookup_csums_list(), the difference is in how | ||||
|  * we return the result. | ||||
|  * | ||||
|  * This version sets the corresponding bits in @csum_bitmap to represent | ||||
|  * that a csum was found. | ||||
|  * Each bit represents a sector, thus the caller should ensure the @csum_buf | ||||
|  * passed in is large enough to contain all csums. | ||||
|  */ | ||||
| int btrfs_lookup_csums_bitmap(struct btrfs_root *root, u64 start, u64 end, | ||||
| 			      u8 *csum_buf, unsigned long *csum_bitmap) | ||||
| { | ||||
| 	struct btrfs_fs_info *fs_info = root->fs_info; | ||||
| 	struct btrfs_key key; | ||||
| 	struct btrfs_path *path; | ||||
| 	struct extent_buffer *leaf; | ||||
| 	struct btrfs_csum_item *item; | ||||
| 	const u64 orig_start = start; | ||||
| 	int ret; | ||||
| 
 | ||||
| 	ASSERT(IS_ALIGNED(start, fs_info->sectorsize) && | ||||
| 	       IS_ALIGNED(end + 1, fs_info->sectorsize)); | ||||
| 
 | ||||
| 	path = btrfs_alloc_path(); | ||||
| 	if (!path) | ||||
| 		return -ENOMEM; | ||||
| 
 | ||||
| 	key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; | ||||
| 	key.type = BTRFS_EXTENT_CSUM_KEY; | ||||
| 	key.offset = start; | ||||
| 
 | ||||
| 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||||
| 	if (ret < 0) | ||||
| 		goto fail; | ||||
| 	if (ret > 0 && path->slots[0] > 0) { | ||||
| 		leaf = path->nodes[0]; | ||||
| 		btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1); | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * There are two cases we can hit here for the previous csum | ||||
| 		 * item: | ||||
| 		 * | ||||
| 		 *		|<- search range ->| | ||||
| 		 *	|<- csum item ->| | ||||
| 		 * | ||||
| 		 * Or | ||||
| 		 *				|<- search range ->| | ||||
| 		 *	|<- csum item ->| | ||||
| 		 * | ||||
| 		 * Check if the previous csum item covers the leading part of | ||||
| 		 * the search range.  If so, we have to start from the previous | ||||
| 		 * csum item. | ||||
| 		 */ | ||||
| 		if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID && | ||||
| 		    key.type == BTRFS_EXTENT_CSUM_KEY) { | ||||
| 			if (bytes_to_csum_size(fs_info, start - key.offset) < | ||||
| 			    btrfs_item_size(leaf, path->slots[0] - 1)) | ||||
| 				path->slots[0]--; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	while (start <= end) { | ||||
| 		u64 csum_end; | ||||
| 
 | ||||
| 		leaf = path->nodes[0]; | ||||
| 		if (path->slots[0] >= btrfs_header_nritems(leaf)) { | ||||
| 			ret = btrfs_next_leaf(root, path); | ||||
| 			if (ret < 0) | ||||
| 				goto fail; | ||||
| 			if (ret > 0) | ||||
| 				break; | ||||
| 			leaf = path->nodes[0]; | ||||
| 		} | ||||
| 
 | ||||
| 		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||||
| 		if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || | ||||
| 		    key.type != BTRFS_EXTENT_CSUM_KEY || | ||||
| 		    key.offset > end) | ||||
| 			break; | ||||
| 
 | ||||
| 		if (key.offset > start) | ||||
| 			start = key.offset; | ||||
| 
 | ||||
| 		csum_end = key.offset + csum_size_to_bytes(fs_info, | ||||
| 					btrfs_item_size(leaf, path->slots[0])); | ||||
| 		if (csum_end <= start) { | ||||
| 			path->slots[0]++; | ||||
| 			continue; | ||||
| 		} | ||||
| 
 | ||||
| 		csum_end = min(csum_end, end + 1); | ||||
| 		item = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||||
| 				      struct btrfs_csum_item); | ||||
| 		while (start < csum_end) { | ||||
| 			unsigned long offset; | ||||
| 			size_t size; | ||||
| 			u8 *csum_dest = csum_buf + bytes_to_csum_size(fs_info, | ||||
| 						start - orig_start); | ||||
| 
 | ||||
| 			size = min_t(size_t, csum_end - start, end + 1 - start); | ||||
| 
 | ||||
| 			offset = bytes_to_csum_size(fs_info, start - key.offset); | ||||
| 
 | ||||
| 			read_extent_buffer(path->nodes[0], csum_dest, | ||||
| 					   ((unsigned long)item) + offset, | ||||
| 					   bytes_to_csum_size(fs_info, size)); | ||||
| 
 | ||||
| 			bitmap_set(csum_bitmap, | ||||
| 				(start - orig_start) >> fs_info->sectorsize_bits, | ||||
| 				size >> fs_info->sectorsize_bits); | ||||
| 
 | ||||
| 			start += size; | ||||
| 		} | ||||
| 		path->slots[0]++; | ||||
| 	} | ||||
| 	ret = 0; | ||||
| fail: | ||||
| 	btrfs_free_path(path); | ||||
| 	return ret; | ||||
| } | ||||
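A minimal sizing sketch for the new bitmap variant (names invented, error handling trimmed): one bit in @csum_bitmap and one csum_size slot in @csum_buf are consumed per sector of [start, end]:

	const u32 nsectors = (end + 1 - start) >> fs_info->sectorsize_bits;
	u8 *csum_buf = kzalloc(nsectors * fs_info->csum_size, GFP_NOFS);
	unsigned long *csum_bitmap = bitmap_zalloc(nsectors, GFP_NOFS);
	int ret;

	if (csum_buf && csum_bitmap)
		ret = btrfs_lookup_csums_bitmap(csum_root, start, end,
						csum_buf, csum_bitmap);
	/* A set bit at index i means csum_buf holds the csum for sector i. */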
| 
 | ||||
| /*
 | ||||
|  * Calculate checksums of the data contained inside a bio. | ||||
|  * | ||||
|  * @inode:	 Owner of the data inside the bio | ||||
|  * @bio:	 Contains the data to be checksummed | ||||
|  | @ -746,15 +898,16 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio, | |||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * helper function for csum removal, this expects the | ||||
|  * key to describe the csum pointed to by the path, and it expects | ||||
|  * the csum to overlap the range [bytenr, len] | ||||
|  * Remove one checksum overlapping a range. | ||||
|  * | ||||
|  * The csum should not be entirely contained in the range and the | ||||
|  * range should not be entirely contained in the csum. | ||||
|  * This expects the key to describe the csum pointed to by the path, and it | ||||
|  * expects the csum to overlap the range [bytenr, len] | ||||
|  * | ||||
|  * This calls btrfs_truncate_item with the correct args based on the | ||||
|  * overlap, and fixes up the key as required. | ||||
|  * The csum should not be entirely contained in the range and the range should | ||||
|  * not be entirely contained in the csum. | ||||
|  * | ||||
|  * This calls btrfs_truncate_item with the correct args based on the overlap, | ||||
|  * and fixes up the key as required. | ||||
|  */ | ||||
| static noinline void truncate_one_csum(struct btrfs_fs_info *fs_info, | ||||
| 				       struct btrfs_path *path, | ||||
|  | @ -803,8 +956,7 @@ static noinline void truncate_one_csum(struct btrfs_fs_info *fs_info, | |||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * deletes the csum items from the csum tree for a given | ||||
|  * range of bytes. | ||||
|  * Delete the csum items from the csum tree for a given range of bytes. | ||||
|  */ | ||||
| int btrfs_del_csums(struct btrfs_trans_handle *trans, | ||||
| 		    struct btrfs_root *root, u64 bytenr, u64 len) | ||||
|  | @ -1209,7 +1361,6 @@ out: | |||
| void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode, | ||||
| 				     const struct btrfs_path *path, | ||||
| 				     struct btrfs_file_extent_item *fi, | ||||
| 				     const bool new_inline, | ||||
| 				     struct extent_map *em) | ||||
| { | ||||
| 	struct btrfs_fs_info *fs_info = inode->root->fs_info; | ||||
|  | @ -1261,10 +1412,9 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode, | |||
| 		 */ | ||||
| 		em->orig_start = EXTENT_MAP_HOLE; | ||||
| 		em->block_len = (u64)-1; | ||||
| 		if (!new_inline && compress_type != BTRFS_COMPRESS_NONE) { | ||||
| 		em->compress_type = compress_type; | ||||
| 		if (compress_type != BTRFS_COMPRESS_NONE) | ||||
| 			set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | ||||
| 			em->compress_type = compress_type; | ||||
| 		} | ||||
| 	} else { | ||||
| 		btrfs_err(fs_info, | ||||
| 			  "unknown file extent item type %d, inode %llu, offset %llu, " | ||||
|  |  | |||
							
								
								
									
fs/btrfs/file-item.h | 69 (new file)
									
								
							|  | @ -0,0 +1,69 @@ | |||
| /* SPDX-License-Identifier: GPL-2.0 */ | ||||
| 
 | ||||
| #ifndef BTRFS_FILE_ITEM_H | ||||
| #define BTRFS_FILE_ITEM_H | ||||
| 
 | ||||
| #include "accessors.h" | ||||
| 
 | ||||
| #define BTRFS_FILE_EXTENT_INLINE_DATA_START		\ | ||||
| 		(offsetof(struct btrfs_file_extent_item, disk_bytenr)) | ||||
| 
 | ||||
| static inline u32 BTRFS_MAX_INLINE_DATA_SIZE(const struct btrfs_fs_info *info) | ||||
| { | ||||
| 	return BTRFS_MAX_ITEM_SIZE(info) - BTRFS_FILE_EXTENT_INLINE_DATA_START; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Return the number of bytes used by the item on disk, minus the size of any | ||||
|  * extent headers.  If a file is compressed on disk, this is the compressed | ||||
|  * size. | ||||
|  */ | ||||
| static inline u32 btrfs_file_extent_inline_item_len( | ||||
| 						const struct extent_buffer *eb, | ||||
| 						int nr) | ||||
| { | ||||
| 	return btrfs_item_size(eb, nr) - BTRFS_FILE_EXTENT_INLINE_DATA_START; | ||||
| } | ||||
| 
 | ||||
| static inline unsigned long btrfs_file_extent_inline_start( | ||||
| 				const struct btrfs_file_extent_item *e) | ||||
| { | ||||
| 	return (unsigned long)e + BTRFS_FILE_EXTENT_INLINE_DATA_START; | ||||
| } | ||||
| 
 | ||||
| static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize) | ||||
| { | ||||
| 	return BTRFS_FILE_EXTENT_INLINE_DATA_START + datasize; | ||||
| } | ||||
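To make the inline-extent arithmetic concrete (the header before disk_bytenr, BTRFS_FILE_EXTENT_INLINE_DATA_START, is 21 bytes in the current on-disk format; the data size below is invented):

	/* An inline extent holding 100 bytes of (possibly compressed) data. */
	u32 item_size = btrfs_file_extent_calc_inline_size(100);  /* 21 + 100 = 121 */

btrfs_file_extent_inline_item_len() simply reverses this: item size minus the 21-byte header yields the on-disk data length.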
| 
 | ||||
| int btrfs_del_csums(struct btrfs_trans_handle *trans, | ||||
| 		    struct btrfs_root *root, u64 bytenr, u64 len); | ||||
| blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst); | ||||
| int btrfs_insert_hole_extent(struct btrfs_trans_handle *trans, | ||||
| 			     struct btrfs_root *root, u64 objectid, u64 pos, | ||||
| 			     u64 num_bytes); | ||||
| int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, | ||||
| 			     struct btrfs_root *root, | ||||
| 			     struct btrfs_path *path, u64 objectid, | ||||
| 			     u64 bytenr, int mod); | ||||
| int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, | ||||
| 			   struct btrfs_root *root, | ||||
| 			   struct btrfs_ordered_sum *sums); | ||||
| blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio, | ||||
| 				u64 offset, bool one_ordered); | ||||
| int btrfs_lookup_csums_list(struct btrfs_root *root, u64 start, u64 end, | ||||
| 			    struct list_head *list, int search_commit, | ||||
| 			    bool nowait); | ||||
| int btrfs_lookup_csums_bitmap(struct btrfs_root *root, u64 start, u64 end, | ||||
| 			      u8 *csum_buf, unsigned long *csum_bitmap); | ||||
| void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode, | ||||
| 				     const struct btrfs_path *path, | ||||
| 				     struct btrfs_file_extent_item *fi, | ||||
| 				     struct extent_map *em); | ||||
| int btrfs_inode_clear_file_extent_range(struct btrfs_inode *inode, u64 start, | ||||
| 					u64 len); | ||||
| int btrfs_inode_set_file_extent_range(struct btrfs_inode *inode, u64 start, u64 len); | ||||
| void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_size); | ||||
| u64 btrfs_file_extent_end(const struct btrfs_path *path); | ||||
| 
 | ||||
| #endif | ||||
							
								
								
									
fs/btrfs/file.c | 623
									
										
									
									
									
								
							|  | @ -30,329 +30,13 @@ | |||
| #include "delalloc-space.h" | ||||
| #include "reflink.h" | ||||
| #include "subpage.h" | ||||
| 
 | ||||
| static struct kmem_cache *btrfs_inode_defrag_cachep; | ||||
| /*
 | ||||
|  * when auto defrag is enabled we | ||||
|  * queue up these defrag structs to remember which | ||||
|  * inodes need defragging passes | ||||
|  */ | ||||
| struct inode_defrag { | ||||
| 	struct rb_node rb_node; | ||||
| 	/* objectid */ | ||||
| 	u64 ino; | ||||
| 	/*
 | ||||
| 	 * transid where the defrag was added, we search for | ||||
| 	 * extents newer than this | ||||
| 	 */ | ||||
| 	u64 transid; | ||||
| 
 | ||||
| 	/* root objectid */ | ||||
| 	u64 root; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * The extent size threshold for autodefrag. | ||||
| 	 * | ||||
| 	 * This value is different for compressed/non-compressed extents, | ||||
| 	 * thus needs to be passed from higher layer. | ||||
| 	 * (aka, inode_should_defrag()) | ||||
| 	 */ | ||||
| 	u32 extent_thresh; | ||||
| }; | ||||
| 
 | ||||
| static int __compare_inode_defrag(struct inode_defrag *defrag1, | ||||
| 				  struct inode_defrag *defrag2) | ||||
| { | ||||
| 	if (defrag1->root > defrag2->root) | ||||
| 		return 1; | ||||
| 	else if (defrag1->root < defrag2->root) | ||||
| 		return -1; | ||||
| 	else if (defrag1->ino > defrag2->ino) | ||||
| 		return 1; | ||||
| 	else if (defrag1->ino < defrag2->ino) | ||||
| 		return -1; | ||||
| 	else | ||||
| 		return 0; | ||||
| } | ||||
| 
 | ||||
| /* pop a record for an inode into the defrag tree.  The lock
 | ||||
|  * must be held already | ||||
|  * | ||||
|  * If you're inserting a record for an older transid than an | ||||
|  * existing record, the transid already in the tree is lowered | ||||
|  * | ||||
|  * If an existing record is found the defrag item you | ||||
|  * pass in is freed | ||||
|  */ | ||||
| static int __btrfs_add_inode_defrag(struct btrfs_inode *inode, | ||||
| 				    struct inode_defrag *defrag) | ||||
| { | ||||
| 	struct btrfs_fs_info *fs_info = inode->root->fs_info; | ||||
| 	struct inode_defrag *entry; | ||||
| 	struct rb_node **p; | ||||
| 	struct rb_node *parent = NULL; | ||||
| 	int ret; | ||||
| 
 | ||||
| 	p = &fs_info->defrag_inodes.rb_node; | ||||
| 	while (*p) { | ||||
| 		parent = *p; | ||||
| 		entry = rb_entry(parent, struct inode_defrag, rb_node); | ||||
| 
 | ||||
| 		ret = __compare_inode_defrag(defrag, entry); | ||||
| 		if (ret < 0) | ||||
| 			p = &parent->rb_left; | ||||
| 		else if (ret > 0) | ||||
| 			p = &parent->rb_right; | ||||
| 		else { | ||||
| 			/* if we're reinserting an entry for
 | ||||
| 			 * an old defrag run, make sure to | ||||
| 			 * lower the transid of our existing record | ||||
| 			 */ | ||||
| 			if (defrag->transid < entry->transid) | ||||
| 				entry->transid = defrag->transid; | ||||
| 			entry->extent_thresh = min(defrag->extent_thresh, | ||||
| 						   entry->extent_thresh); | ||||
| 			return -EEXIST; | ||||
| 		} | ||||
| 	} | ||||
| 	set_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags); | ||||
| 	rb_link_node(&defrag->rb_node, parent, p); | ||||
| 	rb_insert_color(&defrag->rb_node, &fs_info->defrag_inodes); | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static inline int __need_auto_defrag(struct btrfs_fs_info *fs_info) | ||||
| { | ||||
| 	if (!btrfs_test_opt(fs_info, AUTO_DEFRAG)) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	if (btrfs_fs_closing(fs_info)) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	return 1; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * insert a defrag record for this inode if auto defrag is | ||||
|  * enabled | ||||
|  */ | ||||
| int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, | ||||
| 			   struct btrfs_inode *inode, u32 extent_thresh) | ||||
| { | ||||
| 	struct btrfs_root *root = inode->root; | ||||
| 	struct btrfs_fs_info *fs_info = root->fs_info; | ||||
| 	struct inode_defrag *defrag; | ||||
| 	u64 transid; | ||||
| 	int ret; | ||||
| 
 | ||||
| 	if (!__need_auto_defrag(fs_info)) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	if (test_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags)) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	if (trans) | ||||
| 		transid = trans->transid; | ||||
| 	else | ||||
| 		transid = inode->root->last_trans; | ||||
| 
 | ||||
| 	defrag = kmem_cache_zalloc(btrfs_inode_defrag_cachep, GFP_NOFS); | ||||
| 	if (!defrag) | ||||
| 		return -ENOMEM; | ||||
| 
 | ||||
| 	defrag->ino = btrfs_ino(inode); | ||||
| 	defrag->transid = transid; | ||||
| 	defrag->root = root->root_key.objectid; | ||||
| 	defrag->extent_thresh = extent_thresh; | ||||
| 
 | ||||
| 	spin_lock(&fs_info->defrag_inodes_lock); | ||||
| 	if (!test_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags)) { | ||||
| 		/*
 | ||||
| 		 * If we set IN_DEFRAG flag and evict the inode from memory, | ||||
| 		 * and then re-read this inode, this new inode doesn't have | ||||
| 		 * IN_DEFRAG flag. At the case, we may find the existed defrag. | ||||
| 		 */ | ||||
| 		ret = __btrfs_add_inode_defrag(inode, defrag); | ||||
| 		if (ret) | ||||
| 			kmem_cache_free(btrfs_inode_defrag_cachep, defrag); | ||||
| 	} else { | ||||
| 		kmem_cache_free(btrfs_inode_defrag_cachep, defrag); | ||||
| 	} | ||||
| 	spin_unlock(&fs_info->defrag_inodes_lock); | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * pick the defragable inode that we want, if it doesn't exist, we will get | ||||
|  * the next one. | ||||
|  */ | ||||
| static struct inode_defrag * | ||||
| btrfs_pick_defrag_inode(struct btrfs_fs_info *fs_info, u64 root, u64 ino) | ||||
| { | ||||
| 	struct inode_defrag *entry = NULL; | ||||
| 	struct inode_defrag tmp; | ||||
| 	struct rb_node *p; | ||||
| 	struct rb_node *parent = NULL; | ||||
| 	int ret; | ||||
| 
 | ||||
| 	tmp.ino = ino; | ||||
| 	tmp.root = root; | ||||
| 
 | ||||
| 	spin_lock(&fs_info->defrag_inodes_lock); | ||||
| 	p = fs_info->defrag_inodes.rb_node; | ||||
| 	while (p) { | ||||
| 		parent = p; | ||||
| 		entry = rb_entry(parent, struct inode_defrag, rb_node); | ||||
| 
 | ||||
| 		ret = __compare_inode_defrag(&tmp, entry); | ||||
| 		if (ret < 0) | ||||
| 			p = parent->rb_left; | ||||
| 		else if (ret > 0) | ||||
| 			p = parent->rb_right; | ||||
| 		else | ||||
| 			goto out; | ||||
| 	} | ||||
| 
 | ||||
| 	if (parent && __compare_inode_defrag(&tmp, entry) > 0) { | ||||
| 		parent = rb_next(parent); | ||||
| 		if (parent) | ||||
| 			entry = rb_entry(parent, struct inode_defrag, rb_node); | ||||
| 		else | ||||
| 			entry = NULL; | ||||
| 	} | ||||
| out: | ||||
| 	if (entry) | ||||
| 		rb_erase(parent, &fs_info->defrag_inodes); | ||||
| 	spin_unlock(&fs_info->defrag_inodes_lock); | ||||
| 	return entry; | ||||
| } | ||||
| 
 | ||||
| void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info) | ||||
| { | ||||
| 	struct inode_defrag *defrag; | ||||
| 	struct rb_node *node; | ||||
| 
 | ||||
| 	spin_lock(&fs_info->defrag_inodes_lock); | ||||
| 	node = rb_first(&fs_info->defrag_inodes); | ||||
| 	while (node) { | ||||
| 		rb_erase(node, &fs_info->defrag_inodes); | ||||
| 		defrag = rb_entry(node, struct inode_defrag, rb_node); | ||||
| 		kmem_cache_free(btrfs_inode_defrag_cachep, defrag); | ||||
| 
 | ||||
| 		cond_resched_lock(&fs_info->defrag_inodes_lock); | ||||
| 
 | ||||
| 		node = rb_first(&fs_info->defrag_inodes); | ||||
| 	} | ||||
| 	spin_unlock(&fs_info->defrag_inodes_lock); | ||||
| } | ||||
| 
 | ||||
| #define BTRFS_DEFRAG_BATCH	1024 | ||||
| 
 | ||||
| static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info, | ||||
| 				    struct inode_defrag *defrag) | ||||
| { | ||||
| 	struct btrfs_root *inode_root; | ||||
| 	struct inode *inode; | ||||
| 	struct btrfs_ioctl_defrag_range_args range; | ||||
| 	int ret = 0; | ||||
| 	u64 cur = 0; | ||||
| 
 | ||||
| again: | ||||
| 	if (test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state)) | ||||
| 		goto cleanup; | ||||
| 	if (!__need_auto_defrag(fs_info)) | ||||
| 		goto cleanup; | ||||
| 
 | ||||
| 	/* get the inode */ | ||||
| 	inode_root = btrfs_get_fs_root(fs_info, defrag->root, true); | ||||
| 	if (IS_ERR(inode_root)) { | ||||
| 		ret = PTR_ERR(inode_root); | ||||
| 		goto cleanup; | ||||
| 	} | ||||
| 
 | ||||
| 	inode = btrfs_iget(fs_info->sb, defrag->ino, inode_root); | ||||
| 	btrfs_put_root(inode_root); | ||||
| 	if (IS_ERR(inode)) { | ||||
| 		ret = PTR_ERR(inode); | ||||
| 		goto cleanup; | ||||
| 	} | ||||
| 
 | ||||
| 	if (cur >= i_size_read(inode)) { | ||||
| 		iput(inode); | ||||
| 		goto cleanup; | ||||
| 	} | ||||
| 
 | ||||
| 	/* do a chunk of defrag */ | ||||
| 	clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); | ||||
| 	memset(&range, 0, sizeof(range)); | ||||
| 	range.len = (u64)-1; | ||||
| 	range.start = cur; | ||||
| 	range.extent_thresh = defrag->extent_thresh; | ||||
| 
 | ||||
| 	sb_start_write(fs_info->sb); | ||||
| 	ret = btrfs_defrag_file(inode, NULL, &range, defrag->transid, | ||||
| 				       BTRFS_DEFRAG_BATCH); | ||||
| 	sb_end_write(fs_info->sb); | ||||
| 	iput(inode); | ||||
| 
 | ||||
| 	if (ret < 0) | ||||
| 		goto cleanup; | ||||
| 
 | ||||
| 	cur = max(cur + fs_info->sectorsize, range.start); | ||||
| 	goto again; | ||||
| 
 | ||||
| cleanup: | ||||
| 	kmem_cache_free(btrfs_inode_defrag_cachep, defrag); | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * run through the list of inodes in the FS that need | ||||
|  * defragging | ||||
|  */ | ||||
| int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) | ||||
| { | ||||
| 	struct inode_defrag *defrag; | ||||
| 	u64 first_ino = 0; | ||||
| 	u64 root_objectid = 0; | ||||
| 
 | ||||
| 	atomic_inc(&fs_info->defrag_running); | ||||
| 	while (1) { | ||||
| 		/* Pause the auto defragger. */ | ||||
| 		if (test_bit(BTRFS_FS_STATE_REMOUNTING, | ||||
| 			     &fs_info->fs_state)) | ||||
| 			break; | ||||
| 
 | ||||
| 		if (!__need_auto_defrag(fs_info)) | ||||
| 			break; | ||||
| 
 | ||||
| 		/* find an inode to defrag */ | ||||
| 		defrag = btrfs_pick_defrag_inode(fs_info, root_objectid, | ||||
| 						 first_ino); | ||||
| 		if (!defrag) { | ||||
| 			if (root_objectid || first_ino) { | ||||
| 				root_objectid = 0; | ||||
| 				first_ino = 0; | ||||
| 				continue; | ||||
| 			} else { | ||||
| 				break; | ||||
| 			} | ||||
| 		} | ||||
| 
 | ||||
| 		first_ino = defrag->ino + 1; | ||||
| 		root_objectid = defrag->root; | ||||
| 
 | ||||
| 		__btrfs_run_defrag_inode(fs_info, defrag); | ||||
| 	} | ||||
| 	atomic_dec(&fs_info->defrag_running); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * during unmount, we use the transaction_wait queue to | ||||
| 	 * wait for the defragger to stop | ||||
| 	 */ | ||||
| 	wake_up(&fs_info->transaction_wait); | ||||
| 	return 0; | ||||
| } | ||||
| #include "fs.h" | ||||
| #include "accessors.h" | ||||
| #include "extent-tree.h" | ||||
| #include "file-item.h" | ||||
| #include "ioctl.h" | ||||
| #include "file.h" | ||||
| #include "super.h" | ||||
| 
 | ||||
| /* simple helper to fault in pages and copy.  This should go away
 | ||||
|  * and be replaced with calls into generic code. | ||||
|  | @ -696,7 +380,10 @@ next_slot: | |||
| 						args->start - extent_offset, | ||||
| 						0, false); | ||||
| 				ret = btrfs_inc_extent_ref(trans, &ref); | ||||
| 				BUG_ON(ret); /* -ENOMEM */ | ||||
| 				if (ret) { | ||||
| 					btrfs_abort_transaction(trans, ret); | ||||
| 					break; | ||||
| 				} | ||||
| 			} | ||||
| 			key.offset = args->start; | ||||
| 		} | ||||
|  | @ -783,7 +470,10 @@ delete_extent_item: | |||
| 						key.offset - extent_offset, 0, | ||||
| 						false); | ||||
| 				ret = btrfs_free_extent(trans, &ref); | ||||
| 				BUG_ON(ret); /* -ENOMEM */ | ||||
| 				if (ret) { | ||||
| 					btrfs_abort_transaction(trans, ret); | ||||
| 					break; | ||||
| 				} | ||||
| 				args->bytes_found += extent_end - key.offset; | ||||
| 			} | ||||
| 
 | ||||
|  | @ -1302,7 +992,8 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages, | |||
| 		struct btrfs_ordered_extent *ordered; | ||||
| 
 | ||||
| 		if (nowait) { | ||||
| 			if (!try_lock_extent(&inode->io_tree, start_pos, last_pos)) { | ||||
| 			if (!try_lock_extent(&inode->io_tree, start_pos, last_pos, | ||||
| 					     cached_state)) { | ||||
| 				for (i = 0; i < num_pages; i++) { | ||||
| 					unlock_page(pages[i]); | ||||
| 					put_page(pages[i]); | ||||
|  | @ -1372,6 +1063,7 @@ int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos, | |||
| { | ||||
| 	struct btrfs_fs_info *fs_info = inode->root->fs_info; | ||||
| 	struct btrfs_root *root = inode->root; | ||||
| 	struct extent_state *cached_state = NULL; | ||||
| 	u64 lockstart, lockend; | ||||
| 	u64 num_bytes; | ||||
| 	int ret; | ||||
|  | @ -1388,12 +1080,14 @@ int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos, | |||
| 	num_bytes = lockend - lockstart + 1; | ||||
| 
 | ||||
| 	if (nowait) { | ||||
| 		if (!btrfs_try_lock_ordered_range(inode, lockstart, lockend)) { | ||||
| 		if (!btrfs_try_lock_ordered_range(inode, lockstart, lockend, | ||||
| 						  &cached_state)) { | ||||
| 			btrfs_drew_write_unlock(&root->snapshot_lock); | ||||
| 			return -EAGAIN; | ||||
| 		} | ||||
| 	} else { | ||||
| 		btrfs_lock_and_flush_ordered_range(inode, lockstart, lockend, NULL); | ||||
| 		btrfs_lock_and_flush_ordered_range(inode, lockstart, lockend, | ||||
| 						   &cached_state); | ||||
| 	} | ||||
| 	ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes, | ||||
| 			NULL, NULL, NULL, nowait, false); | ||||
|  | @ -1402,7 +1096,7 @@ int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos, | |||
| 	else | ||||
| 		*write_bytes = min_t(size_t, *write_bytes , | ||||
| 				     num_bytes - pos + lockstart); | ||||
| 	unlock_extent(&inode->io_tree, lockstart, lockend, NULL); | ||||
| 	unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state); | ||||
| 
 | ||||
| 	return ret; | ||||
| } | ||||
|  | @ -1505,7 +1199,7 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, | |||
| 	if (nowait) | ||||
| 		ilock_flags |= BTRFS_ILOCK_TRY; | ||||
| 
 | ||||
| 	ret = btrfs_inode_lock(inode, ilock_flags); | ||||
| 	ret = btrfs_inode_lock(BTRFS_I(inode), ilock_flags); | ||||
| 	if (ret < 0) | ||||
| 		return ret; | ||||
| 
 | ||||
|  | @ -1740,7 +1434,7 @@ again: | |||
| 		iocb->ki_pos += num_written; | ||||
| 	} | ||||
| out: | ||||
| 	btrfs_inode_unlock(inode, ilock_flags); | ||||
| 	btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); | ||||
| 	return num_written ? num_written : ret; | ||||
| } | ||||
| 
 | ||||
|  | @ -1780,19 +1474,19 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from) | |||
| 		ilock_flags |= BTRFS_ILOCK_SHARED; | ||||
| 
 | ||||
| relock: | ||||
| 	err = btrfs_inode_lock(inode, ilock_flags); | ||||
| 	err = btrfs_inode_lock(BTRFS_I(inode), ilock_flags); | ||||
| 	if (err < 0) | ||||
| 		return err; | ||||
| 
 | ||||
| 	err = generic_write_checks(iocb, from); | ||||
| 	if (err <= 0) { | ||||
| 		btrfs_inode_unlock(inode, ilock_flags); | ||||
| 		btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); | ||||
| 		return err; | ||||
| 	} | ||||
| 
 | ||||
| 	err = btrfs_write_check(iocb, from, err); | ||||
| 	if (err < 0) { | ||||
| 		btrfs_inode_unlock(inode, ilock_flags); | ||||
| 		btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); | ||||
| 		goto out; | ||||
| 	} | ||||
| 
 | ||||
|  | @ -1803,13 +1497,13 @@ relock: | |||
| 	 */ | ||||
| 	if ((ilock_flags & BTRFS_ILOCK_SHARED) && | ||||
| 	    pos + iov_iter_count(from) > i_size_read(inode)) { | ||||
| 		btrfs_inode_unlock(inode, ilock_flags); | ||||
| 		btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); | ||||
| 		ilock_flags &= ~BTRFS_ILOCK_SHARED; | ||||
| 		goto relock; | ||||
| 	} | ||||
| 
 | ||||
| 	if (check_direct_IO(fs_info, from, pos)) { | ||||
| 		btrfs_inode_unlock(inode, ilock_flags); | ||||
| 		btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); | ||||
| 		goto buffered; | ||||
| 	} | ||||
| 
 | ||||
|  | @ -1840,7 +1534,7 @@ relock: | |||
| 	 * iocb, and that needs to lock the inode. So unlock it before calling | ||||
| 	 * iomap_dio_complete() to avoid a deadlock. | ||||
| 	 */ | ||||
| 	btrfs_inode_unlock(inode, ilock_flags); | ||||
| 	btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); | ||||
| 
 | ||||
| 	if (IS_ERR_OR_NULL(dio)) | ||||
| 		err = PTR_ERR_OR_ZERO(dio); | ||||
|  | @ -1887,8 +1581,8 @@ buffered: | |||
| 	/*
 | ||||
| 	 * If we are in a NOWAIT context, then return -EAGAIN to signal the caller | ||||
| 	 * it must retry the operation in a context where blocking is acceptable, | ||||
| 	 * since we currently don't have NOWAIT semantics support for buffered IO | ||||
| 	 * and may block there for many reasons (reserving space for example). | ||||
| 	 * because even if we end up not blocking during the buffered IO attempt | ||||
| 	 * below, we will block when flushing and waiting for the IO. | ||||
| 	 */ | ||||
| 	if (iocb->ki_flags & IOCB_NOWAIT) { | ||||
| 		err = -EAGAIN; | ||||
|  | @ -1928,7 +1622,7 @@ static ssize_t btrfs_encoded_write(struct kiocb *iocb, struct iov_iter *from, | |||
| 	loff_t count; | ||||
| 	ssize_t ret; | ||||
| 
 | ||||
| 	btrfs_inode_lock(inode, 0); | ||||
| 	btrfs_inode_lock(BTRFS_I(inode), 0); | ||||
| 	count = encoded->len; | ||||
| 	ret = generic_write_checks_count(iocb, &count); | ||||
| 	if (ret == 0 && count != encoded->len) { | ||||
|  | @ -1947,7 +1641,7 @@ static ssize_t btrfs_encoded_write(struct kiocb *iocb, struct iov_iter *from, | |||
| 
 | ||||
| 	ret = btrfs_do_encoded_write(iocb, from, encoded); | ||||
| out: | ||||
| 	btrfs_inode_unlock(inode, 0); | ||||
| 	btrfs_inode_unlock(BTRFS_I(inode), 0); | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
|  | @ -2008,10 +1702,12 @@ int btrfs_release_file(struct inode *inode, struct file *filp) | |||
| { | ||||
| 	struct btrfs_file_private *private = filp->private_data; | ||||
| 
 | ||||
| 	if (private && private->filldir_buf) | ||||
| 	if (private) { | ||||
| 		kfree(private->filldir_buf); | ||||
| 	kfree(private); | ||||
| 	filp->private_data = NULL; | ||||
| 		free_extent_state(private->llseek_cached_state); | ||||
| 		kfree(private); | ||||
| 		filp->private_data = NULL; | ||||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Set by setattr when we are about to truncate a file from a non-zero | ||||
|  | @ -2118,7 +1814,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
| 	if (ret) | ||||
| 		goto out; | ||||
| 
 | ||||
| 	btrfs_inode_lock(inode, BTRFS_ILOCK_MMAP); | ||||
| 	btrfs_inode_lock(BTRFS_I(inode), BTRFS_ILOCK_MMAP); | ||||
| 
 | ||||
| 	atomic_inc(&root->log_batch); | ||||
| 
 | ||||
|  | @ -2142,7 +1838,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
| 	 */ | ||||
| 	ret = start_ordered_ops(inode, start, end); | ||||
| 	if (ret) { | ||||
| 		btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); | ||||
| 		btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_MMAP); | ||||
| 		goto out; | ||||
| 	} | ||||
| 
 | ||||
|  | @ -2245,7 +1941,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
| 	 * file again, but that will end up using the synchronization | ||||
| 	 * inside btrfs_sync_log to keep things safe. | ||||
| 	 */ | ||||
| 	btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); | ||||
| 	btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_MMAP); | ||||
| 
 | ||||
| 	if (ret == BTRFS_NO_LOG_SYNC) { | ||||
| 		ret = btrfs_end_transaction(trans); | ||||
|  | @ -2313,7 +2009,7 @@ out: | |||
| 
 | ||||
| out_release_extents: | ||||
| 	btrfs_release_log_ctx_extents(&ctx); | ||||
| 	btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); | ||||
| 	btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_MMAP); | ||||
| 	goto out; | ||||
| } | ||||
| 
 | ||||
|  | @ -2908,7 +2604,7 @@ static int btrfs_punch_hole(struct file *file, loff_t offset, loff_t len) | |||
| 	bool truncated_block = false; | ||||
| 	bool updated_inode = false; | ||||
| 
 | ||||
| 	btrfs_inode_lock(inode, BTRFS_ILOCK_MMAP); | ||||
| 	btrfs_inode_lock(BTRFS_I(inode), BTRFS_ILOCK_MMAP); | ||||
| 
 | ||||
| 	ret = btrfs_wait_ordered_range(inode, offset, len); | ||||
| 	if (ret) | ||||
|  | @ -2956,7 +2652,7 @@ static int btrfs_punch_hole(struct file *file, loff_t offset, loff_t len) | |||
| 		truncated_block = true; | ||||
| 		ret = btrfs_truncate_block(BTRFS_I(inode), offset, 0, 0); | ||||
| 		if (ret) { | ||||
| 			btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); | ||||
| 			btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_MMAP); | ||||
| 			return ret; | ||||
| 		} | ||||
| 	} | ||||
|  | @ -3055,7 +2751,7 @@ out_only_mutex: | |||
| 				ret = ret2; | ||||
| 		} | ||||
| 	} | ||||
| 	btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); | ||||
| 	btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_MMAP); | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
|  | @ -3366,7 +3062,7 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
| 	if (mode & FALLOC_FL_PUNCH_HOLE) | ||||
| 		return btrfs_punch_hole(file, offset, len); | ||||
| 
 | ||||
| 	btrfs_inode_lock(inode, BTRFS_ILOCK_MMAP); | ||||
| 	btrfs_inode_lock(BTRFS_I(inode), BTRFS_ILOCK_MMAP); | ||||
| 
 | ||||
| 	if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size) { | ||||
| 		ret = inode_newsize_ok(inode, offset + len); | ||||
|  | @ -3416,7 +3112,7 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
| 
 | ||||
| 	if (mode & FALLOC_FL_ZERO_RANGE) { | ||||
| 		ret = btrfs_zero_range(inode, offset, len, mode); | ||||
| 		btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); | ||||
| 		btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_MMAP); | ||||
| 		return ret; | ||||
| 	} | ||||
| 
 | ||||
|  | @ -3514,7 +3210,7 @@ out_unlock: | |||
| 	unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, | ||||
| 		      &cached_state); | ||||
| out: | ||||
| 	btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); | ||||
| 	btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_MMAP); | ||||
| 	extent_changeset_free(data_reserved); | ||||
| 	return ret; | ||||
| } | ||||
|  | @ -3526,117 +3222,106 @@ out: | |||
|  * looping while it gets adjacent subranges, and merging them together. | ||||
|  */ | ||||
| static bool find_delalloc_subrange(struct btrfs_inode *inode, u64 start, u64 end, | ||||
| 				   struct extent_state **cached_state, | ||||
| 				   bool *search_io_tree, | ||||
| 				   u64 *delalloc_start_ret, u64 *delalloc_end_ret) | ||||
| { | ||||
| 	const u64 len = end + 1 - start; | ||||
| 	struct extent_map_tree *em_tree = &inode->extent_tree; | ||||
| 	struct extent_map *em; | ||||
| 	u64 em_end; | ||||
| 	u64 delalloc_len; | ||||
| 	u64 len = end + 1 - start; | ||||
| 	u64 delalloc_len = 0; | ||||
| 	struct btrfs_ordered_extent *oe; | ||||
| 	u64 oe_start; | ||||
| 	u64 oe_end; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Search the io tree first for EXTENT_DELALLOC. If we find any, it | ||||
| 	 * means we have delalloc (dirty pages) for which writeback has not | ||||
| 	 * started yet. | ||||
| 	 */ | ||||
| 	*delalloc_start_ret = start; | ||||
| 	delalloc_len = count_range_bits(&inode->io_tree, delalloc_start_ret, end, | ||||
| 					len, EXTENT_DELALLOC, 1); | ||||
| 	/*
 | ||||
| 	 * If delalloc was found then *delalloc_start_ret has a sector size | ||||
| 	 * aligned value (rounded down). | ||||
| 	 */ | ||||
| 	if (delalloc_len > 0) | ||||
| 	if (*search_io_tree) { | ||||
| 		spin_lock(&inode->lock); | ||||
| 		if (inode->delalloc_bytes > 0) { | ||||
| 			spin_unlock(&inode->lock); | ||||
| 			*delalloc_start_ret = start; | ||||
| 			delalloc_len = count_range_bits(&inode->io_tree, | ||||
| 							delalloc_start_ret, end, | ||||
| 							len, EXTENT_DELALLOC, 1, | ||||
| 							cached_state); | ||||
| 		} else { | ||||
| 			spin_unlock(&inode->lock); | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	if (delalloc_len > 0) { | ||||
| 		/*
 | ||||
| 		 * If delalloc was found then *delalloc_start_ret has a sector size | ||||
| 		 * aligned value (rounded down). | ||||
| 		 */ | ||||
| 		*delalloc_end_ret = *delalloc_start_ret + delalloc_len - 1; | ||||
| 
 | ||||
| 		if (*delalloc_start_ret == start) { | ||||
| 			/* Delalloc for the whole range, nothing more to do. */ | ||||
| 			if (*delalloc_end_ret == end) | ||||
| 				return true; | ||||
| 			/* Else trim our search range for ordered extents. */ | ||||
| 			start = *delalloc_end_ret + 1; | ||||
| 			len = end + 1 - start; | ||||
| 		} | ||||
| 	} else { | ||||
| 		/* No delalloc, future calls don't need to search again. */ | ||||
| 		*search_io_tree = false; | ||||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Now also check if there's any extent map in the range that does not | ||||
| 	 * map to a hole or prealloc extent. We do this because: | ||||
| 	 * Now also check if there's any ordered extent in the range. | ||||
| 	 * We do this because: | ||||
| 	 * | ||||
| 	 * 1) When delalloc is flushed, the file range is locked, we clear the | ||||
| 	 *    EXTENT_DELALLOC bit from the io tree and create an extent map for | ||||
| 	 *    an allocated extent. So we might just have been called after | ||||
| 	 *    delalloc is flushed and before the ordered extent completes and | ||||
| 	 *    inserts the new file extent item in the subvolume's btree; | ||||
| 	 *    EXTENT_DELALLOC bit from the io tree and create an extent map and | ||||
| 	 *    an ordered extent for the write. So we might just have been called | ||||
| 	 *    after delalloc is flushed and before the ordered extent completes | ||||
| 	 *    and inserts the new file extent item in the subvolume's btree; | ||||
| 	 * | ||||
| 	 * 2) We may have an extent map created by flushing delalloc for a | ||||
| 	 * 2) We may have an ordered extent created by flushing delalloc for a | ||||
| 	 *    subrange that starts before the subrange we found marked with | ||||
| 	 *    EXTENT_DELALLOC in the io tree. | ||||
| 	 * | ||||
| 	 * We could also use the extent map tree to find such delalloc that is | ||||
| 	 * being flushed, but using the ordered extents tree is more efficient | ||||
| 	 * because it's usually much smaller as ordered extents are removed from | ||||
| 	 * the tree once they complete. With the extent maps, we may have them | ||||
| 	 * in the extent map tree for a very long time, and they were either | ||||
| 	 * created by previous writes or loaded by read operations. | ||||
| 	 */ | ||||
| 	read_lock(&em_tree->lock); | ||||
| 	em = lookup_extent_mapping(em_tree, start, len); | ||||
| 	read_unlock(&em_tree->lock); | ||||
| 
 | ||||
| 	/* extent_map_end() returns a non-inclusive end offset. */ | ||||
| 	em_end = em ? extent_map_end(em) : 0; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * If we have a hole/prealloc extent map, check the next one if this one | ||||
| 	 * ends before our range's end. | ||||
| 	 */ | ||||
| 	if (em && (em->block_start == EXTENT_MAP_HOLE || | ||||
| 		   test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) && em_end < end) { | ||||
| 		struct extent_map *next_em; | ||||
| 
 | ||||
| 		read_lock(&em_tree->lock); | ||||
| 		next_em = lookup_extent_mapping(em_tree, em_end, len - em_end); | ||||
| 		read_unlock(&em_tree->lock); | ||||
| 
 | ||||
| 		free_extent_map(em); | ||||
| 		em_end = next_em ? extent_map_end(next_em) : 0; | ||||
| 		em = next_em; | ||||
| 	} | ||||
| 
 | ||||
| 	if (em && (em->block_start == EXTENT_MAP_HOLE || | ||||
| 		   test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { | ||||
| 		free_extent_map(em); | ||||
| 		em = NULL; | ||||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * No extent map or one for a hole or prealloc extent. Use the delalloc | ||||
| 	 * range we found in the io tree if we have one. | ||||
| 	 */ | ||||
| 	if (!em) | ||||
| 	oe = btrfs_lookup_first_ordered_range(inode, start, len); | ||||
| 	if (!oe) | ||||
| 		return (delalloc_len > 0); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * We don't have any range as EXTENT_DELALLOC in the io tree, so the | ||||
| 	 * extent map is the only subrange representing delalloc. | ||||
| 	 */ | ||||
| 	/* The ordered extent may span beyond our search range. */ | ||||
| 	oe_start = max(oe->file_offset, start); | ||||
| 	oe_end = min(oe->file_offset + oe->num_bytes - 1, end); | ||||
| 
 | ||||
| 	btrfs_put_ordered_extent(oe); | ||||
| 
 | ||||
| 	/* Don't have unflushed delalloc, return the ordered extent range. */ | ||||
| 	if (delalloc_len == 0) { | ||||
| 		*delalloc_start_ret = em->start; | ||||
| 		*delalloc_end_ret = min(end, em_end - 1); | ||||
| 		free_extent_map(em); | ||||
| 		*delalloc_start_ret = oe_start; | ||||
| 		*delalloc_end_ret = oe_end; | ||||
| 		return true; | ||||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * The extent map represents a delalloc range that starts before the | ||||
| 	 * delalloc range we found in the io tree. | ||||
| 	 * We have both unflushed delalloc (io_tree) and an ordered extent. | ||||
| 	 * If the ranges are adjacent, return a combined range, otherwise | ||||
| 	 * return the leftmost range. | ||||
| 	 */ | ||||
| 	if (em->start < *delalloc_start_ret) { | ||||
| 		*delalloc_start_ret = em->start; | ||||
| 		/*
 | ||||
| 		 * If the ranges are adjacent, return a combined range. | ||||
| 		 * Otherwise return the extent map's range. | ||||
| 		 */ | ||||
| 		if (em_end < *delalloc_start_ret) | ||||
| 			*delalloc_end_ret = min(end, em_end - 1); | ||||
| 
 | ||||
| 		free_extent_map(em); | ||||
| 		return true; | ||||
| 	if (oe_start < *delalloc_start_ret) { | ||||
| 		if (oe_end < *delalloc_start_ret) | ||||
| 			*delalloc_end_ret = oe_end; | ||||
| 		*delalloc_start_ret = oe_start; | ||||
| 	} else if (*delalloc_end_ret + 1 == oe_start) { | ||||
| 		*delalloc_end_ret = oe_end; | ||||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * The extent map starts after the delalloc range we found in the io | ||||
| 	 * tree. If it's adjacent, return a combined range, otherwise return | ||||
| 	 * the range found in the io tree. | ||||
| 	 */ | ||||
| 	if (*delalloc_end_ret + 1 == em->start) | ||||
| 		*delalloc_end_ret = min(end, em_end - 1); | ||||
| 
 | ||||
| 	free_extent_map(em); | ||||
| 	return true; | ||||
| } | ||||
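To make the merging rules concrete, a made-up scenario (all offsets invented, endpoints inclusive as in the code above):

	/*
	 * Unflushed delalloc found in the io tree:  [64K, 128K - 1]
	 * Ordered extent for the same file range:   [128K, 192K - 1]
	 *
	 * oe_start (128K) == *delalloc_end_ret + 1, so the two subranges
	 * are adjacent and the combined range [64K, 192K - 1] is returned.
	 * Had the ordered extent started at 256K instead, the ranges would
	 * be disjoint and only the leftmost one, [64K, 128K - 1], would be
	 * reported for this call.
	 */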
| 
 | ||||
|  | @ -3648,6 +3333,8 @@ static bool find_delalloc_subrange(struct btrfs_inode *inode, u64 start, u64 end | |||
|  *                       sector size aligned. | ||||
|  * @end:                 The end offset (inclusive value) of the search range. | ||||
|  *                       It does not need to be sector size aligned. | ||||
|  * @cached_state:        Extent state record used for speeding up delalloc | ||||
|  *                       searches in the inode's io_tree. Can be NULL. | ||||
|  * @delalloc_start_ret:  Output argument, set to the start offset of the | ||||
|  *                       subrange found with delalloc (may not be sector size | ||||
|  *                       aligned). | ||||
|  | @ -3659,10 +3346,12 @@ static bool find_delalloc_subrange(struct btrfs_inode *inode, u64 start, u64 end | |||
|  * end offsets of the subrange. | ||||
|  */ | ||||
| bool btrfs_find_delalloc_in_range(struct btrfs_inode *inode, u64 start, u64 end, | ||||
| 				  struct extent_state **cached_state, | ||||
| 				  u64 *delalloc_start_ret, u64 *delalloc_end_ret) | ||||
| { | ||||
| 	u64 cur_offset = round_down(start, inode->root->fs_info->sectorsize); | ||||
| 	u64 prev_delalloc_end = 0; | ||||
| 	bool search_io_tree = true; | ||||
| 	bool ret = false; | ||||
| 
 | ||||
| 	while (cur_offset < end) { | ||||
|  | @ -3671,6 +3360,7 @@ bool btrfs_find_delalloc_in_range(struct btrfs_inode *inode, u64 start, u64 end, | |||
| 		bool delalloc; | ||||
| 
 | ||||
| 		delalloc = find_delalloc_subrange(inode, cur_offset, end, | ||||
| 						  cached_state, &search_io_tree, | ||||
| 						  &delalloc_start, | ||||
| 						  &delalloc_end); | ||||
| 		if (!delalloc) | ||||
|  | @ -3716,13 +3406,14 @@ bool btrfs_find_delalloc_in_range(struct btrfs_inode *inode, u64 start, u64 end, | |||
|  * is found, it updates @start_ret with the start of the subrange. | ||||
|  */ | ||||
| static bool find_desired_extent_in_hole(struct btrfs_inode *inode, int whence, | ||||
| 					struct extent_state **cached_state, | ||||
| 					u64 start, u64 end, u64 *start_ret) | ||||
| { | ||||
| 	u64 delalloc_start; | ||||
| 	u64 delalloc_end; | ||||
| 	bool delalloc; | ||||
| 
 | ||||
| 	delalloc = btrfs_find_delalloc_in_range(inode, start, end, | ||||
| 	delalloc = btrfs_find_delalloc_in_range(inode, start, end, cached_state, | ||||
| 						&delalloc_start, &delalloc_end); | ||||
| 	if (delalloc && whence == SEEK_DATA) { | ||||
| 		*start_ret = delalloc_start; | ||||
|  | @ -3765,11 +3456,13 @@ static bool find_desired_extent_in_hole(struct btrfs_inode *inode, int whence, | |||
| 	return false; | ||||
| } | ||||
| 
 | ||||
| static loff_t find_desired_extent(struct btrfs_inode *inode, loff_t offset, | ||||
| 				  int whence) | ||||
| static loff_t find_desired_extent(struct file *file, loff_t offset, int whence) | ||||
| { | ||||
| 	struct btrfs_inode *inode = BTRFS_I(file->f_mapping->host); | ||||
| 	struct btrfs_file_private *private = file->private_data; | ||||
| 	struct btrfs_fs_info *fs_info = inode->root->fs_info; | ||||
| 	struct extent_state *cached_state = NULL; | ||||
| 	struct extent_state **delalloc_cached_state; | ||||
| 	const loff_t i_size = i_size_read(&inode->vfs_inode); | ||||
| 	const u64 ino = btrfs_ino(inode); | ||||
| 	struct btrfs_root *root = inode->root; | ||||
|  | @ -3794,6 +3487,22 @@ static loff_t find_desired_extent(struct btrfs_inode *inode, loff_t offset, | |||
| 	    inode_get_bytes(&inode->vfs_inode) == i_size) | ||||
| 		return i_size; | ||||
| 
 | ||||
| 	if (!private) { | ||||
| 		private = kzalloc(sizeof(*private), GFP_KERNEL); | ||||
| 		/*
 | ||||
| 		 * No worries if memory allocation failed. | ||||
| 		 * The private structure is used only for speeding up multiple | ||||
| 		 * lseek SEEK_HOLE/DATA calls to a file when there's delalloc, | ||||
| 		 * so everything will still be correct. | ||||
| 		 */ | ||||
| 		file->private_data = private; | ||||
| 	} | ||||
| 
 | ||||
| 	if (private) | ||||
| 		delalloc_cached_state = &private->llseek_cached_state; | ||||
| 	else | ||||
| 		delalloc_cached_state = NULL; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * offset can be negative, in this case we start finding DATA/HOLE from | ||||
| 	 * the very start of the file. | ||||
|  | @ -3871,6 +3580,7 @@ static loff_t find_desired_extent(struct btrfs_inode *inode, loff_t offset, | |||
| 				search_start = offset; | ||||
| 
 | ||||
| 			found = find_desired_extent_in_hole(inode, whence, | ||||
| 							    delalloc_cached_state, | ||||
| 							    search_start, | ||||
| 							    key.offset - 1, | ||||
| 							    &found_start); | ||||
|  | @ -3905,6 +3615,7 @@ static loff_t find_desired_extent(struct btrfs_inode *inode, loff_t offset, | |||
| 				search_start = offset; | ||||
| 
 | ||||
| 			found = find_desired_extent_in_hole(inode, whence, | ||||
| 							    delalloc_cached_state, | ||||
| 							    search_start, | ||||
| 							    extent_end - 1, | ||||
| 							    &found_start); | ||||
|  | @ -3946,7 +3657,8 @@ static loff_t find_desired_extent(struct btrfs_inode *inode, loff_t offset, | |||
| 
 | ||||
| 	/* We have an implicit hole from the last extent found up to i_size. */ | ||||
| 	if (!found && start < i_size) { | ||||
| 		found = find_desired_extent_in_hole(inode, whence, start, | ||||
| 		found = find_desired_extent_in_hole(inode, whence, | ||||
| 						    delalloc_cached_state, start, | ||||
| 						    i_size - 1, &start); | ||||
| 		if (!found) | ||||
| 			start = i_size; | ||||
|  | @ -3974,9 +3686,9 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence) | |||
| 		return generic_file_llseek(file, offset, whence); | ||||
| 	case SEEK_DATA: | ||||
| 	case SEEK_HOLE: | ||||
| 		btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED); | ||||
| 		offset = find_desired_extent(BTRFS_I(inode), offset, whence); | ||||
| 		btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED); | ||||
| 		btrfs_inode_lock(BTRFS_I(inode), BTRFS_ILOCK_SHARED); | ||||
| 		offset = find_desired_extent(file, offset, whence); | ||||
| 		btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_SHARED); | ||||
| 		break; | ||||
| 	} | ||||
| 
 | ||||
|  | @ -4031,7 +3743,7 @@ static ssize_t btrfs_direct_read(struct kiocb *iocb, struct iov_iter *to) | |||
| 	if (check_direct_read(btrfs_sb(inode->i_sb), to, iocb->ki_pos)) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED); | ||||
| 	btrfs_inode_lock(BTRFS_I(inode), BTRFS_ILOCK_SHARED); | ||||
| again: | ||||
| 	/*
 | ||||
| 	 * This is similar to what we do for direct IO writes, see the comment | ||||
|  | @ -4080,7 +3792,7 @@ again: | |||
| 			goto again; | ||||
| 		} | ||||
| 	} | ||||
| 	btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED); | ||||
| 	btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_SHARED); | ||||
| 	return ret < 0 ? ret : read; | ||||
| } | ||||
| 
 | ||||
|  | @ -4117,23 +3829,6 @@ const struct file_operations btrfs_file_operations = { | |||
| 	.remap_file_range = btrfs_remap_file_range, | ||||
| }; | ||||
| 
 | ||||
| void __cold btrfs_auto_defrag_exit(void) | ||||
| { | ||||
| 	kmem_cache_destroy(btrfs_inode_defrag_cachep); | ||||
| } | ||||
| 
 | ||||
| int __init btrfs_auto_defrag_init(void) | ||||
| { | ||||
| 	btrfs_inode_defrag_cachep = kmem_cache_create("btrfs_inode_defrag", | ||||
| 					sizeof(struct inode_defrag), 0, | ||||
| 					SLAB_MEM_SPREAD, | ||||
| 					NULL); | ||||
| 	if (!btrfs_inode_defrag_cachep) | ||||
| 		return -ENOMEM; | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end) | ||||
| { | ||||
| 	int ret; | ||||
|  |  | |||
							
								
								
									
fs/btrfs/file.h | 33 (new file)
									
								
							|  | @ -0,0 +1,33 @@ | |||
| /* SPDX-License-Identifier: GPL-2.0 */ | ||||
| 
 | ||||
| #ifndef BTRFS_FILE_H | ||||
| #define BTRFS_FILE_H | ||||
| 
 | ||||
| extern const struct file_operations btrfs_file_operations; | ||||
| 
 | ||||
| int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); | ||||
| int btrfs_drop_extents(struct btrfs_trans_handle *trans, | ||||
| 		       struct btrfs_root *root, struct btrfs_inode *inode, | ||||
| 		       struct btrfs_drop_extents_args *args); | ||||
| int btrfs_replace_file_extents(struct btrfs_inode *inode, | ||||
| 			   struct btrfs_path *path, const u64 start, | ||||
| 			   const u64 end, | ||||
| 			   struct btrfs_replace_extent_info *extent_info, | ||||
| 			   struct btrfs_trans_handle **trans_out); | ||||
| int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, | ||||
| 			      struct btrfs_inode *inode, u64 start, u64 end); | ||||
| ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from, | ||||
| 			    const struct btrfs_ioctl_encoded_io_args *encoded); | ||||
| int btrfs_release_file(struct inode *inode, struct file *file); | ||||
| int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages, | ||||
| 		      size_t num_pages, loff_t pos, size_t write_bytes, | ||||
| 		      struct extent_state **cached, bool noreserve); | ||||
| int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end); | ||||
| int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos, | ||||
| 			   size_t *write_bytes, bool nowait); | ||||
| void btrfs_check_nocow_unlock(struct btrfs_inode *inode); | ||||
| bool btrfs_find_delalloc_in_range(struct btrfs_inode *inode, u64 start, u64 end, | ||||
| 				  struct extent_state **cached_state, | ||||
| 				  u64 *delalloc_start_ret, u64 *delalloc_end_ret); | ||||
| 
 | ||||
| #endif | ||||
|  | @ -11,8 +11,10 @@ | |||
| #include <linux/ratelimit.h> | ||||
| #include <linux/error-injection.h> | ||||
| #include <linux/sched/mm.h> | ||||
| #include "misc.h" | ||||
| #include "ctree.h" | ||||
| #include "fs.h" | ||||
| #include "messages.h" | ||||
| #include "misc.h" | ||||
| #include "free-space-cache.h" | ||||
| #include "transaction.h" | ||||
| #include "disk-io.h" | ||||
|  | @ -24,11 +26,18 @@ | |||
| #include "discard.h" | ||||
| #include "subpage.h" | ||||
| #include "inode-item.h" | ||||
| #include "accessors.h" | ||||
| #include "file-item.h" | ||||
| #include "file.h" | ||||
| #include "super.h" | ||||
| 
 | ||||
| #define BITS_PER_BITMAP		(PAGE_SIZE * 8UL) | ||||
| #define MAX_CACHE_BYTES_PER_GIG	SZ_64K | ||||
| #define FORCE_EXTENT_THRESHOLD	SZ_1M | ||||
| 
 | ||||
| static struct kmem_cache *btrfs_free_space_cachep; | ||||
| static struct kmem_cache *btrfs_free_space_bitmap_cachep; | ||||
| 
 | ||||
| struct btrfs_trim_range { | ||||
| 	u64 start; | ||||
| 	u64 bytes; | ||||
|  | @ -251,7 +260,7 @@ int btrfs_remove_free_space_inode(struct btrfs_trans_handle *trans, | |||
| 	} | ||||
| 	ret = btrfs_orphan_add(trans, BTRFS_I(inode)); | ||||
| 	if (ret) { | ||||
| 		btrfs_add_delayed_iput(inode); | ||||
| 		btrfs_add_delayed_iput(BTRFS_I(inode)); | ||||
| 		goto out; | ||||
| 	} | ||||
| 	clear_nlink(inode); | ||||
|  | @ -265,7 +274,7 @@ int btrfs_remove_free_space_inode(struct btrfs_trans_handle *trans, | |||
| 		spin_unlock(&block_group->lock); | ||||
| 	} | ||||
| 	/* One for the lookup ref */ | ||||
| 	btrfs_add_delayed_iput(inode); | ||||
| 	btrfs_add_delayed_iput(BTRFS_I(inode)); | ||||
| 
 | ||||
| 	key.objectid = BTRFS_FREE_SPACE_OBJECTID; | ||||
| 	key.type = 0; | ||||
|  | @ -1363,8 +1372,8 @@ int btrfs_wait_cache_io(struct btrfs_trans_handle *trans, | |||
| 				     path, block_group->start); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * Write out cached info to an inode | ||||
| /*
 | ||||
|  * Write out cached info to an inode. | ||||
|  * | ||||
|  * @root:        root the inode belongs to | ||||
|  * @inode:       freespace inode we are writing out | ||||
|  | @ -2717,8 +2726,7 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, | |||
| 		btrfs_mark_bg_unused(block_group); | ||||
| 	} else if (bg_reclaim_threshold && | ||||
| 		   reclaimable_unusable >= | ||||
| 		   div_factor_fine(block_group->zone_capacity, | ||||
| 				   bg_reclaim_threshold)) { | ||||
| 		   mult_perc(block_group->zone_capacity, bg_reclaim_threshold)) { | ||||
| 		btrfs_mark_bg_to_reclaim(block_group); | ||||
| 	} | ||||
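The mult_perc() call above replaces div_factor_fine() with the same semantics: both compute value * percent / 100. For example, with bg_reclaim_threshold set to 75, a 1 GiB zone capacity yields a 768 MiB reclaimable-unusable cutoff.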
| 
 | ||||
|  | @ -3028,10 +3036,7 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group) | |||
| 
 | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * btrfs_is_free_space_trimmed - see if everything is trimmed | ||||
|  * @block_group: block_group of interest | ||||
|  * | ||||
| /*
 | ||||
|  * Walk @block_group's free space rb_tree to determine if everything is trimmed. | ||||
|  */ | ||||
| bool btrfs_is_free_space_trimmed(struct btrfs_block_group *block_group) | ||||
|  | @ -4132,6 +4137,31 @@ out: | |||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| int __init btrfs_free_space_init(void) | ||||
| { | ||||
| 	btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space", | ||||
| 			sizeof(struct btrfs_free_space), 0, | ||||
| 			SLAB_MEM_SPREAD, NULL); | ||||
| 	if (!btrfs_free_space_cachep) | ||||
| 		return -ENOMEM; | ||||
| 
 | ||||
| 	btrfs_free_space_bitmap_cachep = kmem_cache_create("btrfs_free_space_bitmap", | ||||
| 							PAGE_SIZE, PAGE_SIZE, | ||||
| 							SLAB_MEM_SPREAD, NULL); | ||||
| 	if (!btrfs_free_space_bitmap_cachep) { | ||||
| 		kmem_cache_destroy(btrfs_free_space_cachep); | ||||
| 		return -ENOMEM; | ||||
| 	} | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| void __cold btrfs_free_space_exit(void) | ||||
| { | ||||
| 	kmem_cache_destroy(btrfs_free_space_cachep); | ||||
| 	kmem_cache_destroy(btrfs_free_space_bitmap_cachep); | ||||
| } | ||||
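Note the failure unwinding in btrfs_free_space_init(): if the bitmap cache cannot be created, the already-created btrfs_free_space cache is destroyed before returning -ENOMEM, so a failed init leaves no slab caches behind.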
| 
 | ||||
| #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | ||||
| /*
 | ||||
|  * Use this if you need to make a bitmap or extent entry specifically, it | ||||
|  |  | |||
|  | @ -43,6 +43,17 @@ static inline bool btrfs_free_space_trimming_bitmap( | |||
| 	return (info->trim_state == BTRFS_TRIM_STATE_TRIMMING); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Deltas are an effective way to populate global statistics.  Give macro names | ||||
|  * to make it clear what we're doing.  An example is discard_extents in | ||||
|  * btrfs_free_space_ctl. | ||||
|  */ | ||||
| enum { | ||||
| 	BTRFS_STAT_CURR, | ||||
| 	BTRFS_STAT_PREV, | ||||
| 	BTRFS_STAT_NR_ENTRIES, | ||||
| }; | ||||
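The enum above names the two slots of a delta pair. A minimal sketch of the pattern, assuming a stats array indexed by these constants (the helper below is illustrative, not part of this series):

	/*
	 * Illustrative only: fold the change since the last sample into a
	 * global counter, then promote the current sample to "previous".
	 */
	static void stat_update_global(s32 stat[BTRFS_STAT_NR_ENTRIES],
				       atomic_t *global)
	{
		s32 delta = stat[BTRFS_STAT_CURR] - stat[BTRFS_STAT_PREV];

		if (delta) {
			atomic_add(delta, global);
			stat[BTRFS_STAT_PREV] = stat[BTRFS_STAT_CURR];
		}
	}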
| 
 | ||||
| struct btrfs_free_space_ctl { | ||||
| 	spinlock_t tree_lock; | ||||
| 	struct rb_root free_space_offset; | ||||
|  | @ -79,6 +90,8 @@ struct btrfs_io_ctl { | |||
| 	int bitmaps; | ||||
| }; | ||||
| 
 | ||||
| int __init btrfs_free_space_init(void); | ||||
| void __cold btrfs_free_space_exit(void); | ||||
| struct inode *lookup_free_space_inode(struct btrfs_block_group *block_group, | ||||
| 		struct btrfs_path *path); | ||||
| int create_free_space_inode(struct btrfs_trans_handle *trans, | ||||
|  |  | |||
|  | @ -5,12 +5,17 @@ | |||
| 
 | ||||
| #include <linux/kernel.h> | ||||
| #include <linux/sched/mm.h> | ||||
| #include "messages.h" | ||||
| #include "ctree.h" | ||||
| #include "disk-io.h" | ||||
| #include "locking.h" | ||||
| #include "free-space-tree.h" | ||||
| #include "transaction.h" | ||||
| #include "block-group.h" | ||||
| #include "fs.h" | ||||
| #include "accessors.h" | ||||
| #include "extent-tree.h" | ||||
| #include "root-tree.h" | ||||
| 
 | ||||
| static int __add_block_group_free_space(struct btrfs_trans_handle *trans, | ||||
| 					struct btrfs_block_group *block_group, | ||||
|  | @ -803,7 +808,7 @@ int __remove_from_free_space_tree(struct btrfs_trans_handle *trans, | |||
| 	u32 flags; | ||||
| 	int ret; | ||||
| 
 | ||||
| 	if (block_group->needs_free_space) { | ||||
| 	if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) { | ||||
| 		ret = __add_block_group_free_space(trans, block_group, path); | ||||
| 		if (ret) | ||||
| 			return ret; | ||||
|  | @ -996,7 +1001,7 @@ int __add_to_free_space_tree(struct btrfs_trans_handle *trans, | |||
| 	u32 flags; | ||||
| 	int ret; | ||||
| 
 | ||||
| 	if (block_group->needs_free_space) { | ||||
| 	if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) { | ||||
| 		ret = __add_block_group_free_space(trans, block_group, path); | ||||
| 		if (ret) | ||||
| 			return ret; | ||||
|  | @ -1299,7 +1304,7 @@ static int __add_block_group_free_space(struct btrfs_trans_handle *trans, | |||
| { | ||||
| 	int ret; | ||||
| 
 | ||||
| 	block_group->needs_free_space = 0; | ||||
| 	clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags); | ||||
| 
 | ||||
| 	ret = add_new_free_space_info(trans, block_group, path); | ||||
| 	if (ret) | ||||
|  | @ -1321,7 +1326,7 @@ int add_block_group_free_space(struct btrfs_trans_handle *trans, | |||
| 		return 0; | ||||
| 
 | ||||
| 	mutex_lock(&block_group->free_space_lock); | ||||
| 	if (!block_group->needs_free_space) | ||||
| 	if (!test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) | ||||
| 		goto out; | ||||
| 
 | ||||
| 	path = btrfs_alloc_path(); | ||||
|  | @ -1354,7 +1359,7 @@ int remove_block_group_free_space(struct btrfs_trans_handle *trans, | |||
| 	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE)) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	if (block_group->needs_free_space) { | ||||
| 	if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) { | ||||
| 		/* We never added this block group to the free space tree. */ | ||||
| 		return 0; | ||||
| 	} | ||||
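Throughout these hunks the dedicated needs_free_space integer is folded into a bit (BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE) in block_group->runtime_flags, manipulated with the atomic test_bit()/clear_bit() helpers so that several such booleans can share one word.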
fs/btrfs/fs.c (new file, 94 lines)
|  | @ -0,0 +1,94 @@ | |||
| // SPDX-License-Identifier: GPL-2.0
 | ||||
| 
 | ||||
| #include "messages.h" | ||||
| #include "ctree.h" | ||||
| #include "fs.h" | ||||
| #include "accessors.h" | ||||
| 
 | ||||
| void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag, | ||||
| 			     const char *name) | ||||
| { | ||||
| 	struct btrfs_super_block *disk_super; | ||||
| 	u64 features; | ||||
| 
 | ||||
| 	disk_super = fs_info->super_copy; | ||||
| 	features = btrfs_super_incompat_flags(disk_super); | ||||
| 	if (!(features & flag)) { | ||||
| 		spin_lock(&fs_info->super_lock); | ||||
| 		features = btrfs_super_incompat_flags(disk_super); | ||||
| 		if (!(features & flag)) { | ||||
| 			features |= flag; | ||||
| 			btrfs_set_super_incompat_flags(disk_super, features); | ||||
| 			btrfs_info(fs_info, | ||||
| 				"setting incompat feature flag for %s (0x%llx)", | ||||
| 				name, flag); | ||||
| 		} | ||||
| 		spin_unlock(&fs_info->super_lock); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| void __btrfs_clear_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag, | ||||
| 			       const char *name) | ||||
| { | ||||
| 	struct btrfs_super_block *disk_super; | ||||
| 	u64 features; | ||||
| 
 | ||||
| 	disk_super = fs_info->super_copy; | ||||
| 	features = btrfs_super_incompat_flags(disk_super); | ||||
| 	if (features & flag) { | ||||
| 		spin_lock(&fs_info->super_lock); | ||||
| 		features = btrfs_super_incompat_flags(disk_super); | ||||
| 		if (features & flag) { | ||||
| 			features &= ~flag; | ||||
| 			btrfs_set_super_incompat_flags(disk_super, features); | ||||
| 			btrfs_info(fs_info, | ||||
| 				"clearing incompat feature flag for %s (0x%llx)", | ||||
| 				name, flag); | ||||
| 		} | ||||
| 		spin_unlock(&fs_info->super_lock); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| void __btrfs_set_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag, | ||||
| 			      const char *name) | ||||
| { | ||||
| 	struct btrfs_super_block *disk_super; | ||||
| 	u64 features; | ||||
| 
 | ||||
| 	disk_super = fs_info->super_copy; | ||||
| 	features = btrfs_super_compat_ro_flags(disk_super); | ||||
| 	if (!(features & flag)) { | ||||
| 		spin_lock(&fs_info->super_lock); | ||||
| 		features = btrfs_super_compat_ro_flags(disk_super); | ||||
| 		if (!(features & flag)) { | ||||
| 			features |= flag; | ||||
| 			btrfs_set_super_compat_ro_flags(disk_super, features); | ||||
| 			btrfs_info(fs_info, | ||||
| 				"setting compat-ro feature flag for %s (0x%llx)", | ||||
| 				name, flag); | ||||
| 		} | ||||
| 		spin_unlock(&fs_info->super_lock); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| void __btrfs_clear_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag, | ||||
| 				const char *name) | ||||
| { | ||||
| 	struct btrfs_super_block *disk_super; | ||||
| 	u64 features; | ||||
| 
 | ||||
| 	disk_super = fs_info->super_copy; | ||||
| 	features = btrfs_super_compat_ro_flags(disk_super); | ||||
| 	if (features & flag) { | ||||
| 		spin_lock(&fs_info->super_lock); | ||||
| 		features = btrfs_super_compat_ro_flags(disk_super); | ||||
| 		if (features & flag) { | ||||
| 			features &= ~flag; | ||||
| 			btrfs_set_super_compat_ro_flags(disk_super, features); | ||||
| 			btrfs_info(fs_info, | ||||
| 				"clearing compat-ro feature flag for %s (0x%llx)", | ||||
| 				name, flag); | ||||
| 		} | ||||
| 		spin_unlock(&fs_info->super_lock); | ||||
| 	} | ||||
| } | ||||
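All four helpers in this new file share one double-checked pattern: a lockless read skips the common already-set (or already-clear) case, and the test is repeated under super_lock before the superblock copy is modified. A condensed sketch of the shape (illustrative; the real code goes through the btrfs_super_*_flags() accessors):

	static void set_flag_once(struct btrfs_fs_info *fs_info, u64 *flags,
				  u64 flag)
	{
		if (!(*flags & flag)) {			/* cheap unlocked check */
			spin_lock(&fs_info->super_lock);
			if (!(*flags & flag))		/* re-check under the lock */
				*flags |= flag;
			spin_unlock(&fs_info->super_lock);
		}
	}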
fs/btrfs/fs.h (new file, 976 lines)
|  | @ -0,0 +1,976 @@ | |||
| /* SPDX-License-Identifier: GPL-2.0 */ | ||||
| 
 | ||||
| #ifndef BTRFS_FS_H | ||||
| #define BTRFS_FS_H | ||||
| 
 | ||||
| #include <linux/fs.h> | ||||
| #include <linux/btrfs_tree.h> | ||||
| #include <linux/sizes.h> | ||||
| #include "extent-io-tree.h" | ||||
| #include "extent_map.h" | ||||
| #include "async-thread.h" | ||||
| #include "block-rsv.h" | ||||
| 
 | ||||
| #define BTRFS_MAX_EXTENT_SIZE SZ_128M | ||||
| 
 | ||||
| #define BTRFS_OLDEST_GENERATION	0ULL | ||||
| 
 | ||||
| #define BTRFS_EMPTY_DIR_SIZE 0 | ||||
| 
 | ||||
| #define BTRFS_DIRTY_METADATA_THRESH		SZ_32M | ||||
| 
 | ||||
| #define BTRFS_SUPER_INFO_OFFSET			SZ_64K | ||||
| #define BTRFS_SUPER_INFO_SIZE			4096 | ||||
| static_assert(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE); | ||||
| 
 | ||||
| /*
 | ||||
|  * The reserved space at the beginning of each device.  It covers the primary | ||||
|  * super block, leaves room for other tools such as bootloaders, and | ||||
|  * lowers the potential damage from an accidental overwrite. | ||||
|  */ | ||||
| #define BTRFS_DEVICE_RANGE_RESERVED			(SZ_1M) | ||||
| /*
 | ||||
|  * Runtime (in-memory) states of filesystem | ||||
|  */ | ||||
| enum { | ||||
| 	/* Global indicator of serious filesystem errors */ | ||||
| 	BTRFS_FS_STATE_ERROR, | ||||
| 	/*
 | ||||
| 	 * Filesystem is being remounted, allow to skip some operations, like | ||||
| 	 * defrag | ||||
| 	 */ | ||||
| 	BTRFS_FS_STATE_REMOUNTING, | ||||
| 	/* Filesystem in RO mode */ | ||||
| 	BTRFS_FS_STATE_RO, | ||||
| 	/* Track if a transaction abort has been reported on this filesystem */ | ||||
| 	BTRFS_FS_STATE_TRANS_ABORTED, | ||||
| 	/*
 | ||||
| 	 * Bio operations should be blocked on this filesystem because a source | ||||
| 	 * or target device is being destroyed as part of a device replace | ||||
| 	 */ | ||||
| 	BTRFS_FS_STATE_DEV_REPLACING, | ||||
| 	/* The btrfs_fs_info created for self-tests */ | ||||
| 	BTRFS_FS_STATE_DUMMY_FS_INFO, | ||||
| 
 | ||||
| 	BTRFS_FS_STATE_NO_CSUMS, | ||||
| 
 | ||||
| 	/* Indicates there was an error cleaning up a log tree. */ | ||||
| 	BTRFS_FS_STATE_LOG_CLEANUP_ERROR, | ||||
| 
 | ||||
| 	BTRFS_FS_STATE_COUNT | ||||
| }; | ||||
| 
 | ||||
| enum { | ||||
| 	BTRFS_FS_CLOSING_START, | ||||
| 	BTRFS_FS_CLOSING_DONE, | ||||
| 	BTRFS_FS_LOG_RECOVERING, | ||||
| 	BTRFS_FS_OPEN, | ||||
| 	BTRFS_FS_QUOTA_ENABLED, | ||||
| 	BTRFS_FS_UPDATE_UUID_TREE_GEN, | ||||
| 	BTRFS_FS_CREATING_FREE_SPACE_TREE, | ||||
| 	BTRFS_FS_BTREE_ERR, | ||||
| 	BTRFS_FS_LOG1_ERR, | ||||
| 	BTRFS_FS_LOG2_ERR, | ||||
| 	BTRFS_FS_QUOTA_OVERRIDE, | ||||
| 	/* Used to record internally whether fs has been frozen */ | ||||
| 	BTRFS_FS_FROZEN, | ||||
| 	/*
 | ||||
| 	 * Indicate that balance has been set up from the ioctl and is in the | ||||
| 	 * main phase. The fs_info::balance_ctl is initialized. | ||||
| 	 */ | ||||
| 	BTRFS_FS_BALANCE_RUNNING, | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Indicate that relocation of a chunk has started, it's set per chunk | ||||
| 	 * and is toggled between chunks. | ||||
| 	 */ | ||||
| 	BTRFS_FS_RELOC_RUNNING, | ||||
| 
 | ||||
| 	/* Indicate that the cleaner thread is awake and doing something. */ | ||||
| 	BTRFS_FS_CLEANER_RUNNING, | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * The checksumming has an optimized version and is considered fast, | ||||
| 	 * so we don't need to offload checksums to workqueues. | ||||
| 	 */ | ||||
| 	BTRFS_FS_CSUM_IMPL_FAST, | ||||
| 
 | ||||
| 	/* Indicate that the discard workqueue can service discards. */ | ||||
| 	BTRFS_FS_DISCARD_RUNNING, | ||||
| 
 | ||||
| 	/* Indicate that we need to cleanup space cache v1 */ | ||||
| 	BTRFS_FS_CLEANUP_SPACE_CACHE_V1, | ||||
| 
 | ||||
| 	/* Indicate that we can't trust the free space tree for caching yet */ | ||||
| 	BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, | ||||
| 
 | ||||
| 	/* Indicate whether there are any tree modification log users */ | ||||
| 	BTRFS_FS_TREE_MOD_LOG_USERS, | ||||
| 
 | ||||
| 	/* Indicate that we want the transaction kthread to commit right now. */ | ||||
| 	BTRFS_FS_COMMIT_TRANS, | ||||
| 
 | ||||
| 	/* Indicate we have half completed snapshot deletions pending. */ | ||||
| 	BTRFS_FS_UNFINISHED_DROPS, | ||||
| 
 | ||||
| 	/* Indicate we have to finish a zone to do next allocation. */ | ||||
| 	BTRFS_FS_NEED_ZONE_FINISH, | ||||
| 
 | ||||
| 	/* Indicate that we want to commit the transaction. */ | ||||
| 	BTRFS_FS_NEED_TRANS_COMMIT, | ||||
| 
 | ||||
| #if BITS_PER_LONG == 32 | ||||
| 	/* Indicate if we have error/warn message printed on 32bit systems */ | ||||
| 	BTRFS_FS_32BIT_ERROR, | ||||
| 	BTRFS_FS_32BIT_WARN, | ||||
| #endif | ||||
| }; | ||||
| 
 | ||||
| /*
 | ||||
|  * Flags for mount options. | ||||
|  * | ||||
|  * Note: don't forget to add new options to btrfs_show_options() | ||||
|  */ | ||||
| enum { | ||||
| 	BTRFS_MOUNT_NODATASUM			= (1UL << 0), | ||||
| 	BTRFS_MOUNT_NODATACOW			= (1UL << 1), | ||||
| 	BTRFS_MOUNT_NOBARRIER			= (1UL << 2), | ||||
| 	BTRFS_MOUNT_SSD				= (1UL << 3), | ||||
| 	BTRFS_MOUNT_DEGRADED			= (1UL << 4), | ||||
| 	BTRFS_MOUNT_COMPRESS			= (1UL << 5), | ||||
| 	BTRFS_MOUNT_NOTREELOG   		= (1UL << 6), | ||||
| 	BTRFS_MOUNT_FLUSHONCOMMIT		= (1UL << 7), | ||||
| 	BTRFS_MOUNT_SSD_SPREAD			= (1UL << 8), | ||||
| 	BTRFS_MOUNT_NOSSD			= (1UL << 9), | ||||
| 	BTRFS_MOUNT_DISCARD_SYNC		= (1UL << 10), | ||||
| 	BTRFS_MOUNT_FORCE_COMPRESS      	= (1UL << 11), | ||||
| 	BTRFS_MOUNT_SPACE_CACHE			= (1UL << 12), | ||||
| 	BTRFS_MOUNT_CLEAR_CACHE			= (1UL << 13), | ||||
| 	BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED	= (1UL << 14), | ||||
| 	BTRFS_MOUNT_ENOSPC_DEBUG		= (1UL << 15), | ||||
| 	BTRFS_MOUNT_AUTO_DEFRAG			= (1UL << 16), | ||||
| 	BTRFS_MOUNT_USEBACKUPROOT		= (1UL << 17), | ||||
| 	BTRFS_MOUNT_SKIP_BALANCE		= (1UL << 18), | ||||
| 	BTRFS_MOUNT_CHECK_INTEGRITY		= (1UL << 19), | ||||
| 	BTRFS_MOUNT_CHECK_INTEGRITY_DATA	= (1UL << 20), | ||||
| 	BTRFS_MOUNT_PANIC_ON_FATAL_ERROR	= (1UL << 21), | ||||
| 	BTRFS_MOUNT_RESCAN_UUID_TREE		= (1UL << 22), | ||||
| 	BTRFS_MOUNT_FRAGMENT_DATA		= (1UL << 23), | ||||
| 	BTRFS_MOUNT_FRAGMENT_METADATA		= (1UL << 24), | ||||
| 	BTRFS_MOUNT_FREE_SPACE_TREE		= (1UL << 25), | ||||
| 	BTRFS_MOUNT_NOLOGREPLAY			= (1UL << 26), | ||||
| 	BTRFS_MOUNT_REF_VERIFY			= (1UL << 27), | ||||
| 	BTRFS_MOUNT_DISCARD_ASYNC		= (1UL << 28), | ||||
| 	BTRFS_MOUNT_IGNOREBADROOTS		= (1UL << 29), | ||||
| 	BTRFS_MOUNT_IGNOREDATACSUMS		= (1UL << 30), | ||||
| 	BTRFS_MOUNT_NODISCARD			= (1UL << 31), | ||||
| }; | ||||
| 
 | ||||
| /*
 | ||||
|  * Compat flags that we support.  If any incompat flags are set other than the | ||||
|  * ones specified below then we will fail to mount | ||||
|  */ | ||||
| #define BTRFS_FEATURE_COMPAT_SUPP		0ULL | ||||
| #define BTRFS_FEATURE_COMPAT_SAFE_SET		0ULL | ||||
| #define BTRFS_FEATURE_COMPAT_SAFE_CLEAR		0ULL | ||||
| 
 | ||||
| #define BTRFS_FEATURE_COMPAT_RO_SUPP			\ | ||||
| 	(BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE |	\ | ||||
| 	 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID | \ | ||||
| 	 BTRFS_FEATURE_COMPAT_RO_VERITY |		\ | ||||
| 	 BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE) | ||||
| 
 | ||||
| #define BTRFS_FEATURE_COMPAT_RO_SAFE_SET	0ULL | ||||
| #define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR	0ULL | ||||
| 
 | ||||
| #ifdef CONFIG_BTRFS_DEBUG | ||||
| /*
 | ||||
|  * Extent tree v2 supported only with CONFIG_BTRFS_DEBUG | ||||
|  */ | ||||
| #define BTRFS_FEATURE_INCOMPAT_SUPP			\ | ||||
| 	(BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF |		\ | ||||
| 	 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL |	\ | ||||
| 	 BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS |		\ | ||||
| 	 BTRFS_FEATURE_INCOMPAT_BIG_METADATA |		\ | ||||
| 	 BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO |		\ | ||||
| 	 BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD |		\ | ||||
| 	 BTRFS_FEATURE_INCOMPAT_RAID56 |		\ | ||||
| 	 BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF |		\ | ||||
| 	 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA |	\ | ||||
| 	 BTRFS_FEATURE_INCOMPAT_NO_HOLES	|	\ | ||||
| 	 BTRFS_FEATURE_INCOMPAT_METADATA_UUID	|	\ | ||||
| 	 BTRFS_FEATURE_INCOMPAT_RAID1C34	|	\ | ||||
| 	 BTRFS_FEATURE_INCOMPAT_ZONED		|	\ | ||||
| 	 BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2) | ||||
| #else | ||||
| #define BTRFS_FEATURE_INCOMPAT_SUPP			\ | ||||
| 	(BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF |		\ | ||||
| 	 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL |	\ | ||||
| 	 BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS |		\ | ||||
| 	 BTRFS_FEATURE_INCOMPAT_BIG_METADATA |		\ | ||||
| 	 BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO |		\ | ||||
| 	 BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD |		\ | ||||
| 	 BTRFS_FEATURE_INCOMPAT_RAID56 |		\ | ||||
| 	 BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF |		\ | ||||
| 	 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA |	\ | ||||
| 	 BTRFS_FEATURE_INCOMPAT_NO_HOLES	|	\ | ||||
| 	 BTRFS_FEATURE_INCOMPAT_METADATA_UUID	|	\ | ||||
| 	 BTRFS_FEATURE_INCOMPAT_RAID1C34	|	\ | ||||
| 	 BTRFS_FEATURE_INCOMPAT_ZONED) | ||||
| #endif | ||||
| 
 | ||||
| #define BTRFS_FEATURE_INCOMPAT_SAFE_SET			\ | ||||
| 	(BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF) | ||||
| #define BTRFS_FEATURE_INCOMPAT_SAFE_CLEAR		0ULL | ||||
| 
 | ||||
| #define BTRFS_DEFAULT_COMMIT_INTERVAL	(30) | ||||
| #define BTRFS_DEFAULT_MAX_INLINE	(2048) | ||||
| 
 | ||||
| struct btrfs_dev_replace { | ||||
| 	/* See #define above */ | ||||
| 	u64 replace_state; | ||||
| 	/* Seconds since 1-Jan-1970 */ | ||||
| 	time64_t time_started; | ||||
| 	/* Seconds since 1-Jan-1970 */ | ||||
| 	time64_t time_stopped; | ||||
| 	atomic64_t num_write_errors; | ||||
| 	atomic64_t num_uncorrectable_read_errors; | ||||
| 
 | ||||
| 	u64 cursor_left; | ||||
| 	u64 committed_cursor_left; | ||||
| 	u64 cursor_left_last_write_of_item; | ||||
| 	u64 cursor_right; | ||||
| 
 | ||||
| 	/* See #define above */ | ||||
| 	u64 cont_reading_from_srcdev_mode; | ||||
| 
 | ||||
| 	int is_valid; | ||||
| 	int item_needs_writeback; | ||||
| 	struct btrfs_device *srcdev; | ||||
| 	struct btrfs_device *tgtdev; | ||||
| 
 | ||||
| 	struct mutex lock_finishing_cancel_unmount; | ||||
| 	struct rw_semaphore rwsem; | ||||
| 
 | ||||
| 	struct btrfs_scrub_progress scrub_progress; | ||||
| 
 | ||||
| 	struct percpu_counter bio_counter; | ||||
| 	wait_queue_head_t replace_wait; | ||||
| }; | ||||
| 
 | ||||
| /*
 | ||||
|  * Free clusters are used to claim free space in relatively large chunks, | ||||
|  * allowing us to do fewer seeky writes. They are used for all metadata | ||||
|  * allocations. In ssd_spread mode they are also used for data allocations. | ||||
|  */ | ||||
| struct btrfs_free_cluster { | ||||
| 	spinlock_t lock; | ||||
| 	spinlock_t refill_lock; | ||||
| 	struct rb_root root; | ||||
| 
 | ||||
| 	/* Largest extent in this cluster */ | ||||
| 	u64 max_size; | ||||
| 
 | ||||
| 	/* First extent starting offset */ | ||||
| 	u64 window_start; | ||||
| 
 | ||||
| 	/* We did a full search and couldn't create a cluster */ | ||||
| 	bool fragmented; | ||||
| 
 | ||||
| 	struct btrfs_block_group *block_group; | ||||
| 	/*
 | ||||
| 	 * When a cluster is allocated from a block group, we put the cluster | ||||
| 	 * onto a list in the block group so that it can be freed before the | ||||
| 	 * block group is freed. | ||||
| 	 */ | ||||
| 	struct list_head block_group_list; | ||||
| }; | ||||
| 
 | ||||
| /* Discard control. */ | ||||
| /*
 | ||||
|  * Async discard uses multiple lists to differentiate the discard filter | ||||
|  * parameters.  Index 0 is for completely free block groups where we need to | ||||
|  * ensure the entire block group is trimmed without being lossy.  Indices | ||||
|  * afterwards represent monotonically decreasing discard filter sizes to | ||||
|  * prioritize what should be discarded next. | ||||
|  */ | ||||
| #define BTRFS_NR_DISCARD_LISTS		3 | ||||
| #define BTRFS_DISCARD_INDEX_UNUSED	0 | ||||
| #define BTRFS_DISCARD_INDEX_START	1 | ||||
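A hypothetical illustration of the indexing scheme described above; the size threshold here is made up, only the shape (unused block groups first, then monotonically decreasing filter sizes) follows the comment:

	static int discard_index_for(u64 largest_free_extent, bool bg_unused)
	{
		if (bg_unused)
			return BTRFS_DISCARD_INDEX_UNUSED;	/* trim it whole */
		if (largest_free_extent >= SZ_32M)		/* made-up cutoff */
			return BTRFS_DISCARD_INDEX_START;
		return BTRFS_NR_DISCARD_LISTS - 1;		/* smallest filter */
	}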
| 
 | ||||
| struct btrfs_discard_ctl { | ||||
| 	struct workqueue_struct *discard_workers; | ||||
| 	struct delayed_work work; | ||||
| 	spinlock_t lock; | ||||
| 	struct btrfs_block_group *block_group; | ||||
| 	struct list_head discard_list[BTRFS_NR_DISCARD_LISTS]; | ||||
| 	u64 prev_discard; | ||||
| 	u64 prev_discard_time; | ||||
| 	atomic_t discardable_extents; | ||||
| 	atomic64_t discardable_bytes; | ||||
| 	u64 max_discard_size; | ||||
| 	u64 delay_ms; | ||||
| 	u32 iops_limit; | ||||
| 	u32 kbps_limit; | ||||
| 	u64 discard_extent_bytes; | ||||
| 	u64 discard_bitmap_bytes; | ||||
| 	atomic64_t discard_bytes_saved; | ||||
| }; | ||||
| 
 | ||||
| /*
 | ||||
|  * Exclusive operations (device replace, resize, device add/remove, balance) | ||||
|  */ | ||||
| enum btrfs_exclusive_operation { | ||||
| 	BTRFS_EXCLOP_NONE, | ||||
| 	BTRFS_EXCLOP_BALANCE_PAUSED, | ||||
| 	BTRFS_EXCLOP_BALANCE, | ||||
| 	BTRFS_EXCLOP_DEV_ADD, | ||||
| 	BTRFS_EXCLOP_DEV_REMOVE, | ||||
| 	BTRFS_EXCLOP_DEV_REPLACE, | ||||
| 	BTRFS_EXCLOP_RESIZE, | ||||
| 	BTRFS_EXCLOP_SWAP_ACTIVATE, | ||||
| }; | ||||
| 
 | ||||
| /* Store data about transaction commits, exported via sysfs. */ | ||||
| struct btrfs_commit_stats { | ||||
| 	/* Total number of commits */ | ||||
| 	u64 commit_count; | ||||
| 	/* The maximum commit duration so far in ns */ | ||||
| 	u64 max_commit_dur; | ||||
| 	/* The last commit duration in ns */ | ||||
| 	u64 last_commit_dur; | ||||
| 	/* The total commit duration in ns */ | ||||
| 	u64 total_commit_dur; | ||||
| }; | ||||
| 
 | ||||
| struct btrfs_fs_info { | ||||
| 	u8 chunk_tree_uuid[BTRFS_UUID_SIZE]; | ||||
| 	unsigned long flags; | ||||
| 	struct btrfs_root *tree_root; | ||||
| 	struct btrfs_root *chunk_root; | ||||
| 	struct btrfs_root *dev_root; | ||||
| 	struct btrfs_root *fs_root; | ||||
| 	struct btrfs_root *quota_root; | ||||
| 	struct btrfs_root *uuid_root; | ||||
| 	struct btrfs_root *data_reloc_root; | ||||
| 	struct btrfs_root *block_group_root; | ||||
| 
 | ||||
| 	/* The log root tree is a directory of all the other log roots */ | ||||
| 	struct btrfs_root *log_root_tree; | ||||
| 
 | ||||
| 	/* The tree that holds the global roots (csum, extent, etc) */ | ||||
| 	rwlock_t global_root_lock; | ||||
| 	struct rb_root global_root_tree; | ||||
| 
 | ||||
| 	spinlock_t fs_roots_radix_lock; | ||||
| 	struct radix_tree_root fs_roots_radix; | ||||
| 
 | ||||
| 	/* Block group cache stuff */ | ||||
| 	rwlock_t block_group_cache_lock; | ||||
| 	struct rb_root_cached block_group_cache_tree; | ||||
| 
 | ||||
| 	/* Keep track of unallocated space */ | ||||
| 	atomic64_t free_chunk_space; | ||||
| 
 | ||||
| 	/* Track ranges which are used by log trees blocks/logged data extents */ | ||||
| 	struct extent_io_tree excluded_extents; | ||||
| 
 | ||||
| 	/* logical->physical extent mapping */ | ||||
| 	struct extent_map_tree mapping_tree; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Block reservation for extent, checksum, root tree and delayed dir | ||||
| 	 * index item. | ||||
| 	 */ | ||||
| 	struct btrfs_block_rsv global_block_rsv; | ||||
| 	/* Block reservation for metadata operations */ | ||||
| 	struct btrfs_block_rsv trans_block_rsv; | ||||
| 	/* Block reservation for chunk tree */ | ||||
| 	struct btrfs_block_rsv chunk_block_rsv; | ||||
| 	/* Block reservation for delayed operations */ | ||||
| 	struct btrfs_block_rsv delayed_block_rsv; | ||||
| 	/* Block reservation for delayed refs */ | ||||
| 	struct btrfs_block_rsv delayed_refs_rsv; | ||||
| 
 | ||||
| 	struct btrfs_block_rsv empty_block_rsv; | ||||
| 
 | ||||
| 	u64 generation; | ||||
| 	u64 last_trans_committed; | ||||
| 	/*
 | ||||
| 	 * Generation of the last transaction used for block group relocation | ||||
| 	 * since the filesystem was last mounted (or 0 if none happened yet). | ||||
| 	 * Must be written and read while holding btrfs_fs_info::commit_root_sem. | ||||
| 	 */ | ||||
| 	u64 last_reloc_trans; | ||||
| 	u64 avg_delayed_ref_runtime; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * This is updated to the current trans every time a full commit is | ||||
| 	 * required instead of the faster short fsync log commits | ||||
| 	 */ | ||||
| 	u64 last_trans_log_full_commit; | ||||
| 	unsigned long mount_opt; | ||||
| 
 | ||||
| 	unsigned long compress_type:4; | ||||
| 	unsigned int compress_level; | ||||
| 	u32 commit_interval; | ||||
| 	/*
 | ||||
| 	 * This is an advisory number; the read side is safe even if it sees | ||||
| 	 * a stale value, because the data is written out into a regular | ||||
| 	 * extent anyway. The write side (mount/remount) is under the | ||||
| 	 * ->s_umount lock, so it is also safe. | ||||
| 	 */ | ||||
| 	u64 max_inline; | ||||
| 
 | ||||
| 	struct btrfs_transaction *running_transaction; | ||||
| 	wait_queue_head_t transaction_throttle; | ||||
| 	wait_queue_head_t transaction_wait; | ||||
| 	wait_queue_head_t transaction_blocked_wait; | ||||
| 	wait_queue_head_t async_submit_wait; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Used to protect the incompat_flags, compat_flags, compat_ro_flags | ||||
| 	 * when they are updated. | ||||
| 	 * | ||||
| 	 * Because the flags are never cleared, we don't need the lock on | ||||
| 	 * the read side. | ||||
| 	 * | ||||
| 	 * We also don't need the lock at mount time, because no other task | ||||
| 	 * can update the flags then. | ||||
| 	 */ | ||||
| 	spinlock_t super_lock; | ||||
| 	struct btrfs_super_block *super_copy; | ||||
| 	struct btrfs_super_block *super_for_commit; | ||||
| 	struct super_block *sb; | ||||
| 	struct inode *btree_inode; | ||||
| 	struct mutex tree_log_mutex; | ||||
| 	struct mutex transaction_kthread_mutex; | ||||
| 	struct mutex cleaner_mutex; | ||||
| 	struct mutex chunk_mutex; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * This is taken to make sure we don't set block groups ro after the | ||||
| 	 * free space cache has been allocated on them. | ||||
| 	 */ | ||||
| 	struct mutex ro_block_group_mutex; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * This is used during read/modify/write to make sure no two ios are | ||||
| 	 * trying to mod the same stripe at the same time. | ||||
| 	 */ | ||||
| 	struct btrfs_stripe_hash_table *stripe_hash_table; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * This protects the ordered operations list only while we are | ||||
| 	 * processing all of the entries on it.  This way we make sure the | ||||
| 	 * commit code doesn't find the list temporarily empty because another | ||||
| 	 * function happens to be doing non-waiting preflush before jumping | ||||
| 	 * into the main commit. | ||||
| 	 */ | ||||
| 	struct mutex ordered_operations_mutex; | ||||
| 
 | ||||
| 	struct rw_semaphore commit_root_sem; | ||||
| 
 | ||||
| 	struct rw_semaphore cleanup_work_sem; | ||||
| 
 | ||||
| 	struct rw_semaphore subvol_sem; | ||||
| 
 | ||||
| 	spinlock_t trans_lock; | ||||
| 	/*
 | ||||
| 	 * The reloc mutex goes with the trans lock, it is taken during commit | ||||
| 	 * to protect us from the relocation code. | ||||
| 	 */ | ||||
| 	struct mutex reloc_mutex; | ||||
| 
 | ||||
| 	struct list_head trans_list; | ||||
| 	struct list_head dead_roots; | ||||
| 	struct list_head caching_block_groups; | ||||
| 
 | ||||
| 	spinlock_t delayed_iput_lock; | ||||
| 	struct list_head delayed_iputs; | ||||
| 	atomic_t nr_delayed_iputs; | ||||
| 	wait_queue_head_t delayed_iputs_wait; | ||||
| 
 | ||||
| 	atomic64_t tree_mod_seq; | ||||
| 
 | ||||
| 	/* This protects tree_mod_log and tree_mod_seq_list */ | ||||
| 	rwlock_t tree_mod_log_lock; | ||||
| 	struct rb_root tree_mod_log; | ||||
| 	struct list_head tree_mod_seq_list; | ||||
| 
 | ||||
| 	atomic_t async_delalloc_pages; | ||||
| 
 | ||||
| 	/* This is used to protect the following list -- ordered_roots. */ | ||||
| 	spinlock_t ordered_root_lock; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * All fs/file tree roots in which there are data=ordered extents | ||||
| 	 * pending writeback are added into this list. | ||||
| 	 * | ||||
| 	 * These can span multiple transactions and basically include every | ||||
| 	 * dirty data page that isn't from nodatacow. | ||||
| 	 */ | ||||
| 	struct list_head ordered_roots; | ||||
| 
 | ||||
| 	struct mutex delalloc_root_mutex; | ||||
| 	spinlock_t delalloc_root_lock; | ||||
| 	/* All fs/file tree roots that have delalloc inodes. */ | ||||
| 	struct list_head delalloc_roots; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * There is a pool of worker threads for checksumming during writes and | ||||
| 	 * a pool for checksumming after reads.  This is because readers can | ||||
| 	 * run with FS locks held, and the writers may be waiting for those | ||||
| 	 * locks.  We don't want ordering in the pending list to cause | ||||
| 	 * deadlocks, and so the two are serviced separately. | ||||
| 	 * | ||||
| 	 * A third pool does submit_bio to avoid deadlocking with the other two. | ||||
| 	 */ | ||||
| 	struct btrfs_workqueue *workers; | ||||
| 	struct btrfs_workqueue *hipri_workers; | ||||
| 	struct btrfs_workqueue *delalloc_workers; | ||||
| 	struct btrfs_workqueue *flush_workers; | ||||
| 	struct workqueue_struct *endio_workers; | ||||
| 	struct workqueue_struct *endio_meta_workers; | ||||
| 	struct workqueue_struct *rmw_workers; | ||||
| 	struct workqueue_struct *compressed_write_workers; | ||||
| 	struct btrfs_workqueue *endio_write_workers; | ||||
| 	struct btrfs_workqueue *endio_freespace_worker; | ||||
| 	struct btrfs_workqueue *caching_workers; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Fixup workers take dirty pages that didn't properly go through the | ||||
| 	 * cow mechanism and make them safe to write.  It happens for the | ||||
| 	 * sys_munmap function call path. | ||||
| 	 */ | ||||
| 	struct btrfs_workqueue *fixup_workers; | ||||
| 	struct btrfs_workqueue *delayed_workers; | ||||
| 
 | ||||
| 	struct task_struct *transaction_kthread; | ||||
| 	struct task_struct *cleaner_kthread; | ||||
| 	u32 thread_pool_size; | ||||
| 
 | ||||
| 	struct kobject *space_info_kobj; | ||||
| 	struct kobject *qgroups_kobj; | ||||
| 	struct kobject *discard_kobj; | ||||
| 
 | ||||
| 	/* Used to keep from writing metadata until there is a nice batch */ | ||||
| 	struct percpu_counter dirty_metadata_bytes; | ||||
| 	struct percpu_counter delalloc_bytes; | ||||
| 	struct percpu_counter ordered_bytes; | ||||
| 	s32 dirty_metadata_batch; | ||||
| 	s32 delalloc_batch; | ||||
| 
 | ||||
| 	struct list_head dirty_cowonly_roots; | ||||
| 
 | ||||
| 	struct btrfs_fs_devices *fs_devices; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * The space_info list is effectively read only after initial setup. | ||||
| 	 * It is populated at mount time and cleaned up after all block groups | ||||
| 	 * are removed.  RCU is used to protect it. | ||||
| 	 */ | ||||
| 	struct list_head space_info; | ||||
| 
 | ||||
| 	struct btrfs_space_info *data_sinfo; | ||||
| 
 | ||||
| 	struct reloc_control *reloc_ctl; | ||||
| 
 | ||||
| 	/* data_alloc_cluster is only used in ssd_spread mode */ | ||||
| 	struct btrfs_free_cluster data_alloc_cluster; | ||||
| 
 | ||||
| 	/* All metadata allocations go through this cluster. */ | ||||
| 	struct btrfs_free_cluster meta_alloc_cluster; | ||||
| 
 | ||||
| 	/* Auto defrag inodes go here. */ | ||||
| 	spinlock_t defrag_inodes_lock; | ||||
| 	struct rb_root defrag_inodes; | ||||
| 	atomic_t defrag_running; | ||||
| 
 | ||||
| 	/* Used to protect avail_{data, metadata, system}_alloc_bits */ | ||||
| 	seqlock_t profiles_lock; | ||||
| 	/*
 | ||||
| 	 * These three are in extended format (availability of single chunks is | ||||
| 	 * denoted by BTRFS_AVAIL_ALLOC_BIT_SINGLE bit, other types are denoted | ||||
| 	 * by corresponding BTRFS_BLOCK_GROUP_* bits) | ||||
| 	 */ | ||||
| 	u64 avail_data_alloc_bits; | ||||
| 	u64 avail_metadata_alloc_bits; | ||||
| 	u64 avail_system_alloc_bits; | ||||
| 
 | ||||
| 	/* Balance state */ | ||||
| 	spinlock_t balance_lock; | ||||
| 	struct mutex balance_mutex; | ||||
| 	atomic_t balance_pause_req; | ||||
| 	atomic_t balance_cancel_req; | ||||
| 	struct btrfs_balance_control *balance_ctl; | ||||
| 	wait_queue_head_t balance_wait_q; | ||||
| 
 | ||||
| 	/* Cancellation requests for chunk relocation */ | ||||
| 	atomic_t reloc_cancel_req; | ||||
| 
 | ||||
| 	u32 data_chunk_allocations; | ||||
| 	u32 metadata_ratio; | ||||
| 
 | ||||
| 	void *bdev_holder; | ||||
| 
 | ||||
| 	/* Private scrub information */ | ||||
| 	struct mutex scrub_lock; | ||||
| 	atomic_t scrubs_running; | ||||
| 	atomic_t scrub_pause_req; | ||||
| 	atomic_t scrubs_paused; | ||||
| 	atomic_t scrub_cancel_req; | ||||
| 	wait_queue_head_t scrub_pause_wait; | ||||
| 	/*
 | ||||
| 	 * The worker pointers are NULL iff the refcount is 0, i.e. scrub is not | ||||
| 	 * running. | ||||
| 	 */ | ||||
| 	refcount_t scrub_workers_refcnt; | ||||
| 	struct workqueue_struct *scrub_workers; | ||||
| 	struct workqueue_struct *scrub_wr_completion_workers; | ||||
| 	struct workqueue_struct *scrub_parity_workers; | ||||
| 	struct btrfs_subpage_info *subpage_info; | ||||
| 
 | ||||
| 	struct btrfs_discard_ctl discard_ctl; | ||||
| 
 | ||||
| #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY | ||||
| 	u32 check_integrity_print_mask; | ||||
| #endif | ||||
| 	/* Is qgroup tracking in a consistent state? */ | ||||
| 	u64 qgroup_flags; | ||||
| 
 | ||||
| 	/* Holds configuration and tracking. Protected by qgroup_lock. */ | ||||
| 	struct rb_root qgroup_tree; | ||||
| 	spinlock_t qgroup_lock; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Used to avoid frequently calling ulist_alloc()/ulist_free() | ||||
| 	 * when doing qgroup accounting; it must be protected by qgroup_lock. | ||||
| 	 */ | ||||
| 	struct ulist *qgroup_ulist; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Protect user change for quota operations. If a transaction is needed, | ||||
| 	 * it must be started before locking this lock. | ||||
| 	 */ | ||||
| 	struct mutex qgroup_ioctl_lock; | ||||
| 
 | ||||
| 	/* List of dirty qgroups to be written at next commit. */ | ||||
| 	struct list_head dirty_qgroups; | ||||
| 
 | ||||
| 	/* Used by qgroup for an efficient tree traversal. */ | ||||
| 	u64 qgroup_seq; | ||||
| 
 | ||||
| 	/* Qgroup rescan items. */ | ||||
| 	/* Protects the progress item */ | ||||
| 	struct mutex qgroup_rescan_lock; | ||||
| 	struct btrfs_key qgroup_rescan_progress; | ||||
| 	struct btrfs_workqueue *qgroup_rescan_workers; | ||||
| 	struct completion qgroup_rescan_completion; | ||||
| 	struct btrfs_work qgroup_rescan_work; | ||||
| 	/* Protected by qgroup_rescan_lock */ | ||||
| 	bool qgroup_rescan_running; | ||||
| 	u8 qgroup_drop_subtree_thres; | ||||
| 
 | ||||
| 	/* Filesystem state */ | ||||
| 	unsigned long fs_state; | ||||
| 
 | ||||
| 	struct btrfs_delayed_root *delayed_root; | ||||
| 
 | ||||
| 	/* Extent buffer radix tree */ | ||||
| 	spinlock_t buffer_lock; | ||||
| 	/* Entries are eb->start / sectorsize */ | ||||
| 	struct radix_tree_root buffer_radix; | ||||
| 
 | ||||
| 	/* Next backup root to be overwritten */ | ||||
| 	int backup_root_index; | ||||
| 
 | ||||
| 	/* Device replace state */ | ||||
| 	struct btrfs_dev_replace dev_replace; | ||||
| 
 | ||||
| 	struct semaphore uuid_tree_rescan_sem; | ||||
| 
 | ||||
| 	/* Used to reclaim the metadata space in the background. */ | ||||
| 	struct work_struct async_reclaim_work; | ||||
| 	struct work_struct async_data_reclaim_work; | ||||
| 	struct work_struct preempt_reclaim_work; | ||||
| 
 | ||||
| 	/* Reclaim partially filled block groups in the background */ | ||||
| 	struct work_struct reclaim_bgs_work; | ||||
| 	struct list_head reclaim_bgs; | ||||
| 	int bg_reclaim_threshold; | ||||
| 
 | ||||
| 	spinlock_t unused_bgs_lock; | ||||
| 	struct list_head unused_bgs; | ||||
| 	struct mutex unused_bg_unpin_mutex; | ||||
| 	/* Protect block groups that are going to be deleted */ | ||||
| 	struct mutex reclaim_bgs_lock; | ||||
| 
 | ||||
| 	/* Cached block sizes */ | ||||
| 	u32 nodesize; | ||||
| 	u32 sectorsize; | ||||
| 	/* ilog2 of sectorsize, used to avoid 64bit division */ | ||||
| 	u32 sectorsize_bits; | ||||
| 	u32 csum_size; | ||||
| 	u32 csums_per_leaf; | ||||
| 	u32 stripesize; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Maximum size of an extent. BTRFS_MAX_EXTENT_SIZE on regular | ||||
| 	 * filesystem, on zoned it depends on the device constraints. | ||||
| 	 */ | ||||
| 	u64 max_extent_size; | ||||
| 
 | ||||
| 	/* Block groups and devices containing active swapfiles. */ | ||||
| 	spinlock_t swapfile_pins_lock; | ||||
| 	struct rb_root swapfile_pins; | ||||
| 
 | ||||
| 	struct crypto_shash *csum_shash; | ||||
| 
 | ||||
| 	/* Type of exclusive operation running, protected by super_lock */ | ||||
| 	enum btrfs_exclusive_operation exclusive_operation; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Zone size > 0 when in ZONED mode, otherwise it's 0, so the value | ||||
| 	 * also serves as a check whether the mode is enabled. | ||||
| 	 */ | ||||
| 	u64 zone_size; | ||||
| 
 | ||||
| 	/* Max size to emit ZONE_APPEND write command */ | ||||
| 	u64 max_zone_append_size; | ||||
| 	struct mutex zoned_meta_io_lock; | ||||
| 	spinlock_t treelog_bg_lock; | ||||
| 	u64 treelog_bg; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Start of the dedicated data relocation block group, protected by | ||||
| 	 * relocation_bg_lock. | ||||
| 	 */ | ||||
| 	spinlock_t relocation_bg_lock; | ||||
| 	u64 data_reloc_bg; | ||||
| 	struct mutex zoned_data_reloc_io_lock; | ||||
| 
 | ||||
| 	u64 nr_global_roots; | ||||
| 
 | ||||
| 	spinlock_t zone_active_bgs_lock; | ||||
| 	struct list_head zone_active_bgs; | ||||
| 
 | ||||
| 	/* Updates are not protected by any lock */ | ||||
| 	struct btrfs_commit_stats commit_stats; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Last generation where we dropped a non-relocation root. | ||||
| 	 * Use btrfs_set_last_root_drop_gen() and btrfs_get_last_root_drop_gen() | ||||
| 	 * to change it and to read it, respectively. | ||||
| 	 */ | ||||
| 	u64 last_root_drop_gen; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Annotations for transaction events (structures are empty when | ||||
| 	 * compiled without lockdep). | ||||
| 	 */ | ||||
| 	struct lockdep_map btrfs_trans_num_writers_map; | ||||
| 	struct lockdep_map btrfs_trans_num_extwriters_map; | ||||
| 	struct lockdep_map btrfs_state_change_map[4]; | ||||
| 	struct lockdep_map btrfs_trans_pending_ordered_map; | ||||
| 	struct lockdep_map btrfs_ordered_extent_map; | ||||
| 
 | ||||
| #ifdef CONFIG_BTRFS_FS_REF_VERIFY | ||||
| 	spinlock_t ref_verify_lock; | ||||
| 	struct rb_root block_tree; | ||||
| #endif | ||||
| 
 | ||||
| #ifdef CONFIG_BTRFS_DEBUG | ||||
| 	struct kobject *debug_kobj; | ||||
| 	struct list_head allocated_roots; | ||||
| 
 | ||||
| 	spinlock_t eb_leak_lock; | ||||
| 	struct list_head allocated_ebs; | ||||
| #endif | ||||
| }; | ||||
| 
 | ||||
| static inline void btrfs_set_last_root_drop_gen(struct btrfs_fs_info *fs_info, | ||||
| 						u64 gen) | ||||
| { | ||||
| 	WRITE_ONCE(fs_info->last_root_drop_gen, gen); | ||||
| } | ||||
| 
 | ||||
| static inline u64 btrfs_get_last_root_drop_gen(const struct btrfs_fs_info *fs_info) | ||||
| { | ||||
| 	return READ_ONCE(fs_info->last_root_drop_gen); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Take the number of bytes to be checksummed and figure out how many leaves | ||||
|  * it would require to store the csums for that many bytes. | ||||
|  */ | ||||
| static inline u64 btrfs_csum_bytes_to_leaves( | ||||
| 			const struct btrfs_fs_info *fs_info, u64 csum_bytes) | ||||
| { | ||||
| 	const u64 num_csums = csum_bytes >> fs_info->sectorsize_bits; | ||||
| 
 | ||||
| 	return DIV_ROUND_UP_ULL(num_csums, fs_info->csums_per_leaf); | ||||
| } | ||||
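For example, with 4 KiB sectors (sectorsize_bits == 12), checksumming 1 MiB of data needs 256 csums; with csums_per_leaf in the thousands (the exact value depends on nodesize and checksum size), that rounds up to a single leaf.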
| 
 | ||||
| /*
 | ||||
|  * Use this if we would be adding new items, as we could split nodes as we cow | ||||
|  * down the tree. | ||||
|  */ | ||||
| static inline u64 btrfs_calc_insert_metadata_size(struct btrfs_fs_info *fs_info, | ||||
| 						  unsigned num_items) | ||||
| { | ||||
| 	return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * 2 * num_items; | ||||
| } | ||||
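With a 16 KiB nodesize and BTRFS_MAX_LEVEL of 8, one item therefore reserves 16 KiB * 8 * 2 = 256 KiB: enough to COW a full root-to-leaf path plus a possible split at every level. The non-insert variant below drops the factor of two, since no splits can happen.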
| 
 | ||||
| /*
 | ||||
|  * Doing a truncate or a modification won't result in new nodes or leaves, just | ||||
|  * what we need for COW. | ||||
|  */ | ||||
| static inline u64 btrfs_calc_metadata_size(struct btrfs_fs_info *fs_info, | ||||
| 						 unsigned num_items) | ||||
| { | ||||
| 	return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * num_items; | ||||
| } | ||||
| 
 | ||||
| #define BTRFS_MAX_EXTENT_ITEM_SIZE(r) ((BTRFS_LEAF_DATA_SIZE(r->fs_info) >> 4) - \ | ||||
| 					sizeof(struct btrfs_item)) | ||||
| 
 | ||||
| static inline bool btrfs_is_zoned(const struct btrfs_fs_info *fs_info) | ||||
| { | ||||
| 	return fs_info->zone_size > 0; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Count how many extents of fs_info->max_extent_size it takes to cover @size. | ||||
|  */ | ||||
| static inline u32 count_max_extents(struct btrfs_fs_info *fs_info, u64 size) | ||||
| { | ||||
| #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | ||||
| 	if (!fs_info) | ||||
| 		return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE); | ||||
| #endif | ||||
| 
 | ||||
| 	return div_u64(size + fs_info->max_extent_size - 1, fs_info->max_extent_size); | ||||
| } | ||||
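For example, a 300 MiB range on a regular (non-zoned) filesystem counts as DIV_ROUND_UP(300 MiB, 128 MiB) = 3 extents; on zoned devices the divisor is the (possibly smaller) device-imposed max_extent_size.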
| 
 | ||||
| bool btrfs_exclop_start(struct btrfs_fs_info *fs_info, | ||||
| 			enum btrfs_exclusive_operation type); | ||||
| bool btrfs_exclop_start_try_lock(struct btrfs_fs_info *fs_info, | ||||
| 				 enum btrfs_exclusive_operation type); | ||||
| void btrfs_exclop_start_unlock(struct btrfs_fs_info *fs_info); | ||||
| void btrfs_exclop_finish(struct btrfs_fs_info *fs_info); | ||||
| void btrfs_exclop_balance(struct btrfs_fs_info *fs_info, | ||||
| 			  enum btrfs_exclusive_operation op); | ||||
| 
 | ||||
| /* Compatibility and incompatibility defines */ | ||||
| void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag, | ||||
| 			     const char *name); | ||||
| void __btrfs_clear_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag, | ||||
| 			       const char *name); | ||||
| void __btrfs_set_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag, | ||||
| 			      const char *name); | ||||
| void __btrfs_clear_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag, | ||||
| 				const char *name); | ||||
| 
 | ||||
| #define __btrfs_fs_incompat(fs_info, flags)				\ | ||||
| 	(!!(btrfs_super_incompat_flags((fs_info)->super_copy) & (flags))) | ||||
| 
 | ||||
| #define __btrfs_fs_compat_ro(fs_info, flags)				\ | ||||
| 	(!!(btrfs_super_compat_ro_flags((fs_info)->super_copy) & (flags))) | ||||
| 
 | ||||
| #define btrfs_set_fs_incompat(__fs_info, opt)				\ | ||||
| 	__btrfs_set_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt, #opt) | ||||
| 
 | ||||
| #define btrfs_clear_fs_incompat(__fs_info, opt)				\ | ||||
| 	__btrfs_clear_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt, #opt) | ||||
| 
 | ||||
| #define btrfs_fs_incompat(fs_info, opt)					\ | ||||
| 	__btrfs_fs_incompat((fs_info), BTRFS_FEATURE_INCOMPAT_##opt) | ||||
| 
 | ||||
| #define btrfs_set_fs_compat_ro(__fs_info, opt)				\ | ||||
| 	__btrfs_set_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt, #opt) | ||||
| 
 | ||||
| #define btrfs_clear_fs_compat_ro(__fs_info, opt)			\ | ||||
| 	__btrfs_clear_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt, #opt) | ||||
| 
 | ||||
| #define btrfs_fs_compat_ro(fs_info, opt)				\ | ||||
| 	__btrfs_fs_compat_ro((fs_info), BTRFS_FEATURE_COMPAT_RO_##opt) | ||||
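Usage sketch: the short feature name is token-pasted into both the flag constant and a printable string, so a call like

	btrfs_set_fs_incompat(fs_info, RAID1C34);

expands to __btrfs_set_fs_incompat((fs_info), BTRFS_FEATURE_INCOMPAT_RAID1C34, "RAID1C34"), which is what makes the "setting incompat feature flag for %s" messages in fs.c self-describing.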
| 
 | ||||
| #define btrfs_clear_opt(o, opt)		((o) &= ~BTRFS_MOUNT_##opt) | ||||
| #define btrfs_set_opt(o, opt)		((o) |= BTRFS_MOUNT_##opt) | ||||
| #define btrfs_raw_test_opt(o, opt)	((o) & BTRFS_MOUNT_##opt) | ||||
| #define btrfs_test_opt(fs_info, opt)	((fs_info)->mount_opt & \ | ||||
| 					 BTRFS_MOUNT_##opt) | ||||
| 
 | ||||
| #define btrfs_set_and_info(fs_info, opt, fmt, args...)			\ | ||||
| do {									\ | ||||
| 	if (!btrfs_test_opt(fs_info, opt))				\ | ||||
| 		btrfs_info(fs_info, fmt, ##args);			\ | ||||
| 	btrfs_set_opt(fs_info->mount_opt, opt);				\ | ||||
| } while (0) | ||||
| 
 | ||||
| #define btrfs_clear_and_info(fs_info, opt, fmt, args...)		\ | ||||
| do {									\ | ||||
| 	if (btrfs_test_opt(fs_info, opt))				\ | ||||
| 		btrfs_info(fs_info, fmt, ##args);			\ | ||||
| 	btrfs_clear_opt(fs_info->mount_opt, opt);			\ | ||||
| } while (0) | ||||
| 
 | ||||
| static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info) | ||||
| { | ||||
| 	/* Do it this way so we only ever do one test_bit in the normal case. */ | ||||
| 	if (test_bit(BTRFS_FS_CLOSING_START, &fs_info->flags)) { | ||||
| 		if (test_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags)) | ||||
| 			return 2; | ||||
| 		return 1; | ||||
| 	} | ||||
| 	return 0; | ||||
| } | ||||
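The return value is effectively a tri-state: 0 when the fs is not closing, 1 once closing has started, 2 once it has finished; most callers only test for non-zero.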
| 
 | ||||
| /*
 | ||||
|  * If we remount the fs read-only or unmount it, the cleaner needn't do | ||||
|  * anything except sleep. This function is used to check the status of | ||||
|  * the fs. | ||||
|  * We check for BTRFS_FS_STATE_RO to avoid races with a concurrent remount, | ||||
|  * since setting and checking for SB_RDONLY in the superblock's flags is not | ||||
|  * atomic. | ||||
|  */ | ||||
| static inline int btrfs_need_cleaner_sleep(struct btrfs_fs_info *fs_info) | ||||
| { | ||||
| 	return test_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state) || | ||||
| 		btrfs_fs_closing(fs_info); | ||||
| } | ||||
| 
 | ||||
| static inline void btrfs_wake_unfinished_drop(struct btrfs_fs_info *fs_info) | ||||
| { | ||||
| 	clear_and_wake_up_bit(BTRFS_FS_UNFINISHED_DROPS, &fs_info->flags); | ||||
| } | ||||
| 
 | ||||
| #define BTRFS_FS_ERROR(fs_info)	(unlikely(test_bit(BTRFS_FS_STATE_ERROR, \ | ||||
| 						   &(fs_info)->fs_state))) | ||||
| #define BTRFS_FS_LOG_CLEANUP_ERROR(fs_info)				\ | ||||
| 	(unlikely(test_bit(BTRFS_FS_STATE_LOG_CLEANUP_ERROR,		\ | ||||
| 			   &(fs_info)->fs_state))) | ||||
| 
 | ||||
| #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | ||||
| 
 | ||||
| #define EXPORT_FOR_TESTS | ||||
| 
 | ||||
| static inline int btrfs_is_testing(struct btrfs_fs_info *fs_info) | ||||
| { | ||||
| 	return test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state); | ||||
| } | ||||
| 
 | ||||
| void btrfs_test_destroy_inode(struct inode *inode); | ||||
| 
 | ||||
| #else | ||||
| 
 | ||||
| #define EXPORT_FOR_TESTS static | ||||
| 
 | ||||
| static inline int btrfs_is_testing(struct btrfs_fs_info *fs_info) | ||||
| { | ||||
| 	return 0; | ||||
| } | ||||
| #endif | ||||
| 
 | ||||
| #endif | ||||
|  | @ -4,14 +4,20 @@ | |||
|  */ | ||||
| 
 | ||||
| #include "ctree.h" | ||||
| #include "fs.h" | ||||
| #include "messages.h" | ||||
| #include "inode-item.h" | ||||
| #include "disk-io.h" | ||||
| #include "transaction.h" | ||||
| #include "print-tree.h" | ||||
| #include "space-info.h" | ||||
| #include "accessors.h" | ||||
| #include "extent-tree.h" | ||||
| #include "file-item.h" | ||||
| 
 | ||||
| struct btrfs_inode_ref *btrfs_find_name_in_backref(struct extent_buffer *leaf, | ||||
| 						   int slot, const char *name, | ||||
| 						   int name_len) | ||||
| 						   int slot, | ||||
| 						   const struct fscrypt_str *name) | ||||
| { | ||||
| 	struct btrfs_inode_ref *ref; | ||||
| 	unsigned long ptr; | ||||
|  | @ -27,9 +33,10 @@ struct btrfs_inode_ref *btrfs_find_name_in_backref(struct extent_buffer *leaf, | |||
| 		len = btrfs_inode_ref_name_len(leaf, ref); | ||||
| 		name_ptr = (unsigned long)(ref + 1); | ||||
| 		cur_offset += len + sizeof(*ref); | ||||
| 		if (len != name_len) | ||||
| 		if (len != name->len) | ||||
| 			continue; | ||||
| 		if (memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0) | ||||
| 		if (memcmp_extent_buffer(leaf, name->name, name_ptr, | ||||
| 					 name->len) == 0) | ||||
| 			return ref; | ||||
| 	} | ||||
| 	return NULL; | ||||
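These lookups now take a struct fscrypt_str (a name pointer bundled with its length) instead of separate char */int arguments, matching the form the fscrypt code works with. A caller-side sketch, assuming an on-stack name built with FSTR_INIT() from <linux/fscrypt.h>:

	struct fscrypt_str name = FSTR_INIT("subvol", 6);

	ref = btrfs_find_name_in_backref(leaf, slot, &name);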
|  | @ -37,7 +44,7 @@ struct btrfs_inode_ref *btrfs_find_name_in_backref(struct extent_buffer *leaf, | |||
| 
 | ||||
| struct btrfs_inode_extref *btrfs_find_name_in_ext_backref( | ||||
| 		struct extent_buffer *leaf, int slot, u64 ref_objectid, | ||||
| 		const char *name, int name_len) | ||||
| 		const struct fscrypt_str *name) | ||||
| { | ||||
| 	struct btrfs_inode_extref *extref; | ||||
| 	unsigned long ptr; | ||||
|  | @ -60,9 +67,10 @@ struct btrfs_inode_extref *btrfs_find_name_in_ext_backref( | |||
| 		name_ptr = (unsigned long)(&extref->name); | ||||
 		ref_name_len = btrfs_inode_extref_name_len(leaf, extref);

-		if (ref_name_len == name_len &&
+		if (ref_name_len == name->len &&
 		    btrfs_inode_extref_parent(leaf, extref) == ref_objectid &&
-		    (memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0))
+		    (memcmp_extent_buffer(leaf, name->name, name_ptr,
+					  name->len) == 0))
 			return extref;

 		cur_offset += ref_name_len + sizeof(*extref);
@@ -75,7 +83,7 @@ struct btrfs_inode_extref *
 btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *root,
 			  struct btrfs_path *path,
-			  const char *name, int name_len,
+			  const struct fscrypt_str *name,
 			  u64 inode_objectid, u64 ref_objectid, int ins_len,
 			  int cow)
 {
@@ -84,7 +92,7 @@ btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans,

 	key.objectid = inode_objectid;
 	key.type = BTRFS_INODE_EXTREF_KEY;
-	key.offset = btrfs_extref_hash(ref_objectid, name, name_len);
+	key.offset = btrfs_extref_hash(ref_objectid, name->name, name->len);

 	ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
 	if (ret < 0)
@@ -92,13 +100,13 @@ btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans,
 	if (ret > 0)
 		return NULL;
 	return btrfs_find_name_in_ext_backref(path->nodes[0], path->slots[0],
-					      ref_objectid, name, name_len);
+					      ref_objectid, name);
 }

 static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
 				  struct btrfs_root *root,
-				  const char *name, int name_len,
+				  const struct fscrypt_str *name,
 				  u64 inode_objectid, u64 ref_objectid,
 				  u64 *index)
 {
@@ -107,14 +115,14 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
 	struct btrfs_inode_extref *extref;
 	struct extent_buffer *leaf;
 	int ret;
-	int del_len = name_len + sizeof(*extref);
+	int del_len = name->len + sizeof(*extref);
 	unsigned long ptr;
 	unsigned long item_start;
 	u32 item_size;

 	key.objectid = inode_objectid;
 	key.type = BTRFS_INODE_EXTREF_KEY;
-	key.offset = btrfs_extref_hash(ref_objectid, name, name_len);
+	key.offset = btrfs_extref_hash(ref_objectid, name->name, name->len);

 	path = btrfs_alloc_path();
 	if (!path)
@@ -132,7 +140,7 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
 	 * readonly.
 	 */
 	extref = btrfs_find_name_in_ext_backref(path->nodes[0], path->slots[0],
-						ref_objectid, name, name_len);
+						ref_objectid, name);
 	if (!extref) {
 		btrfs_handle_fs_error(root->fs_info, -ENOENT, NULL);
 		ret = -EROFS;
@@ -168,8 +176,7 @@ out:
 }

 int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
-			struct btrfs_root *root,
-			const char *name, int name_len,
+			struct btrfs_root *root, const struct fscrypt_str *name,
 			u64 inode_objectid, u64 ref_objectid, u64 *index)
 {
 	struct btrfs_path *path;
@@ -182,7 +189,7 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
 	u32 sub_item_len;
 	int ret;
 	int search_ext_refs = 0;
-	int del_len = name_len + sizeof(*ref);
+	int del_len = name->len + sizeof(*ref);

 	key.objectid = inode_objectid;
 	key.offset = ref_objectid;
@@ -201,8 +208,7 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
 		goto out;
 	}

-	ref = btrfs_find_name_in_backref(path->nodes[0], path->slots[0], name,
-					 name_len);
+	ref = btrfs_find_name_in_backref(path->nodes[0], path->slots[0], name);
 	if (!ref) {
 		ret = -ENOENT;
 		search_ext_refs = 1;
@@ -219,7 +225,7 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
 		goto out;
 	}
 	ptr = (unsigned long)ref;
-	sub_item_len = name_len + sizeof(*ref);
+	sub_item_len = name->len + sizeof(*ref);
 	item_start = btrfs_item_ptr_offset(leaf, path->slots[0]);
 	memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
 			      item_size - (ptr + sub_item_len - item_start));
@@ -233,7 +239,7 @@ out:
 		 * name in our ref array. Find and remove the extended
 		 * inode ref then.
 		 */
-		return btrfs_del_inode_extref(trans, root, name, name_len,
+		return btrfs_del_inode_extref(trans, root, name,
 					      inode_objectid, ref_objectid, index);
 	}

@@ -247,12 +253,13 @@ out:
  */
 static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
 				     struct btrfs_root *root,
-				     const char *name, int name_len,
-				     u64 inode_objectid, u64 ref_objectid, u64 index)
+				     const struct fscrypt_str *name,
+				     u64 inode_objectid, u64 ref_objectid,
+				     u64 index)
 {
 	struct btrfs_inode_extref *extref;
 	int ret;
-	int ins_len = name_len + sizeof(*extref);
+	int ins_len = name->len + sizeof(*extref);
 	unsigned long ptr;
 	struct btrfs_path *path;
 	struct btrfs_key key;
@@ -260,7 +267,7 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,

 	key.objectid = inode_objectid;
 	key.type = BTRFS_INODE_EXTREF_KEY;
-	key.offset = btrfs_extref_hash(ref_objectid, name, name_len);
+	key.offset = btrfs_extref_hash(ref_objectid, name->name, name->len);

 	path = btrfs_alloc_path();
 	if (!path)
@@ -272,7 +279,7 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
 		if (btrfs_find_name_in_ext_backref(path->nodes[0],
 						   path->slots[0],
 						   ref_objectid,
-						   name, name_len))
+						   name))
 			goto out;

 		btrfs_extend_item(path, ins_len);
@@ -286,12 +293,12 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
 	ptr += btrfs_item_size(leaf, path->slots[0]) - ins_len;
 	extref = (struct btrfs_inode_extref *)ptr;

-	btrfs_set_inode_extref_name_len(path->nodes[0], extref, name_len);
+	btrfs_set_inode_extref_name_len(path->nodes[0], extref, name->len);
 	btrfs_set_inode_extref_index(path->nodes[0], extref, index);
 	btrfs_set_inode_extref_parent(path->nodes[0], extref, ref_objectid);

 	ptr = (unsigned long)&extref->name;
-	write_extent_buffer(path->nodes[0], name, ptr, name_len);
+	write_extent_buffer(path->nodes[0], name->name, ptr, name->len);
 	btrfs_mark_buffer_dirty(path->nodes[0]);

 out:
@@ -301,8 +308,7 @@ out:

 /* Will return 0, -ENOMEM, -EMLINK, or -EEXIST or anything from the CoW path */
 int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
-			   struct btrfs_root *root,
-			   const char *name, int name_len,
+			   struct btrfs_root *root, const struct fscrypt_str *name,
 			   u64 inode_objectid, u64 ref_objectid, u64 index)
 {
 	struct btrfs_fs_info *fs_info = root->fs_info;
@@ -311,7 +317,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
 	struct btrfs_inode_ref *ref;
 	unsigned long ptr;
 	int ret;
-	int ins_len = name_len + sizeof(*ref);
+	int ins_len = name->len + sizeof(*ref);

 	key.objectid = inode_objectid;
 	key.offset = ref_objectid;
@@ -327,7 +333,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
 	if (ret == -EEXIST) {
 		u32 old_size;
 		ref = btrfs_find_name_in_backref(path->nodes[0], path->slots[0],
-						 name, name_len);
+						 name);
 		if (ref)
 			goto out;

@@ -336,7 +342,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
 		ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
 				     struct btrfs_inode_ref);
 		ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size);
-		btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
+		btrfs_set_inode_ref_name_len(path->nodes[0], ref, name->len);
 		btrfs_set_inode_ref_index(path->nodes[0], ref, index);
 		ptr = (unsigned long)(ref + 1);
 		ret = 0;
@@ -344,7 +350,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
 		if (ret == -EOVERFLOW) {
 			if (btrfs_find_name_in_backref(path->nodes[0],
 						       path->slots[0],
-						       name, name_len))
+						       name))
 				ret = -EEXIST;
 			else
 				ret = -EMLINK;
@@ -353,11 +359,11 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
 	} else {
 		ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
 				     struct btrfs_inode_ref);
-		btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
+		btrfs_set_inode_ref_name_len(path->nodes[0], ref, name->len);
 		btrfs_set_inode_ref_index(path->nodes[0], ref, index);
 		ptr = (unsigned long)(ref + 1);
 	}
-	write_extent_buffer(path->nodes[0], name, ptr, name_len);
+	write_extent_buffer(path->nodes[0], name->name, ptr, name->len);
 	btrfs_mark_buffer_dirty(path->nodes[0]);

 out:
@@ -370,7 +376,6 @@ out:
 		if (btrfs_super_incompat_flags(disk_super)
 		    & BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF)
 			ret = btrfs_insert_inode_extref(trans, root, name,
-							name_len,
 							inode_objectid,
 							ref_objectid, index);
 	}
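All of the helpers above now take the name as a single const struct fscrypt_str * instead of a name/name_len pair, as part of the fscrypt preparation mentioned in the pull message. A minimal sketch of a caller after the conversion; demo_insert_ref() is hypothetical, while btrfs_insert_inode_ref() and FSTR_INIT() (from linux/fscrypt.h) are real:

/*
 * Hypothetical caller: bundle a raw name and its length into an
 * fscrypt_str and insert the inode backref with it.
 */
static int demo_insert_ref(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root, const char *raw, u32 len,
			   u64 inode_objectid, u64 ref_objectid, u64 index)
{
	struct fscrypt_str name = FSTR_INIT((unsigned char *)raw, len);

	return btrfs_insert_inode_ref(trans, root, &name,
				      inode_objectid, ref_objectid, index);
}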
@@ -64,33 +64,31 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *root,
 			       struct btrfs_truncate_control *control);
 int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
-			   struct btrfs_root *root,
-			   const char *name, int name_len,
+			   struct btrfs_root *root, const struct fscrypt_str *name,
 			   u64 inode_objectid, u64 ref_objectid, u64 index);
 int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
-			   struct btrfs_root *root,
-			   const char *name, int name_len,
-			   u64 inode_objectid, u64 ref_objectid, u64 *index);
+			struct btrfs_root *root, const struct fscrypt_str *name,
+			u64 inode_objectid, u64 ref_objectid, u64 *index);
 int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root,
 			     struct btrfs_path *path, u64 objectid);
-int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root
-		       *root, struct btrfs_path *path,
+int btrfs_lookup_inode(struct btrfs_trans_handle *trans,
+		       struct btrfs_root *root, struct btrfs_path *path,
 		       struct btrfs_key *location, int mod);

 struct btrfs_inode_extref *btrfs_lookup_inode_extref(
 			  struct btrfs_trans_handle *trans,
 			  struct btrfs_root *root,
 			  struct btrfs_path *path,
-			  const char *name, int name_len,
+			  const struct fscrypt_str *name,
 			  u64 inode_objectid, u64 ref_objectid, int ins_len,
 			  int cow);

 struct btrfs_inode_ref *btrfs_find_name_in_backref(struct extent_buffer *leaf,
-						   int slot, const char *name,
-						   int name_len);
+						   int slot,
+						   const struct fscrypt_str *name);
 struct btrfs_inode_extref *btrfs_find_name_in_ext_backref(
 		struct extent_buffer *leaf, int slot, u64 ref_objectid,
-		const char *name, int name_len);
+		const struct fscrypt_str *name);

 #endif

fs/btrfs/inode.c | 904 (file diff suppressed because it is too large)
fs/btrfs/ioctl.c | 945 (file diff suppressed because it is too large)
fs/btrfs/ioctl.h |  17 (new file)

@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef BTRFS_IOCTL_H
+#define BTRFS_IOCTL_H
+
+long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+int btrfs_fileattr_get(struct dentry *dentry, struct fileattr *fa);
+int btrfs_fileattr_set(struct user_namespace *mnt_userns,
+		       struct dentry *dentry, struct fileattr *fa);
+int btrfs_ioctl_get_supported_features(void __user *arg);
+void btrfs_sync_inode_flags_to_i_flags(struct inode *inode);
+int __pure btrfs_is_empty_uuid(u8 *uuid);
+void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
+				     struct btrfs_ioctl_balance_args *bargs);
+
+#endif
@@ -12,6 +12,7 @@
 #include "ctree.h"
 #include "extent_io.h"
 #include "locking.h"
+#include "accessors.h"

 /*
  * Lockdep class keys for extent_buffer->lock's in this root.  For a given
@@ -78,6 +78,82 @@ enum btrfs_lock_nesting {
 	BTRFS_NESTING_MAX,
 };

+enum btrfs_lockdep_trans_states {
+	BTRFS_LOCKDEP_TRANS_COMMIT_START,
+	BTRFS_LOCKDEP_TRANS_UNBLOCKED,
+	BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED,
+	BTRFS_LOCKDEP_TRANS_COMPLETED,
+};
+
+/*
+ * Lockdep annotation for wait events.
+ *
+ * @owner:  The struct where the lockdep map is defined
+ * @lock:   The lockdep map corresponding to a wait event
+ *
+ * This macro is used to annotate a wait event. In this case a thread acquires
+ * the lockdep map as writer (exclusive lock) because it has to block until all
+ * the threads that hold the lock as readers signal the condition for the wait
+ * event and release their locks.
+ */
+#define btrfs_might_wait_for_event(owner, lock)					\
+	do {									\
+		rwsem_acquire(&owner->lock##_map, 0, 0, _THIS_IP_);		\
+		rwsem_release(&owner->lock##_map, _THIS_IP_);			\
+	} while (0)
+
+/*
+ * Protection for the resource/condition of a wait event.
+ *
+ * @owner:  The struct where the lockdep map is defined
+ * @lock:   The lockdep map corresponding to a wait event
+ *
+ * Many threads can modify the condition for the wait event at the same time
+ * and signal the threads that block on the wait event. The threads that modify
+ * the condition and do the signaling acquire the lock as readers (shared
+ * lock).
+ */
+#define btrfs_lockdep_acquire(owner, lock)					\
+	rwsem_acquire_read(&owner->lock##_map, 0, 0, _THIS_IP_)
+
+/*
+ * Used after signaling the condition for a wait event to release the lockdep
+ * map held by a reader thread.
+ */
+#define btrfs_lockdep_release(owner, lock)					\
+	rwsem_release(&owner->lock##_map, _THIS_IP_)
+
+/*
+ * Macros for the transaction states wait events, similar to the generic wait
+ * event macros.
+ */
+#define btrfs_might_wait_for_state(owner, i)					\
+	do {									\
+		rwsem_acquire(&owner->btrfs_state_change_map[i], 0, 0, _THIS_IP_); \
+		rwsem_release(&owner->btrfs_state_change_map[i], _THIS_IP_);	\
+	} while (0)
+
+#define btrfs_trans_state_lockdep_acquire(owner, i)				\
+	rwsem_acquire_read(&owner->btrfs_state_change_map[i], 0, 0, _THIS_IP_)
+
+#define btrfs_trans_state_lockdep_release(owner, i)				\
+	rwsem_release(&owner->btrfs_state_change_map[i], _THIS_IP_)
+
+/* Initialization of the lockdep map */
+#define btrfs_lockdep_init_map(owner, lock)					\
+	do {									\
+		static struct lock_class_key lock##_key;			\
+		lockdep_init_map(&owner->lock##_map, #lock, &lock##_key, 0);	\
+	} while (0)
+
+/* Initialization of the transaction states lockdep maps. */
+#define btrfs_state_lockdep_init_map(owner, lock, state)			\
+	do {									\
+		static struct lock_class_key lock##_key;			\
+		lockdep_init_map(&owner->btrfs_state_change_map[state], #lock,	\
+				 &lock##_key, 0);				\
+	} while (0)
+
 static_assert(BTRFS_NESTING_MAX <= MAX_LOCKDEP_SUBCLASSES,
 	      "too many lock subclasses defined");
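The wait-event macros above map a condition onto a lockdep rwsem: a waiter takes the map as writer for an instant, while every thread that can change the condition holds it as reader. A sketch of the intended pairing, assuming a hypothetical owner struct; the pending_ops/pending_ops_map names are invented to satisfy the owner->lock##_map token-pasting convention, and the map would be set up once with btrfs_lockdep_init_map(owner, pending_ops):

struct demo_owner {
	wait_queue_head_t wait;
	atomic_t pending_ops;
	struct lockdep_map pending_ops_map;	/* consumed as owner->lock##_map */
};

/* Start of an operation: hold the map as reader while the condition can change. */
static void demo_start_op(struct demo_owner *o)
{
	btrfs_lockdep_acquire(o, pending_ops);
	atomic_inc(&o->pending_ops);
}

/* End of an operation: signal the condition, then drop the reader side. */
static void demo_finish_op(struct demo_owner *o)
{
	if (atomic_dec_and_test(&o->pending_ops))
		wake_up(&o->wait);
	btrfs_lockdep_release(o, pending_ops);
}

/* Waiter: annotate that we may block until all pending operations finish. */
static void demo_wait_for_ops(struct demo_owner *o)
{
	btrfs_might_wait_for_event(o, pending_ops);
	wait_event(o->wait, atomic_read(&o->pending_ops) == 0);
}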
@@ -13,8 +13,10 @@
 #include <linux/bio.h>
 #include <linux/lzo.h>
 #include <linux/refcount.h>
+#include "messages.h"
 #include "compression.h"
 #include "ctree.h"
+#include "super.h"

 #define LZO_LEN	4

@@ -425,7 +427,7 @@ out:
 	return ret;
 }

-int lzo_decompress(struct list_head *ws, unsigned char *data_in,
+int lzo_decompress(struct list_head *ws, const u8 *data_in,
 		struct page *dest_page, unsigned long start_byte, size_t srclen,
 		size_t destlen)
 {

fs/btrfs/messages.c | 353 (new file)

@@ -0,0 +1,353 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "fs.h"
+#include "messages.h"
+#include "discard.h"
+#include "transaction.h"
+#include "space-info.h"
+#include "super.h"
+
+#ifdef CONFIG_PRINTK
+
+#define STATE_STRING_PREFACE	": state "
+#define STATE_STRING_BUF_LEN	(sizeof(STATE_STRING_PREFACE) + BTRFS_FS_STATE_COUNT)
+
+/*
+ * Characters to print to indicate error conditions or uncommon filesystem state.
+ * RO is not an error.
+ */
+static const char fs_state_chars[] = {
+	[BTRFS_FS_STATE_ERROR]			= 'E',
+	[BTRFS_FS_STATE_REMOUNTING]		= 'M',
+	[BTRFS_FS_STATE_RO]			= 0,
+	[BTRFS_FS_STATE_TRANS_ABORTED]		= 'A',
+	[BTRFS_FS_STATE_DEV_REPLACING]		= 'R',
+	[BTRFS_FS_STATE_DUMMY_FS_INFO]		= 0,
+	[BTRFS_FS_STATE_NO_CSUMS]		= 'C',
+	[BTRFS_FS_STATE_LOG_CLEANUP_ERROR]	= 'L',
+};
+
+static void btrfs_state_to_string(const struct btrfs_fs_info *info, char *buf)
+{
+	unsigned int bit;
+	bool states_printed = false;
+	unsigned long fs_state = READ_ONCE(info->fs_state);
+	char *curr = buf;
+
+	memcpy(curr, STATE_STRING_PREFACE, sizeof(STATE_STRING_PREFACE));
+	curr += sizeof(STATE_STRING_PREFACE) - 1;
+
+	for_each_set_bit(bit, &fs_state, sizeof(fs_state)) {
+		WARN_ON_ONCE(bit >= BTRFS_FS_STATE_COUNT);
+		if ((bit < BTRFS_FS_STATE_COUNT) && fs_state_chars[bit]) {
+			*curr++ = fs_state_chars[bit];
+			states_printed = true;
+		}
+	}
+
+	/* If no states were printed, reset the buffer */
+	if (!states_printed)
+		curr = buf;
+
+	*curr++ = 0;
+}
+#endif
+
+/*
+ * Generally the error codes correspond to their respective errors, but there
+ * are a few special cases.
+ *
+ * EUCLEAN: Any sort of corruption that we encounter.  The tree-checker for
+ *          instance will return EUCLEAN if any of the blocks are corrupted in
+ *          a way that is problematic.  We want to reserve EUCLEAN for these
+ *          sort of corruptions.
+ *
+ * EROFS: If we check BTRFS_FS_STATE_ERROR and fail out with a return error, we
+ *        need to use EROFS for this case.  We will have no idea of the
+ *        original failure, that will have been reported at the time we tripped
+ *        over the error.  Each subsequent error that doesn't have any context
+ *        of the original error should use EROFS when handling BTRFS_FS_STATE_ERROR.
+ */
+const char * __attribute_const__ btrfs_decode_error(int errno)
+{
+	char *errstr = "unknown";
+
+	switch (errno) {
+	case -ENOENT:		/* -2 */
+		errstr = "No such entry";
+		break;
+	case -EIO:		/* -5 */
+		errstr = "IO failure";
+		break;
+	case -ENOMEM:		/* -12*/
+		errstr = "Out of memory";
+		break;
+	case -EEXIST:		/* -17 */
+		errstr = "Object already exists";
+		break;
+	case -ENOSPC:		/* -28 */
+		errstr = "No space left";
+		break;
+	case -EROFS:		/* -30 */
+		errstr = "Readonly filesystem";
+		break;
+	case -EOPNOTSUPP:	/* -95 */
+		errstr = "Operation not supported";
+		break;
+	case -EUCLEAN:		/* -117 */
+		errstr = "Filesystem corrupted";
+		break;
+	case -EDQUOT:		/* -122 */
+		errstr = "Quota exceeded";
+		break;
+	}
+
+	return errstr;
+}
+
+/*
+ * __btrfs_handle_fs_error decodes expected errors from the caller and
+ * invokes the appropriate error response.
+ */
+__cold
+void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function,
+		       unsigned int line, int errno, const char *fmt, ...)
+{
+	struct super_block *sb = fs_info->sb;
+#ifdef CONFIG_PRINTK
+	char statestr[STATE_STRING_BUF_LEN];
+	const char *errstr;
+#endif
+
+#ifdef CONFIG_PRINTK_INDEX
+	printk_index_subsys_emit(
+		"BTRFS: error (device %s%s) in %s:%d: errno=%d %s", KERN_CRIT, fmt);
+#endif
+
+	/*
+	 * Special case: if the error is EROFS, and we're already under
+	 * SB_RDONLY, then it is safe here.
+	 */
+	if (errno == -EROFS && sb_rdonly(sb))
+		return;
+
+#ifdef CONFIG_PRINTK
+	errstr = btrfs_decode_error(errno);
+	btrfs_state_to_string(fs_info, statestr);
+	if (fmt) {
+		struct va_format vaf;
+		va_list args;
+
+		va_start(args, fmt);
+		vaf.fmt = fmt;
+		vaf.va = &args;
+
+		pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s (%pV)\n",
+			sb->s_id, statestr, function, line, errno, errstr, &vaf);
+		va_end(args);
+	} else {
+		pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s\n",
+			sb->s_id, statestr, function, line, errno, errstr);
+	}
+#endif
+
+	/*
+	 * Today we only save the error info to memory.  Long term we'll also
+	 * send it down to the disk.
+	 */
+	set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
+
+	/* Don't go through full error handling during mount. */
+	if (!(sb->s_flags & SB_BORN))
+		return;
+
+	if (sb_rdonly(sb))
+		return;
+
+	btrfs_discard_stop(fs_info);
+
+	/* Handle error by forcing the filesystem readonly. */
+	btrfs_set_sb_rdonly(sb);
+	btrfs_info(fs_info, "forced readonly");
+	/*
+	 * Note that a running device replace operation is not canceled here
+	 * although there is no way to update the progress. It would add the
+	 * risk of a deadlock, therefore the canceling is omitted. The only
+	 * penalty is that some I/O remains active until the procedure
+	 * completes. The next time when the filesystem is mounted writable
+	 * again, the device replace operation continues.
+	 */
+}
+
+#ifdef CONFIG_PRINTK
+static const char * const logtypes[] = {
+	"emergency",
+	"alert",
+	"critical",
+	"error",
+	"warning",
+	"notice",
+	"info",
+	"debug",
+};
+
+/*
+ * Use one ratelimit state per log level so that a flood of less important
+ * messages doesn't cause more important ones to be dropped.
+ */
+static struct ratelimit_state printk_limits[] = {
+	RATELIMIT_STATE_INIT(printk_limits[0], DEFAULT_RATELIMIT_INTERVAL, 100),
+	RATELIMIT_STATE_INIT(printk_limits[1], DEFAULT_RATELIMIT_INTERVAL, 100),
+	RATELIMIT_STATE_INIT(printk_limits[2], DEFAULT_RATELIMIT_INTERVAL, 100),
+	RATELIMIT_STATE_INIT(printk_limits[3], DEFAULT_RATELIMIT_INTERVAL, 100),
+	RATELIMIT_STATE_INIT(printk_limits[4], DEFAULT_RATELIMIT_INTERVAL, 100),
+	RATELIMIT_STATE_INIT(printk_limits[5], DEFAULT_RATELIMIT_INTERVAL, 100),
+	RATELIMIT_STATE_INIT(printk_limits[6], DEFAULT_RATELIMIT_INTERVAL, 100),
+	RATELIMIT_STATE_INIT(printk_limits[7], DEFAULT_RATELIMIT_INTERVAL, 100),
+};
+
+void __cold _btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
+{
+	char lvl[PRINTK_MAX_SINGLE_HEADER_LEN + 1] = "\0";
+	struct va_format vaf;
+	va_list args;
+	int kern_level;
+	const char *type = logtypes[4];
+	struct ratelimit_state *ratelimit = &printk_limits[4];
+
+#ifdef CONFIG_PRINTK_INDEX
+	printk_index_subsys_emit("%sBTRFS %s (device %s): ", NULL, fmt);
+#endif
+
+	va_start(args, fmt);
+
+	while ((kern_level = printk_get_level(fmt)) != 0) {
+		size_t size = printk_skip_level(fmt) - fmt;
+
+		if (kern_level >= '0' && kern_level <= '7') {
+			memcpy(lvl, fmt,  size);
+			lvl[size] = '\0';
+			type = logtypes[kern_level - '0'];
+			ratelimit = &printk_limits[kern_level - '0'];
+		}
+		fmt += size;
+	}
+
+	vaf.fmt = fmt;
+	vaf.va = &args;
+
+	if (__ratelimit(ratelimit)) {
+		if (fs_info) {
+			char statestr[STATE_STRING_BUF_LEN];
+
+			btrfs_state_to_string(fs_info, statestr);
+			_printk("%sBTRFS %s (device %s%s): %pV\n", lvl, type,
+				fs_info->sb->s_id, statestr, &vaf);
+		} else {
+			_printk("%sBTRFS %s: %pV\n", lvl, type, &vaf);
+		}
+	}
+
+	va_end(args);
+}
+#endif
+
+#ifdef CONFIG_BTRFS_ASSERT
+void __cold btrfs_assertfail(const char *expr, const char *file, int line)
+{
+	pr_err("assertion failed: %s, in %s:%d\n", expr, file, line);
+	BUG();
+}
+#endif
+
+void __cold btrfs_print_v0_err(struct btrfs_fs_info *fs_info)
+{
+	btrfs_err(fs_info,
+"Unsupported V0 extent filesystem detected. Aborting. Please re-create your filesystem with a newer kernel");
+}
+
+#if BITS_PER_LONG == 32
+void __cold btrfs_warn_32bit_limit(struct btrfs_fs_info *fs_info)
+{
+	if (!test_and_set_bit(BTRFS_FS_32BIT_WARN, &fs_info->flags)) {
+		btrfs_warn(fs_info, "reaching 32bit limit for logical addresses");
+		btrfs_warn(fs_info,
+"due to page cache limit on 32bit systems, btrfs can't access metadata at or beyond %lluT",
+			   BTRFS_32BIT_MAX_FILE_SIZE >> 40);
+		btrfs_warn(fs_info,
+			   "please consider upgrading to 64bit kernel/hardware");
+	}
+}
+
+void __cold btrfs_err_32bit_limit(struct btrfs_fs_info *fs_info)
+{
+	if (!test_and_set_bit(BTRFS_FS_32BIT_ERROR, &fs_info->flags)) {
+		btrfs_err(fs_info, "reached 32bit limit for logical addresses");
+		btrfs_err(fs_info,
+"due to page cache limit on 32bit systems, metadata beyond %lluT can't be accessed",
+			  BTRFS_32BIT_MAX_FILE_SIZE >> 40);
+		btrfs_err(fs_info,
+			   "please consider upgrading to 64bit kernel/hardware");
+	}
+}
+#endif
+
+/*
+ * We only mark the transaction aborted and then set the file system read-only.
+ * This will prevent new transactions from starting or trying to join this
+ * one.
+ *
+ * This means that error recovery at the call site is limited to freeing
+ * any local memory allocations and passing the error code up without
+ * further cleanup. The transaction should complete as it normally would
+ * in the call path but will return -EIO.
+ *
+ * We'll complete the cleanup in btrfs_end_transaction and
+ * btrfs_commit_transaction.
+ */
+__cold
+void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
+			       const char *function,
+			       unsigned int line, int errno, bool first_hit)
+{
+	struct btrfs_fs_info *fs_info = trans->fs_info;
+
+	WRITE_ONCE(trans->aborted, errno);
+	WRITE_ONCE(trans->transaction->aborted, errno);
+	if (first_hit && errno == -ENOSPC)
+		btrfs_dump_space_info_for_trans_abort(fs_info);
+	/* Wake up anybody who may be waiting on this transaction */
+	wake_up(&fs_info->transaction_wait);
+	wake_up(&fs_info->transaction_blocked_wait);
+	__btrfs_handle_fs_error(fs_info, function, line, errno, NULL);
+}
+
+/*
+ * __btrfs_panic decodes unexpected, fatal errors from the caller, issues an
+ * alert, and either panics or BUGs, depending on mount options.
+ */
+__cold
+void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
+		   unsigned int line, int errno, const char *fmt, ...)
+{
+	char *s_id = "<unknown>";
+	const char *errstr;
+	struct va_format vaf = { .fmt = fmt };
+	va_list args;
+
+	if (fs_info)
+		s_id = fs_info->sb->s_id;
+
+	va_start(args, fmt);
+	vaf.va = &args;
+
+	errstr = btrfs_decode_error(errno);
+	if (fs_info && (btrfs_test_opt(fs_info, PANIC_ON_FATAL_ERROR)))
+		panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (errno=%d %s)\n",
+			s_id, function, line, &vaf, errno, errstr);
+
+	btrfs_crit(fs_info, "panic in %s:%d: %pV (errno=%d %s)",
+		   function, line, &vaf, errno, errstr);
+	va_end(args);
+	/* Caller calls BUG() */
+}
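__btrfs_handle_fs_error() is normally reached through the btrfs_handle_fs_error() wrapper (declared in messages.h below), which fills in __func__ and __LINE__. A hedged sketch of a typical call site; the surrounding function is invented, while the macro and the EROFS convention come from the comment block above:

/* Hypothetical check: an item that must exist is missing, so flag the fs. */
static int demo_require_item(struct btrfs_fs_info *fs_info, bool found)
{
	if (!found) {
		btrfs_handle_fs_error(fs_info, -ENOENT, "required item missing");
		/* Later errors without context of the original use EROFS. */
		return -EROFS;
	}
	return 0;
}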

fs/btrfs/messages.h | 245 (new file)

@@ -0,0 +1,245 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef BTRFS_MESSAGES_H
+#define BTRFS_MESSAGES_H
+
+#include <linux/types.h>
+
+struct btrfs_fs_info;
+struct btrfs_trans_handle;
+
+static inline __printf(2, 3) __cold
+void btrfs_no_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
+{
+}
+
+#ifdef CONFIG_PRINTK
+
+#define btrfs_printk(fs_info, fmt, args...)				\
+	_btrfs_printk(fs_info, fmt, ##args)
+
+__printf(2, 3)
+__cold
+void _btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...);
+
+#else
+
+#define btrfs_printk(fs_info, fmt, args...) \
+	btrfs_no_printk(fs_info, fmt, ##args)
+#endif
+
+#define btrfs_emerg(fs_info, fmt, args...) \
+	btrfs_printk(fs_info, KERN_EMERG fmt, ##args)
+#define btrfs_alert(fs_info, fmt, args...) \
+	btrfs_printk(fs_info, KERN_ALERT fmt, ##args)
+#define btrfs_crit(fs_info, fmt, args...) \
+	btrfs_printk(fs_info, KERN_CRIT fmt, ##args)
+#define btrfs_err(fs_info, fmt, args...) \
+	btrfs_printk(fs_info, KERN_ERR fmt, ##args)
+#define btrfs_warn(fs_info, fmt, args...) \
+	btrfs_printk(fs_info, KERN_WARNING fmt, ##args)
+#define btrfs_notice(fs_info, fmt, args...) \
+	btrfs_printk(fs_info, KERN_NOTICE fmt, ##args)
+#define btrfs_info(fs_info, fmt, args...) \
+	btrfs_printk(fs_info, KERN_INFO fmt, ##args)
+
+/*
+ * Wrappers that use printk_in_rcu
+ */
+#define btrfs_emerg_in_rcu(fs_info, fmt, args...) \
+	btrfs_printk_in_rcu(fs_info, KERN_EMERG fmt, ##args)
+#define btrfs_alert_in_rcu(fs_info, fmt, args...) \
+	btrfs_printk_in_rcu(fs_info, KERN_ALERT fmt, ##args)
+#define btrfs_crit_in_rcu(fs_info, fmt, args...) \
+	btrfs_printk_in_rcu(fs_info, KERN_CRIT fmt, ##args)
+#define btrfs_err_in_rcu(fs_info, fmt, args...) \
+	btrfs_printk_in_rcu(fs_info, KERN_ERR fmt, ##args)
+#define btrfs_warn_in_rcu(fs_info, fmt, args...) \
+	btrfs_printk_in_rcu(fs_info, KERN_WARNING fmt, ##args)
+#define btrfs_notice_in_rcu(fs_info, fmt, args...) \
+	btrfs_printk_in_rcu(fs_info, KERN_NOTICE fmt, ##args)
+#define btrfs_info_in_rcu(fs_info, fmt, args...) \
+	btrfs_printk_in_rcu(fs_info, KERN_INFO fmt, ##args)
+
+/*
+ * Wrappers that use a ratelimited printk_in_rcu
+ */
+#define btrfs_emerg_rl_in_rcu(fs_info, fmt, args...) \
+	btrfs_printk_rl_in_rcu(fs_info, KERN_EMERG fmt, ##args)
+#define btrfs_alert_rl_in_rcu(fs_info, fmt, args...) \
+	btrfs_printk_rl_in_rcu(fs_info, KERN_ALERT fmt, ##args)
+#define btrfs_crit_rl_in_rcu(fs_info, fmt, args...) \
+	btrfs_printk_rl_in_rcu(fs_info, KERN_CRIT fmt, ##args)
+#define btrfs_err_rl_in_rcu(fs_info, fmt, args...) \
+	btrfs_printk_rl_in_rcu(fs_info, KERN_ERR fmt, ##args)
+#define btrfs_warn_rl_in_rcu(fs_info, fmt, args...) \
+	btrfs_printk_rl_in_rcu(fs_info, KERN_WARNING fmt, ##args)
+#define btrfs_notice_rl_in_rcu(fs_info, fmt, args...) \
+	btrfs_printk_rl_in_rcu(fs_info, KERN_NOTICE fmt, ##args)
+#define btrfs_info_rl_in_rcu(fs_info, fmt, args...) \
+	btrfs_printk_rl_in_rcu(fs_info, KERN_INFO fmt, ##args)
+
+/*
+ * Wrappers that use a ratelimited printk
+ */
+#define btrfs_emerg_rl(fs_info, fmt, args...) \
+	btrfs_printk_ratelimited(fs_info, KERN_EMERG fmt, ##args)
+#define btrfs_alert_rl(fs_info, fmt, args...) \
+	btrfs_printk_ratelimited(fs_info, KERN_ALERT fmt, ##args)
+#define btrfs_crit_rl(fs_info, fmt, args...) \
+	btrfs_printk_ratelimited(fs_info, KERN_CRIT fmt, ##args)
+#define btrfs_err_rl(fs_info, fmt, args...) \
+	btrfs_printk_ratelimited(fs_info, KERN_ERR fmt, ##args)
+#define btrfs_warn_rl(fs_info, fmt, args...) \
+	btrfs_printk_ratelimited(fs_info, KERN_WARNING fmt, ##args)
+#define btrfs_notice_rl(fs_info, fmt, args...) \
+	btrfs_printk_ratelimited(fs_info, KERN_NOTICE fmt, ##args)
+#define btrfs_info_rl(fs_info, fmt, args...) \
+	btrfs_printk_ratelimited(fs_info, KERN_INFO fmt, ##args)
+
+#if defined(CONFIG_DYNAMIC_DEBUG)
+#define btrfs_debug(fs_info, fmt, args...)				\
+	_dynamic_func_call_no_desc(fmt, btrfs_printk,			\
+				   fs_info, KERN_DEBUG fmt, ##args)
+#define btrfs_debug_in_rcu(fs_info, fmt, args...)			\
+	_dynamic_func_call_no_desc(fmt, btrfs_printk_in_rcu,		\
+				   fs_info, KERN_DEBUG fmt, ##args)
+#define btrfs_debug_rl_in_rcu(fs_info, fmt, args...)			\
+	_dynamic_func_call_no_desc(fmt, btrfs_printk_rl_in_rcu,		\
+				   fs_info, KERN_DEBUG fmt, ##args)
+#define btrfs_debug_rl(fs_info, fmt, args...)				\
+	_dynamic_func_call_no_desc(fmt, btrfs_printk_ratelimited,	\
+				   fs_info, KERN_DEBUG fmt, ##args)
+#elif defined(DEBUG)
+#define btrfs_debug(fs_info, fmt, args...) \
+	btrfs_printk(fs_info, KERN_DEBUG fmt, ##args)
+#define btrfs_debug_in_rcu(fs_info, fmt, args...) \
+	btrfs_printk_in_rcu(fs_info, KERN_DEBUG fmt, ##args)
+#define btrfs_debug_rl_in_rcu(fs_info, fmt, args...) \
+	btrfs_printk_rl_in_rcu(fs_info, KERN_DEBUG fmt, ##args)
+#define btrfs_debug_rl(fs_info, fmt, args...) \
+	btrfs_printk_ratelimited(fs_info, KERN_DEBUG fmt, ##args)
+#else
+#define btrfs_debug(fs_info, fmt, args...) \
+	btrfs_no_printk(fs_info, KERN_DEBUG fmt, ##args)
+#define btrfs_debug_in_rcu(fs_info, fmt, args...) \
+	btrfs_no_printk_in_rcu(fs_info, KERN_DEBUG fmt, ##args)
+#define btrfs_debug_rl_in_rcu(fs_info, fmt, args...) \
+	btrfs_no_printk_in_rcu(fs_info, KERN_DEBUG fmt, ##args)
+#define btrfs_debug_rl(fs_info, fmt, args...) \
+	btrfs_no_printk(fs_info, KERN_DEBUG fmt, ##args)
+#endif
+
+#define btrfs_printk_in_rcu(fs_info, fmt, args...)	\
+do {							\
+	rcu_read_lock();				\
+	btrfs_printk(fs_info, fmt, ##args);		\
+	rcu_read_unlock();				\
+} while (0)
+
+#define btrfs_no_printk_in_rcu(fs_info, fmt, args...)	\
+do {							\
+	rcu_read_lock();				\
+	btrfs_no_printk(fs_info, fmt, ##args);		\
+	rcu_read_unlock();				\
+} while (0)
+
+#define btrfs_printk_ratelimited(fs_info, fmt, args...)		\
+do {								\
+	static DEFINE_RATELIMIT_STATE(_rs,			\
+		DEFAULT_RATELIMIT_INTERVAL,			\
+		DEFAULT_RATELIMIT_BURST);			\
+	if (__ratelimit(&_rs))					\
+		btrfs_printk(fs_info, fmt, ##args);		\
+} while (0)
+
+#define btrfs_printk_rl_in_rcu(fs_info, fmt, args...)		\
+do {								\
+	rcu_read_lock();					\
+	btrfs_printk_ratelimited(fs_info, fmt, ##args);		\
+	rcu_read_unlock();					\
+} while (0)
+
+#ifdef CONFIG_BTRFS_ASSERT
+void __cold btrfs_assertfail(const char *expr, const char *file, int line);
+
+#define ASSERT(expr)						\
+	(likely(expr) ? (void)0 : btrfs_assertfail(#expr, __FILE__, __LINE__))
+#else
+#define ASSERT(expr)	(void)(expr)
+#endif
+
+void __cold btrfs_print_v0_err(struct btrfs_fs_info *fs_info);
+
+__printf(5, 6)
+__cold
+void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function,
+		     unsigned int line, int errno, const char *fmt, ...);
+
+const char * __attribute_const__ btrfs_decode_error(int errno);
+
+__cold
+void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
+			       const char *function,
+			       unsigned int line, int errno, bool first_hit);
+
+bool __cold abort_should_print_stack(int errno);
+
+/*
+ * Call btrfs_abort_transaction as early as possible when an error condition is
+ * detected, that way the exact stack trace is reported for some errors.
+ */
+#define btrfs_abort_transaction(trans, errno)			\
+do {								\
+	bool first = false;					\
+	/* Report first abort since mount */			\
+	if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED,	\
+			      &((trans)->fs_info->fs_state))) {	\
+		first = true;					\
+		if (WARN(abort_should_print_stack(errno),       \
+			KERN_ERR				\
+			"BTRFS: Transaction aborted (error %d)\n",	\
+			(errno))) {					\
+			/* Stack trace printed. */			\
+		} else {						\
+			btrfs_err((trans)->fs_info,			\
+				  "Transaction aborted (error %d)",     \
+				  (errno));			\
+		}						\
+	}							\
+	__btrfs_abort_transaction((trans), __func__,		\
+				  __LINE__, (errno), first);	\
+} while (0)
+
+#define btrfs_handle_fs_error(fs_info, errno, fmt, args...)		\
+	__btrfs_handle_fs_error((fs_info), __func__, __LINE__,		\
+				(errno), fmt, ##args)
+
+__printf(5, 6)
+__cold
+void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
+		   unsigned int line, int errno, const char *fmt, ...);
+/*
+ * If BTRFS_MOUNT_PANIC_ON_FATAL_ERROR is in mount_opt, __btrfs_panic
+ * will panic().  Otherwise we BUG() here.
+ */
+#define btrfs_panic(fs_info, errno, fmt, args...)			\
+do {									\
+	__btrfs_panic(fs_info, __func__, __LINE__, errno, fmt, ##args);	\
+	BUG();								\
+} while (0)
+
+#if BITS_PER_LONG == 32
+#define BTRFS_32BIT_MAX_FILE_SIZE (((u64)ULONG_MAX + 1) << PAGE_SHIFT)
+/*
+ * The warning threshold is 5/8th of the MAX_LFS_FILESIZE that limits the logical
+ * addresses of extents.
+ *
+ * For 4K page size it's about 10T, for 64K it's 160T.
+ */
+#define BTRFS_32BIT_EARLY_WARN_THRESHOLD (BTRFS_32BIT_MAX_FILE_SIZE * 5 / 8)
+void btrfs_warn_32bit_limit(struct btrfs_fs_info *fs_info);
+void btrfs_err_32bit_limit(struct btrfs_fs_info *fs_info);
+#endif
+
+#endif
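btrfs_abort_transaction() is meant to be called at the first point of failure, so the WARN stack trace points at the real culprit while callers further up only propagate the error. A sketch of the canonical call-site shape; the function is hypothetical, the pattern is the one the macro above is built for:

static int demo_update_item(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root, struct btrfs_path *path,
			    const struct btrfs_key *key)
{
	int ret;

	ret = btrfs_search_slot(trans, root, key, path, 0, 1);
	if (ret < 0) {
		/* Abort where the failure happened, not in the caller. */
		btrfs_abort_transaction(trans, ret);
		return ret;
	}
	/* ... modify the found item ... */
	return 0;
}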
@@ -10,6 +10,14 @@

 #define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len))

+/*
+ * Enumerate bits using enum autoincrement. Define the @name as the n-th bit.
+ */
+#define ENUM_BIT(name)                                  \
+	__ ## name ## _BIT,                             \
+	name = (1U << __ ## name ## _BIT),              \
+	__ ## name ## _SEQ = __ ## name ## _BIT
+
 static inline void cond_wake_up(struct wait_queue_head *wq)
 {
 	/*
@@ -32,22 +40,10 @@ static inline void cond_wake_up_nomb(struct wait_queue_head *wq)
 		wake_up(wq);
 }

-static inline u64 div_factor(u64 num, int factor)
+static inline u64 mult_perc(u64 num, u32 percent)
 {
-	if (factor == 10)
-		return num;
-	num *= factor;
-	return div_u64(num, 10);
+	return div_u64(num * percent, 100);
 }

-static inline u64 div_factor_fine(u64 num, int factor)
-{
-	if (factor == 100)
-		return num;
-	num *= factor;
-	return div_u64(num, 100);
-}
-
 /* Copy of is_power_of_two that is 64bit safe */
 static inline bool is_power_of_two_u64(u64 n)
 {
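ENUM_BIT keeps flag definitions in an enum while still producing single-bit values, and mult_perc() is the percent-based replacement for the removed div_factor()/div_factor_fine() pair. A short sketch; the DEMO_* names are illustrative:

enum {
	ENUM_BIT(DEMO_FLAG_LOW),	/* expands so DEMO_FLAG_LOW == (1U << 0) */
	ENUM_BIT(DEMO_FLAG_HIGH),	/* expands so DEMO_FLAG_HIGH == (1U << 1) */
};

/* Old style: div_factor(total, 8) meant total * 8 / 10; now a percentage. */
static inline u64 demo_low_watermark(u64 total)
{
	return mult_perc(total, 80);	/* 80% of total */
}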
@@ -7,6 +7,7 @@
 #include <linux/blkdev.h>
 #include <linux/writeback.h>
 #include <linux/sched/mm.h>
+#include "messages.h"
 #include "misc.h"
 #include "ctree.h"
 #include "transaction.h"
@@ -17,6 +18,8 @@
 #include "delalloc-space.h"
 #include "qgroup.h"
 #include "subpage.h"
+#include "file.h"
+#include "super.h"

 static struct kmem_cache *btrfs_ordered_extent_cache;

@@ -143,7 +146,7 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
 	return ret;
 }

-/**
+/*
  * Add an ordered extent to the per-inode tree.
  *
  * @inode:           Inode that this extent is for.
@@ -501,7 +504,7 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
 		ASSERT(list_empty(&entry->log_list));
 		ASSERT(RB_EMPTY_NODE(&entry->rb_node));
 		if (entry->inode)
-			btrfs_add_delayed_iput(entry->inode);
+			btrfs_add_delayed_iput(BTRFS_I(entry->inode));
 		while (!list_empty(&entry->list)) {
 			cur = entry->list.next;
 			sum = list_entry(cur, struct btrfs_ordered_sum, list);
@@ -1019,17 +1022,18 @@ out:
 }

 /*
- * btrfs_flush_ordered_range - Lock the passed range and ensures all pending
- * ordered extents in it are run to completion.
+ * Lock the passed range and ensures all pending ordered extents in it are run
+ * to completion.
  *
  * @inode:        Inode whose ordered tree is to be searched
  * @start:        Beginning of range to flush
  * @end:          Last byte of range to lock
  * @cached_state: If passed, will return the extent state responsible for the
- * locked range. It's the caller's responsibility to free the cached state.
+ *                locked range. It's the caller's responsibility to free the
+ *                cached state.
  *
- * This function always returns with the given range locked, ensuring after it's
- * called no order extent can be pending.
+ * Always return with the given range locked, ensuring after it's called no
+ * order extent can be pending.
  */
 void btrfs_lock_and_flush_ordered_range(struct btrfs_inode *inode, u64 start,
 					u64 end,
@@ -1069,11 +1073,12 @@ void btrfs_lock_and_flush_ordered_range(struct btrfs_inode *inode, u64 start,
  * Return true if btrfs_lock_ordered_range does not return any extents,
  * otherwise false.
  */
-bool btrfs_try_lock_ordered_range(struct btrfs_inode *inode, u64 start, u64 end)
+bool btrfs_try_lock_ordered_range(struct btrfs_inode *inode, u64 start, u64 end,
+				  struct extent_state **cached_state)
 {
 	struct btrfs_ordered_extent *ordered;

-	if (!try_lock_extent(&inode->io_tree, start, end))
+	if (!try_lock_extent(&inode->io_tree, start, end, cached_state))
 		return false;

 	ordered = btrfs_lookup_ordered_range(inode, start, end - start + 1);
@@ -1081,7 +1086,7 @@ bool btrfs_try_lock_ordered_range(struct btrfs_inode *inode, u64 start, u64 end)
 		return true;

 	btrfs_put_ordered_extent(ordered);
-	unlock_extent(&inode->io_tree, start, end, NULL);
+	unlock_extent(&inode->io_tree, start, end, cached_state);

 	return false;
 }
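With the extra cached_state argument, a caller that later unlocks the same range hands the cached extent state back instead of paying for a fresh io-tree search. A hedged sketch of a caller; only btrfs_try_lock_ordered_range() and unlock_extent() are the helpers shown in this hunk, the rest is illustrative:

/* Hypothetical: lock a range only if no ordered extent is pending in it. */
static bool demo_try_range(struct btrfs_inode *inode, u64 start, u64 end)
{
	struct extent_state *cached = NULL;

	if (!btrfs_try_lock_ordered_range(inode, start, end, &cached))
		return false;	/* pending ordered extent, range not locked */

	/* ... work on the locked, ordered-extent-free range ... */

	unlock_extent(&inode->io_tree, start, end, &cached);
	return true;
}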
@@ -206,7 +206,8 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
 void btrfs_lock_and_flush_ordered_range(struct btrfs_inode *inode, u64 start,
 					u64 end,
 					struct extent_state **cached_state);
-bool btrfs_try_lock_ordered_range(struct btrfs_inode *inode, u64 start, u64 end);
+bool btrfs_try_lock_ordered_range(struct btrfs_inode *inode, u64 start, u64 end,
+				  struct extent_state **cached_state);
 int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 pre,
 			       u64 post);
 int __init ordered_data_init(void);
@@ -5,6 +5,7 @@

 #include "ctree.h"
 #include "disk-io.h"
+#include "orphan.h"

 int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root, u64 offset)

fs/btrfs/orphan.h | 11 (new file)

@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef BTRFS_ORPHAN_H
+#define BTRFS_ORPHAN_H
+
+int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root, u64 offset);
+int btrfs_del_orphan_item(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *root, u64 offset);
+
+#endif
@@ -3,9 +3,12 @@
  * Copyright (C) 2007 Oracle.  All rights reserved.
  */

+#include "messages.h"
 #include "ctree.h"
 #include "disk-io.h"
 #include "print-tree.h"
+#include "accessors.h"
+#include "tree-checker.h"

 struct root_name_map {
 	u64 id;
@@ -240,9 +243,9 @@ void btrfs_print_leaf(struct extent_buffer *l)
 		case BTRFS_DIR_ITEM_KEY:
 			di = btrfs_item_ptr(l, i, struct btrfs_dir_item);
 			btrfs_dir_item_key_to_cpu(l, di, &found_key);
-			pr_info("\t\tdir oid %llu type %u\n",
+			pr_info("\t\tdir oid %llu flags %u\n",
 				found_key.objectid,
-				btrfs_dir_type(l, di));
+				btrfs_dir_flags(l, di));
 			break;
 		case BTRFS_ROOT_ITEM_KEY:
 			ri = btrfs_item_ptr(l, i, struct btrfs_root_item);
@@ -384,14 +387,16 @@ void btrfs_print_tree(struct extent_buffer *c, bool follow)
 	if (!follow)
 		return;
 	for (i = 0; i < nr; i++) {
-		struct btrfs_key first_key;
+		struct btrfs_tree_parent_check check = {
+			.level = level - 1,
+			.transid = btrfs_node_ptr_generation(c, i),
+			.owner_root = btrfs_header_owner(c),
+			.has_first_key = true
+		};
 		struct extent_buffer *next;

-		btrfs_node_key_to_cpu(c, &first_key, i);
-		next = read_tree_block(fs_info, btrfs_node_blockptr(c, i),
-				       btrfs_header_owner(c),
-				       btrfs_node_ptr_generation(c, i),
-				       level - 1, &first_key);
+		btrfs_node_key_to_cpu(c, &check.first_key, i);
+		next = read_tree_block(fs_info, btrfs_node_blockptr(c, i), &check);
 		if (IS_ERR(next))
 			continue;
 		if (!extent_buffer_uptodate(next)) {
@@ -4,12 +4,17 @@
  */

 #include <linux/hashtable.h>
+#include "messages.h"
 #include "props.h"
 #include "btrfs_inode.h"
 #include "transaction.h"
 #include "ctree.h"
 #include "xattr.h"
 #include "compression.h"
+#include "space-info.h"
+#include "fs.h"
+#include "accessors.h"
+#include "super.h"

 #define BTRFS_PROP_HANDLERS_HT_BITS 8
 static DEFINE_HASHTABLE(prop_handlers_ht, BTRFS_PROP_HANDLERS_HT_BITS);
@@ -453,7 +458,7 @@ int btrfs_inode_inherit_props(struct btrfs_trans_handle *trans,
 	return 0;
 }

-void __init btrfs_props_init(void)
+int __init btrfs_props_init(void)
 {
 	int i;

@@ -463,5 +468,6 @@ void __init btrfs_props_init(void)

 		hash_add(prop_handlers_ht, &p->node, h);
 	}
+	return 0;
 }

@@ -8,7 +8,7 @@

 #include "ctree.h"

-void __init btrfs_props_init(void);
+int __init btrfs_props_init(void);

 int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode,
 		   const char *name, const char *value, size_t value_len,
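btrfs_props_init() switching from void to int lines up with the module-initialization rework from this pull, where subsystem init functions run from a table of function pointers and any failure unwinds the sequence. A sketch of that shape; this table is illustrative, not the one actually added by the series:

struct demo_init_entry {
	int (*initfn)(void);
};

static const struct demo_init_entry demo_init_sequence[] = {
	{ .initfn = btrfs_props_init },	/* now reports failure via int */
};

static int demo_run_inits(void)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(demo_init_sequence); i++) {
		int ret = demo_init_sequence[i].initfn();

		if (ret)
			return ret;	/* caller unwinds already-run inits */
	}
	return 0;
}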
|  | @ -24,6 +24,11 @@ | |||
| #include "block-group.h" | ||||
| #include "sysfs.h" | ||||
| #include "tree-mod-log.h" | ||||
| #include "fs.h" | ||||
| #include "accessors.h" | ||||
| #include "extent-tree.h" | ||||
| #include "root-tree.h" | ||||
| #include "tree-checker.h" | ||||
| 
 | ||||
| /*
 | ||||
|  * Helpers to access qgroup reservation | ||||
|  | @ -1790,8 +1795,7 @@ int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info, | |||
| int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans, | ||||
| 				   struct btrfs_qgroup_extent_record *qrecord) | ||||
| { | ||||
| 	struct ulist *old_root; | ||||
| 	u64 bytenr = qrecord->bytenr; | ||||
| 	struct btrfs_backref_walk_ctx ctx = { 0 }; | ||||
| 	int ret; | ||||
| 
 | ||||
| 	/*
 | ||||
|  | @ -1818,8 +1822,10 @@ int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans, | |||
| 	if (trans->fs_info->qgroup_flags & BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	ret = btrfs_find_all_roots(NULL, trans->fs_info, bytenr, 0, &old_root, | ||||
| 				   true); | ||||
| 	ctx.bytenr = qrecord->bytenr; | ||||
| 	ctx.fs_info = trans->fs_info; | ||||
| 
 | ||||
| 	ret = btrfs_find_all_roots(&ctx, true); | ||||
| 	if (ret < 0) { | ||||
| 		qgroup_mark_inconsistent(trans->fs_info); | ||||
| 		btrfs_warn(trans->fs_info, | ||||
|  | @ -1835,12 +1841,12 @@ int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans, | |||
| 	 * | ||||
| 	 * So modifying qrecord->old_roots is safe here | ||||
| 	 */ | ||||
| 	qrecord->old_roots = old_root; | ||||
| 	qrecord->old_roots = ctx.roots; | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr, | ||||
| 			      u64 num_bytes, gfp_t gfp_flag) | ||||
| 			      u64 num_bytes) | ||||
| { | ||||
| 	struct btrfs_fs_info *fs_info = trans->fs_info; | ||||
| 	struct btrfs_qgroup_extent_record *record; | ||||
|  | @ -1850,7 +1856,7 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr, | |||
| 	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) | ||||
| 	    || bytenr == 0 || num_bytes == 0) | ||||
| 		return 0; | ||||
| 	record = kzalloc(sizeof(*record), gfp_flag); | ||||
| 	record = kzalloc(sizeof(*record), GFP_NOFS); | ||||
| 	if (!record) | ||||
| 		return -ENOMEM; | ||||
| 
 | ||||
|  | @ -1902,8 +1908,7 @@ int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans, | |||
| 
 | ||||
| 		num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); | ||||
| 
 | ||||
| 		ret = btrfs_qgroup_trace_extent(trans, bytenr, num_bytes, | ||||
| 						GFP_NOFS); | ||||
| 		ret = btrfs_qgroup_trace_extent(trans, bytenr, num_bytes); | ||||
| 		if (ret) | ||||
| 			return ret; | ||||
| 	} | ||||
|  | @ -2102,12 +2107,11 @@ static int qgroup_trace_extent_swap(struct btrfs_trans_handle* trans, | |||
| 	 * blocks for qgroup accounting. | ||||
| 	 */ | ||||
| 	ret = btrfs_qgroup_trace_extent(trans, src_path->nodes[dst_level]->start, | ||||
| 			nodesize, GFP_NOFS); | ||||
| 					nodesize); | ||||
| 	if (ret < 0) | ||||
| 		goto out; | ||||
| 	ret = btrfs_qgroup_trace_extent(trans, | ||||
| 			dst_path->nodes[dst_level]->start, | ||||
| 			nodesize, GFP_NOFS); | ||||
| 	ret = btrfs_qgroup_trace_extent(trans, dst_path->nodes[dst_level]->start, | ||||
| 					nodesize); | ||||
| 	if (ret < 0) | ||||
| 		goto out; | ||||
| 
 | ||||
|  | @ -2336,7 +2340,13 @@ int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans, | |||
| 	} | ||||
| 
 | ||||
| 	if (!extent_buffer_uptodate(root_eb)) { | ||||
| 		ret = btrfs_read_extent_buffer(root_eb, root_gen, root_level, NULL); | ||||
| 		struct btrfs_tree_parent_check check = { | ||||
| 			.has_first_key = false, | ||||
| 			.transid = root_gen, | ||||
| 			.level = root_level | ||||
| 		}; | ||||
| 
 | ||||
| 		ret = btrfs_read_extent_buffer(root_eb, &check); | ||||
| 		if (ret) | ||||
| 			goto out; | ||||
| 	} | ||||
|  | @ -2391,8 +2401,7 @@ walk_down: | |||
| 			path->locks[level] = BTRFS_READ_LOCK; | ||||
| 
 | ||||
| 			ret = btrfs_qgroup_trace_extent(trans, child_bytenr, | ||||
| 							fs_info->nodesize, | ||||
| 							GFP_NOFS); | ||||
| 							fs_info->nodesize); | ||||
| 			if (ret) | ||||
| 				goto out; | ||||
| 		} | ||||
|  | @ -2749,17 +2758,22 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans) | |||
| 
 | ||||
| 		if (!ret && !(fs_info->qgroup_flags & | ||||
| 			      BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING)) { | ||||
| 			struct btrfs_backref_walk_ctx ctx = { 0 }; | ||||
| 
 | ||||
| 			ctx.bytenr = record->bytenr; | ||||
| 			ctx.fs_info = fs_info; | ||||
| 
 | ||||
| 			/*
 | ||||
| 			 * Old roots should be searched when inserting qgroup | ||||
| 			 * extent record | ||||
| 			 */ | ||||
| 			if (WARN_ON(!record->old_roots)) { | ||||
| 				/* Search commit root to find old_roots */ | ||||
| 				ret = btrfs_find_all_roots(NULL, fs_info, | ||||
| 						record->bytenr, 0, | ||||
| 						&record->old_roots, false); | ||||
| 				ret = btrfs_find_all_roots(&ctx, false); | ||||
| 				if (ret < 0) | ||||
| 					goto cleanup; | ||||
| 				record->old_roots = ctx.roots; | ||||
| 				ctx.roots = NULL; | ||||
| 			} | ||||
| 
 | ||||
| 			/* Free the reserved data space */ | ||||
|  | @ -2772,10 +2786,11 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans) | |||
| 			 * which doesn't lock tree or delayed_refs and search | ||||
| 			 * current root. It's safe inside commit_transaction(). | ||||
| 			 */ | ||||
| 			ret = btrfs_find_all_roots(trans, fs_info, | ||||
| 			   record->bytenr, BTRFS_SEQ_LAST, &new_roots, false); | ||||
| 			ctx.trans = trans; | ||||
| 			ret = btrfs_find_all_roots(&ctx, false); | ||||
| 			if (ret < 0) | ||||
| 				goto cleanup; | ||||
| 			new_roots = ctx.roots; | ||||
| 			if (qgroup_to_skip) { | ||||
| 				ulist_del(new_roots, qgroup_to_skip, 0); | ||||
| 				ulist_del(record->old_roots, qgroup_to_skip, | ||||
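The hunk above shows the pattern repeated throughout this series: btrfs_find_all_roots()'s long parameter list becomes a btrfs_backref_walk_ctx that callers fill in and read results back from. Collecting the fields touched by the call sites visible in this diff (bytenr, fs_info, trans, roots, refs, extent_item_pos, ignore_extent_item_pos), the context plausibly has the following rough shape; a sketch for orientation only:

struct btrfs_backref_walk_ctx {
	u64 bytenr;                       /* logical address of the extent to resolve */
	u64 extent_item_pos;              /* offset relative to the extent item */
	bool ignore_extent_item_pos;      /* collect whole leaves, ignore the offset */
	struct btrfs_trans_handle *trans; /* NULL means search commit roots */
	struct btrfs_fs_info *fs_info;
	struct ulist *refs;               /* output of btrfs_find_all_leafs() */
	struct ulist *roots;              /* output of btrfs_find_all_roots() */
};

Ownership of the output ulists moves to the caller, which is why the hunk above detaches the list (record->old_roots = ctx.roots; ctx.roots = NULL;) before the context goes out of scope.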
|  | @ -3241,7 +3256,6 @@ static int qgroup_rescan_leaf(struct btrfs_trans_handle *trans, | |||
| 	struct btrfs_root *extent_root; | ||||
| 	struct btrfs_key found; | ||||
| 	struct extent_buffer *scratch_leaf = NULL; | ||||
| 	struct ulist *roots = NULL; | ||||
| 	u64 num_bytes; | ||||
| 	bool done; | ||||
| 	int slot; | ||||
|  | @ -3291,6 +3305,8 @@ static int qgroup_rescan_leaf(struct btrfs_trans_handle *trans, | |||
| 	mutex_unlock(&fs_info->qgroup_rescan_lock); | ||||
| 
 | ||||
| 	for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) { | ||||
| 		struct btrfs_backref_walk_ctx ctx = { 0 }; | ||||
| 
 | ||||
| 		btrfs_item_key_to_cpu(scratch_leaf, &found, slot); | ||||
| 		if (found.type != BTRFS_EXTENT_ITEM_KEY && | ||||
| 		    found.type != BTRFS_METADATA_ITEM_KEY) | ||||
|  | @ -3300,13 +3316,15 @@ static int qgroup_rescan_leaf(struct btrfs_trans_handle *trans, | |||
| 		else | ||||
| 			num_bytes = found.offset; | ||||
| 
 | ||||
| 		ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0, | ||||
| 					   &roots, false); | ||||
| 		ctx.bytenr = found.objectid; | ||||
| 		ctx.fs_info = fs_info; | ||||
| 
 | ||||
| 		ret = btrfs_find_all_roots(&ctx, false); | ||||
| 		if (ret < 0) | ||||
| 			goto out; | ||||
| 		/* For rescan, just pass old_roots as NULL */ | ||||
| 		ret = btrfs_qgroup_account_extent(trans, found.objectid, | ||||
| 						  num_bytes, NULL, roots); | ||||
| 						  num_bytes, NULL, ctx.roots); | ||||
| 		if (ret < 0) | ||||
| 			goto out; | ||||
| 	} | ||||
|  | @ -4292,6 +4310,7 @@ int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans, | |||
| 					 struct extent_buffer *subvol_eb) | ||||
| { | ||||
| 	struct btrfs_fs_info *fs_info = root->fs_info; | ||||
| 	struct btrfs_tree_parent_check check = { 0 }; | ||||
| 	struct btrfs_qgroup_swapped_blocks *blocks = &root->swapped_blocks; | ||||
| 	struct btrfs_qgroup_swapped_block *block; | ||||
| 	struct extent_buffer *reloc_eb = NULL; | ||||
|  | @ -4340,10 +4359,13 @@ int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans, | |||
| 	blocks->swapped = swapped; | ||||
| 	spin_unlock(&blocks->lock); | ||||
| 
 | ||||
| 	check.level = block->level; | ||||
| 	check.transid = block->reloc_generation; | ||||
| 	check.has_first_key = true; | ||||
| 	memcpy(&check.first_key, &block->first_key, sizeof(check.first_key)); | ||||
| 
 | ||||
| 	/* Read out reloc subtree root */ | ||||
| 	reloc_eb = read_tree_block(fs_info, block->reloc_bytenr, 0, | ||||
| 				   block->reloc_generation, block->level, | ||||
| 				   &block->first_key); | ||||
| 	reloc_eb = read_tree_block(fs_info, block->reloc_bytenr, &check); | ||||
| 	if (IS_ERR(reloc_eb)) { | ||||
| 		ret = PTR_ERR(reloc_eb); | ||||
| 		reloc_eb = NULL; | ||||
|  |  | |||
|  | @ -11,6 +11,7 @@ | |||
| #include <linux/kobject.h> | ||||
| #include "ulist.h" | ||||
| #include "delayed-ref.h" | ||||
| #include "misc.h" | ||||
| 
 | ||||
| /*
 | ||||
|  * Btrfs qgroup overview | ||||
|  | @ -242,9 +243,11 @@ static inline u64 btrfs_qgroup_subvolid(u64 qgroupid) | |||
| /*
 | ||||
|  * For qgroup event trace points only | ||||
|  */ | ||||
| #define QGROUP_RESERVE		(1<<0) | ||||
| #define QGROUP_RELEASE		(1<<1) | ||||
| #define QGROUP_FREE		(1<<2) | ||||
| enum { | ||||
| 	ENUM_BIT(QGROUP_RESERVE), | ||||
| 	ENUM_BIT(QGROUP_RELEASE), | ||||
| 	ENUM_BIT(QGROUP_FREE), | ||||
| }; | ||||
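QGROUP_RESERVE/RELEASE/FREE keep their old values (1, 2 and 4); only the spelling changes. The ENUM_BIT() helper newly included from misc.h expands each name into a sequentially numbered bit, roughly as follows (a sketch of the macro, shown here for reference):

/* Each ENUM_BIT(name) burns one enumerator slot and defines name = 1 << n. */
#define ENUM_BIT(name)					\
	__ ## name ## _BIT,				\
	name = (1U << __ ## name ## _BIT),		\
	__ ## name ## _SEQ = __ ## name ## _BIT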
| 
 | ||||
| int btrfs_quota_enable(struct btrfs_fs_info *fs_info); | ||||
| int btrfs_quota_disable(struct btrfs_fs_info *fs_info); | ||||
|  | @ -318,7 +321,7 @@ int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans, | |||
|  * (NULL trans) | ||||
|  */ | ||||
| int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr, | ||||
| 			      u64 num_bytes, gfp_t gfp_flag); | ||||
| 			      u64 num_bytes); | ||||
| 
 | ||||
| /*
 | ||||
|  * Inform qgroup to trace all leaf items of data | ||||
|  |  | |||
							
								
								
									
fs/btrfs/raid56.c | 2134 (diff suppressed because it is too large)
|  | @ -74,12 +74,6 @@ struct btrfs_raid_bio { | |||
| 	/* How many sectors there are for each stripe */ | ||||
| 	u8 stripe_nsectors; | ||||
| 
 | ||||
| 	/* First bad stripe, -1 means no corruption */ | ||||
| 	s8 faila; | ||||
| 
 | ||||
| 	/* Second bad stripe (for RAID6 use) */ | ||||
| 	s8 failb; | ||||
| 
 | ||||
| 	/* Stripe number that we're scrubbing  */ | ||||
| 	u8 scrubp; | ||||
| 
 | ||||
|  | @ -93,9 +87,7 @@ struct btrfs_raid_bio { | |||
| 
 | ||||
| 	atomic_t stripes_pending; | ||||
| 
 | ||||
| 	atomic_t error; | ||||
| 
 | ||||
| 	struct work_struct end_io_work; | ||||
| 	wait_queue_head_t io_wait; | ||||
| 
 | ||||
| 	/* Bitmap to record which horizontal stripe has data */ | ||||
| 	unsigned long dbitmap; | ||||
|  | @ -126,6 +118,29 @@ struct btrfs_raid_bio { | |||
| 
 | ||||
| 	/* Allocated with real_stripes-many pointers for finish_*() calls */ | ||||
| 	void **finish_pointers; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * The bitmap recording where IO errors happened. | ||||
| 	 * Each bit corresponds to one sector in either the bio_sectors[] or | ||||
| 	 * stripe_sectors[] array. | ||||
| 	 * | ||||
| 	 * The reason we don't use another bit in sector_ptr is that we have two | ||||
| 	 * arrays of sectors, and a lot of IO can use sectors in both arrays, | ||||
| 	 * which would make iteration much harder. | ||||
| 	 */ | ||||
| 	unsigned long *error_bitmap; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Checksum buffer if the rbio is for data.  The buffer should cover | ||||
| 	 * all data sectors (excluding P/Q sectors). | ||||
| 	 */ | ||||
| 	u8 *csum_buf; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Each bit represents if the corresponding sector has data csum found. | ||||
| 	 * Should only cover data sectors (excluding P/Q sectors). | ||||
| 	 */ | ||||
| 	unsigned long *csum_bitmap; | ||||
| }; | ||||
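With faila/failb and the atomic error counter gone, per-sector error state lives in error_bitmap, which lets recovery know exactly which sectors went bad rather than just "stripe N failed". A hypothetical allocation-and-marking sequence, where nr_sectors, first_sector_nr and schedule_recovery() are illustrative names, not fields or helpers confirmed by this diff:

/* One bit per sector across all stripes of the rbio. */
rbio->error_bitmap = bitmap_zalloc(rbio->nr_sectors, GFP_NOFS);
if (!rbio->error_bitmap)
	return -ENOMEM;

/* On bio failure, flag every sector that bio covered. */
bitmap_set(rbio->error_bitmap, first_sector_nr,
	   bio->bi_iter.bi_size >> fs_info->sectorsize_bits);

/* Later: was any error recorded that needs repair? */
if (!bitmap_empty(rbio->error_bitmap, rbio->nr_sectors))
	schedule_recovery(rbio);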
| 
 | ||||
| /*
 | ||||
|  |  | |||
|  | @ -18,7 +18,11 @@ static inline struct rcu_string *rcu_string_strdup(const char *src, gfp_t mask) | |||
| 					 (len * sizeof(char)), mask); | ||||
| 	if (!ret) | ||||
| 		return ret; | ||||
| 	strncpy(ret->str, src, len); | ||||
| 	/* Warn if the source got unexpectedly truncated. */ | ||||
| 	if (WARN_ON(strscpy(ret->str, src, len) < 0)) { | ||||
| 		kfree(ret); | ||||
| 		return NULL; | ||||
| 	} | ||||
| 	return ret; | ||||
| } | ||||
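The switch from strncpy() to strscpy() buys two things: the destination is always NUL-terminated (for a non-zero size), and truncation is reported instead of silently producing an unterminated buffer. strscpy() returns the number of characters copied, or -E2BIG on truncation, which is exactly what the WARN_ON() above keys on. A small illustration of the semantics:

char buf[8];
ssize_t n;

n = strscpy(buf, "btrfs", sizeof(buf));			/* n == 5, buf == "btrfs" */
n = strscpy(buf, "a very long name", sizeof(buf));	/* n == -E2BIG, buf == "a very " */

Since the allocation here is presumably sized from strlen(src) + 1, a negative return can only mean the source string changed underneath us, hence a WARN_ON() rather than a routine error path.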
| 
 | ||||
|  |  | |||
|  | @ -5,11 +5,14 @@ | |||
| 
 | ||||
| #include <linux/sched.h> | ||||
| #include <linux/stacktrace.h> | ||||
| #include "messages.h" | ||||
| #include "ctree.h" | ||||
| #include "disk-io.h" | ||||
| #include "locking.h" | ||||
| #include "delayed-ref.h" | ||||
| #include "ref-verify.h" | ||||
| #include "fs.h" | ||||
| #include "accessors.h" | ||||
| 
 | ||||
| /*
 | ||||
|  * Used to keep track the roots and number of refs each root has for a given | ||||
|  |  | |||
|  | @ -2,13 +2,19 @@ | |||
| 
 | ||||
| #include <linux/blkdev.h> | ||||
| #include <linux/iversion.h> | ||||
| #include "compression.h" | ||||
| #include "ctree.h" | ||||
| #include "fs.h" | ||||
| #include "messages.h" | ||||
| #include "compression.h" | ||||
| #include "delalloc-space.h" | ||||
| #include "disk-io.h" | ||||
| #include "reflink.h" | ||||
| #include "transaction.h" | ||||
| #include "subpage.h" | ||||
| #include "accessors.h" | ||||
| #include "file-item.h" | ||||
| #include "file.h" | ||||
| #include "super.h" | ||||
| 
 | ||||
| #define BTRFS_MAX_DEDUPE_LEN	SZ_16M | ||||
| 
 | ||||
|  | @ -318,16 +324,16 @@ copy_to_page: | |||
| 	goto out; | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * btrfs_clone() - clone a range from inode file to another | ||||
| /*
 | ||||
|  * Clone a range from inode file to another. | ||||
|  * | ||||
|  * @src: Inode to clone from | ||||
|  * @inode: Inode to clone to | ||||
|  * @off: Offset within source to start clone from | ||||
|  * @olen: Original length, passed by user, of range to clone | ||||
|  * @olen_aligned: Block-aligned value of olen | ||||
|  * @destoff: Offset within @inode to start clone | ||||
|  * @no_time_update: Whether to update mtime/ctime on the target inode | ||||
|  * @src:             Inode to clone from | ||||
|  * @inode:           Inode to clone to | ||||
|  * @off:             Offset within source to start clone from | ||||
|  * @olen:            Original length, passed by user, of range to clone | ||||
|  * @olen_aligned:    Block-aligned value of olen | ||||
|  * @destoff:         Offset within @inode to start clone | ||||
|  * @no_time_update:  Whether to update mtime/ctime on the target inode | ||||
|  */ | ||||
| static int btrfs_clone(struct inode *src, struct inode *inode, | ||||
| 		       const u64 off, const u64 olen, const u64 olen_aligned, | ||||
|  | @ -887,7 +893,7 @@ loff_t btrfs_remap_file_range(struct file *src_file, loff_t off, | |||
| 		return -EINVAL; | ||||
| 
 | ||||
| 	if (same_inode) { | ||||
| 		btrfs_inode_lock(src_inode, BTRFS_ILOCK_MMAP); | ||||
| 		btrfs_inode_lock(BTRFS_I(src_inode), BTRFS_ILOCK_MMAP); | ||||
| 	} else { | ||||
| 		lock_two_nondirectories(src_inode, dst_inode); | ||||
| 		btrfs_double_mmap_lock(src_inode, dst_inode); | ||||
|  | @ -905,7 +911,7 @@ loff_t btrfs_remap_file_range(struct file *src_file, loff_t off, | |||
| 
 | ||||
| out_unlock: | ||||
| 	if (same_inode) { | ||||
| 		btrfs_inode_unlock(src_inode, BTRFS_ILOCK_MMAP); | ||||
| 		btrfs_inode_unlock(BTRFS_I(src_inode), BTRFS_ILOCK_MMAP); | ||||
| 	} else { | ||||
| 		btrfs_double_mmap_unlock(src_inode, dst_inode); | ||||
| 		unlock_two_nondirectories(src_inode, dst_inode); | ||||
|  |  | |||
|  | @ -27,6 +27,15 @@ | |||
| #include "subpage.h" | ||||
| #include "zoned.h" | ||||
| #include "inode-item.h" | ||||
| #include "space-info.h" | ||||
| #include "fs.h" | ||||
| #include "accessors.h" | ||||
| #include "extent-tree.h" | ||||
| #include "root-tree.h" | ||||
| #include "file-item.h" | ||||
| #include "relocation.h" | ||||
| #include "super.h" | ||||
| #include "tree-checker.h" | ||||
| 
 | ||||
| /*
 | ||||
|  * Relocation overview | ||||
|  | @ -470,7 +479,7 @@ static noinline_for_stack struct btrfs_backref_node *build_backref_tree( | |||
| 	int ret; | ||||
| 	int err = 0; | ||||
| 
 | ||||
| 	iter = btrfs_backref_iter_alloc(rc->extent_root->fs_info, GFP_NOFS); | ||||
| 	iter = btrfs_backref_iter_alloc(rc->extent_root->fs_info); | ||||
| 	if (!iter) | ||||
| 		return ERR_PTR(-ENOMEM); | ||||
| 	path = btrfs_alloc_path(); | ||||
|  | @ -1109,10 +1118,12 @@ int replace_file_extents(struct btrfs_trans_handle *trans, | |||
| 				inode = find_next_inode(root, key.objectid); | ||||
| 				first = 0; | ||||
| 			} else if (inode && btrfs_ino(BTRFS_I(inode)) < key.objectid) { | ||||
| 				btrfs_add_delayed_iput(inode); | ||||
| 				btrfs_add_delayed_iput(BTRFS_I(inode)); | ||||
| 				inode = find_next_inode(root, key.objectid); | ||||
| 			} | ||||
| 			if (inode && btrfs_ino(BTRFS_I(inode)) == key.objectid) { | ||||
| 				struct extent_state *cached_state = NULL; | ||||
| 
 | ||||
| 				end = key.offset + | ||||
| 				      btrfs_file_extent_num_bytes(leaf, fi); | ||||
| 				WARN_ON(!IS_ALIGNED(key.offset, | ||||
|  | @ -1120,14 +1131,15 @@ int replace_file_extents(struct btrfs_trans_handle *trans, | |||
| 				WARN_ON(!IS_ALIGNED(end, fs_info->sectorsize)); | ||||
| 				end--; | ||||
| 				ret = try_lock_extent(&BTRFS_I(inode)->io_tree, | ||||
| 						      key.offset, end); | ||||
| 						      key.offset, end, | ||||
| 						      &cached_state); | ||||
| 				if (!ret) | ||||
| 					continue; | ||||
| 
 | ||||
| 				btrfs_drop_extent_map_range(BTRFS_I(inode), | ||||
| 							    key.offset, end, true); | ||||
| 				unlock_extent(&BTRFS_I(inode)->io_tree, | ||||
| 					      key.offset, end, NULL); | ||||
| 					      key.offset, end, &cached_state); | ||||
| 			} | ||||
| 		} | ||||
| 
 | ||||
|  | @ -1170,7 +1182,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans, | |||
| 	if (dirty) | ||||
| 		btrfs_mark_buffer_dirty(leaf); | ||||
| 	if (inode) | ||||
| 		btrfs_add_delayed_iput(inode); | ||||
| 		btrfs_add_delayed_iput(BTRFS_I(inode)); | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
|  | @ -1516,6 +1528,8 @@ static int invalidate_extent_cache(struct btrfs_root *root, | |||
| 
 | ||||
| 	objectid = min_key->objectid; | ||||
| 	while (1) { | ||||
| 		struct extent_state *cached_state = NULL; | ||||
| 
 | ||||
| 		cond_resched(); | ||||
| 		iput(inode); | ||||
| 
 | ||||
|  | @ -1566,9 +1580,9 @@ static int invalidate_extent_cache(struct btrfs_root *root, | |||
| 		} | ||||
| 
 | ||||
| 		/* the lock_extent waits for read_folio to complete */ | ||||
| 		lock_extent(&BTRFS_I(inode)->io_tree, start, end, NULL); | ||||
| 		lock_extent(&BTRFS_I(inode)->io_tree, start, end, &cached_state); | ||||
| 		btrfs_drop_extent_map_range(BTRFS_I(inode), start, end, true); | ||||
| 		unlock_extent(&BTRFS_I(inode)->io_tree, start, end, NULL); | ||||
| 		unlock_extent(&BTRFS_I(inode)->io_tree, start, end, &cached_state); | ||||
| 	} | ||||
| 	return 0; | ||||
| } | ||||
|  | @ -2597,10 +2611,14 @@ static int tree_block_processed(u64 bytenr, struct reloc_control *rc) | |||
| static int get_tree_block_key(struct btrfs_fs_info *fs_info, | ||||
| 			      struct tree_block *block) | ||||
| { | ||||
| 	struct btrfs_tree_parent_check check = { | ||||
| 		.level = block->level, | ||||
| 		.owner_root = block->owner, | ||||
| 		.transid = block->key.offset | ||||
| 	}; | ||||
| 	struct extent_buffer *eb; | ||||
| 
 | ||||
| 	eb = read_tree_block(fs_info, block->bytenr, block->owner, | ||||
| 			     block->key.offset, block->level, NULL); | ||||
| 	eb = read_tree_block(fs_info, block->bytenr, &check); | ||||
| 	if (IS_ERR(eb)) | ||||
| 		return PTR_ERR(eb); | ||||
| 	if (!extent_buffer_uptodate(eb)) { | ||||
|  | @ -2861,25 +2879,27 @@ static noinline_for_stack int prealloc_file_extent_cluster( | |||
| 	if (ret) | ||||
| 		return ret; | ||||
| 
 | ||||
| 	btrfs_inode_lock(&inode->vfs_inode, 0); | ||||
| 	btrfs_inode_lock(inode, 0); | ||||
| 	for (nr = 0; nr < cluster->nr; nr++) { | ||||
| 		struct extent_state *cached_state = NULL; | ||||
| 
 | ||||
| 		start = cluster->boundary[nr] - offset; | ||||
| 		if (nr + 1 < cluster->nr) | ||||
| 			end = cluster->boundary[nr + 1] - 1 - offset; | ||||
| 		else | ||||
| 			end = cluster->end - offset; | ||||
| 
 | ||||
| 		lock_extent(&inode->io_tree, start, end, NULL); | ||||
| 		lock_extent(&inode->io_tree, start, end, &cached_state); | ||||
| 		num_bytes = end + 1 - start; | ||||
| 		ret = btrfs_prealloc_file_range(&inode->vfs_inode, 0, start, | ||||
| 						num_bytes, num_bytes, | ||||
| 						end + 1, &alloc_hint); | ||||
| 		cur_offset = end + 1; | ||||
| 		unlock_extent(&inode->io_tree, start, end, NULL); | ||||
| 		unlock_extent(&inode->io_tree, start, end, &cached_state); | ||||
| 		if (ret) | ||||
| 			break; | ||||
| 	} | ||||
| 	btrfs_inode_unlock(&inode->vfs_inode, 0); | ||||
| 	btrfs_inode_unlock(inode, 0); | ||||
| 
 | ||||
| 	if (cur_offset < prealloc_end) | ||||
| 		btrfs_free_reserved_data_space_noquota(inode->root->fs_info, | ||||
|  | @ -2891,6 +2911,7 @@ static noinline_for_stack int setup_relocation_extent_mapping(struct inode *inod | |||
| 				u64 start, u64 end, u64 block_start) | ||||
| { | ||||
| 	struct extent_map *em; | ||||
| 	struct extent_state *cached_state = NULL; | ||||
| 	int ret = 0; | ||||
| 
 | ||||
| 	em = alloc_extent_map(); | ||||
|  | @ -2903,9 +2924,9 @@ static noinline_for_stack int setup_relocation_extent_mapping(struct inode *inod | |||
| 	em->block_start = block_start; | ||||
| 	set_bit(EXTENT_FLAG_PINNED, &em->flags); | ||||
| 
 | ||||
| 	lock_extent(&BTRFS_I(inode)->io_tree, start, end, NULL); | ||||
| 	lock_extent(&BTRFS_I(inode)->io_tree, start, end, &cached_state); | ||||
| 	ret = btrfs_replace_extent_map_range(BTRFS_I(inode), em, false); | ||||
| 	unlock_extent(&BTRFS_I(inode)->io_tree, start, end, NULL); | ||||
| 	unlock_extent(&BTRFS_I(inode)->io_tree, start, end, &cached_state); | ||||
| 	free_extent_map(em); | ||||
| 
 | ||||
| 	return ret; | ||||
|  | @ -2983,6 +3004,7 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra, | |||
| 	 */ | ||||
| 	cur = max(page_start, cluster->boundary[*cluster_nr] - offset); | ||||
| 	while (cur <= page_end) { | ||||
| 		struct extent_state *cached_state = NULL; | ||||
| 		u64 extent_start = cluster->boundary[*cluster_nr] - offset; | ||||
| 		u64 extent_end = get_cluster_boundary_end(cluster, | ||||
| 						*cluster_nr) - offset; | ||||
|  | @ -2998,13 +3020,15 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra, | |||
| 			goto release_page; | ||||
| 
 | ||||
| 		/* Mark the range delalloc and dirty for later writeback */ | ||||
| 		lock_extent(&BTRFS_I(inode)->io_tree, clamped_start, clamped_end, NULL); | ||||
| 		lock_extent(&BTRFS_I(inode)->io_tree, clamped_start, clamped_end, | ||||
| 			    &cached_state); | ||||
| 		ret = btrfs_set_extent_delalloc(BTRFS_I(inode), clamped_start, | ||||
| 						clamped_end, 0, NULL); | ||||
| 						clamped_end, 0, &cached_state); | ||||
| 		if (ret) { | ||||
| 			clear_extent_bits(&BTRFS_I(inode)->io_tree, | ||||
| 					clamped_start, clamped_end, | ||||
| 					EXTENT_LOCKED | EXTENT_BOUNDARY); | ||||
| 			clear_extent_bit(&BTRFS_I(inode)->io_tree, | ||||
| 					 clamped_start, clamped_end, | ||||
| 					 EXTENT_LOCKED | EXTENT_BOUNDARY, | ||||
| 					 &cached_state); | ||||
| 			btrfs_delalloc_release_metadata(BTRFS_I(inode), | ||||
| 							clamped_len, true); | ||||
| 			btrfs_delalloc_release_extents(BTRFS_I(inode), | ||||
|  | @ -3031,7 +3055,8 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra, | |||
| 					boundary_start, boundary_end, | ||||
| 					EXTENT_BOUNDARY); | ||||
| 		} | ||||
| 		unlock_extent(&BTRFS_I(inode)->io_tree, clamped_start, clamped_end, NULL); | ||||
| 		unlock_extent(&BTRFS_I(inode)->io_tree, clamped_start, clamped_end, | ||||
| 			      &cached_state); | ||||
| 		btrfs_delalloc_release_extents(BTRFS_I(inode), clamped_len); | ||||
| 		cur += clamped_len; | ||||
| 
 | ||||
|  | @ -3388,24 +3413,28 @@ int add_data_references(struct reloc_control *rc, | |||
| 			struct btrfs_path *path, | ||||
| 			struct rb_root *blocks) | ||||
| { | ||||
| 	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; | ||||
| 	struct ulist *leaves = NULL; | ||||
| 	struct btrfs_backref_walk_ctx ctx = { 0 }; | ||||
| 	struct ulist_iterator leaf_uiter; | ||||
| 	struct ulist_node *ref_node = NULL; | ||||
| 	const u32 blocksize = fs_info->nodesize; | ||||
| 	const u32 blocksize = rc->extent_root->fs_info->nodesize; | ||||
| 	int ret = 0; | ||||
| 
 | ||||
| 	btrfs_release_path(path); | ||||
| 	ret = btrfs_find_all_leafs(NULL, fs_info, extent_key->objectid, | ||||
| 				   0, &leaves, NULL, true); | ||||
| 
 | ||||
| 	ctx.bytenr = extent_key->objectid; | ||||
| 	ctx.ignore_extent_item_pos = true; | ||||
| 	ctx.fs_info = rc->extent_root->fs_info; | ||||
| 
 | ||||
| 	ret = btrfs_find_all_leafs(&ctx); | ||||
| 	if (ret < 0) | ||||
| 		return ret; | ||||
| 
 | ||||
| 	ULIST_ITER_INIT(&leaf_uiter); | ||||
| 	while ((ref_node = ulist_next(leaves, &leaf_uiter))) { | ||||
| 	while ((ref_node = ulist_next(ctx.refs, &leaf_uiter))) { | ||||
| 		struct btrfs_tree_parent_check check = { 0 }; | ||||
| 		struct extent_buffer *eb; | ||||
| 
 | ||||
| 		eb = read_tree_block(fs_info, ref_node->val, 0, 0, 0, NULL); | ||||
| 		eb = read_tree_block(ctx.fs_info, ref_node->val, &check); | ||||
| 		if (IS_ERR(eb)) { | ||||
| 			ret = PTR_ERR(eb); | ||||
| 			break; | ||||
|  | @ -3421,7 +3450,7 @@ int add_data_references(struct reloc_control *rc, | |||
| 	} | ||||
| 	if (ret < 0) | ||||
| 		free_block_list(blocks); | ||||
| 	ulist_free(leaves); | ||||
| 	ulist_free(ctx.refs); | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
|  | @ -3905,8 +3934,7 @@ static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info) | |||
| 	INIT_LIST_HEAD(&rc->dirty_subvol_roots); | ||||
| 	btrfs_backref_init_cache(fs_info, &rc->backref_cache, 1); | ||||
| 	mapping_tree_init(&rc->reloc_root_tree); | ||||
| 	extent_io_tree_init(fs_info, &rc->processed_blocks, | ||||
| 			    IO_TREE_RELOC_BLOCKS, NULL); | ||||
| 	extent_io_tree_init(fs_info, &rc->processed_blocks, IO_TREE_RELOC_BLOCKS); | ||||
| 	return rc; | ||||
| } | ||||
| 
 | ||||
|  | @ -4330,8 +4358,8 @@ int btrfs_reloc_clone_csums(struct btrfs_inode *inode, u64 file_pos, u64 len) | |||
| 
 | ||||
| 	disk_bytenr = file_pos + inode->index_cnt; | ||||
| 	csum_root = btrfs_csum_root(fs_info, disk_bytenr); | ||||
| 	ret = btrfs_lookup_csums_range(csum_root, disk_bytenr, | ||||
| 				       disk_bytenr + len - 1, &list, 0, false); | ||||
| 	ret = btrfs_lookup_csums_list(csum_root, disk_bytenr, | ||||
| 				      disk_bytenr + len - 1, &list, 0, false); | ||||
| 	if (ret) | ||||
| 		goto out; | ||||
| 
 | ||||
|  |  | |||
							
								
								
									
fs/btrfs/relocation.h | 23 (new file)
|  | @ -0,0 +1,23 @@ | |||
| /* SPDX-License-Identifier: GPL-2.0 */ | ||||
| 
 | ||||
| #ifndef BTRFS_RELOCATION_H | ||||
| #define BTRFS_RELOCATION_H | ||||
| 
 | ||||
| int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start); | ||||
| int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, struct btrfs_root *root); | ||||
| int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, | ||||
| 			    struct btrfs_root *root); | ||||
| int btrfs_recover_relocation(struct btrfs_fs_info *fs_info); | ||||
| int btrfs_reloc_clone_csums(struct btrfs_inode *inode, u64 file_pos, u64 len); | ||||
| int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, | ||||
| 			  struct btrfs_root *root, struct extent_buffer *buf, | ||||
| 			  struct extent_buffer *cow); | ||||
| void btrfs_reloc_pre_snapshot(struct btrfs_pending_snapshot *pending, | ||||
| 			      u64 *bytes_to_reserve); | ||||
| int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, | ||||
| 			      struct btrfs_pending_snapshot *pending); | ||||
| int btrfs_should_cancel_balance(struct btrfs_fs_info *fs_info); | ||||
| struct btrfs_root *find_reloc_root(struct btrfs_fs_info *fs_info, u64 bytenr); | ||||
| int btrfs_should_ignore_reloc_root(struct btrfs_root *root); | ||||
| 
 | ||||
| #endif | ||||
|  | @ -6,11 +6,16 @@ | |||
| #include <linux/err.h> | ||||
| #include <linux/uuid.h> | ||||
| #include "ctree.h" | ||||
| #include "fs.h" | ||||
| #include "messages.h" | ||||
| #include "transaction.h" | ||||
| #include "disk-io.h" | ||||
| #include "print-tree.h" | ||||
| #include "qgroup.h" | ||||
| #include "space-info.h" | ||||
| #include "accessors.h" | ||||
| #include "root-tree.h" | ||||
| #include "orphan.h" | ||||
| 
 | ||||
| /*
 | ||||
|  * Read a root item from the tree. In case we detect a root item smaller than | ||||
|  | @ -327,9 +332,8 @@ out: | |||
| } | ||||
| 
 | ||||
| int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id, | ||||
| 		       u64 ref_id, u64 dirid, u64 *sequence, const char *name, | ||||
| 		       int name_len) | ||||
| 
 | ||||
| 		       u64 ref_id, u64 dirid, u64 *sequence, | ||||
| 		       const struct fscrypt_str *name) | ||||
| { | ||||
| 	struct btrfs_root *tree_root = trans->fs_info->tree_root; | ||||
| 	struct btrfs_path *path; | ||||
|  | @ -356,8 +360,8 @@ again: | |||
| 				     struct btrfs_root_ref); | ||||
| 		ptr = (unsigned long)(ref + 1); | ||||
| 		if ((btrfs_root_ref_dirid(leaf, ref) != dirid) || | ||||
| 		    (btrfs_root_ref_name_len(leaf, ref) != name_len) || | ||||
| 		    memcmp_extent_buffer(leaf, name, ptr, name_len)) { | ||||
| 		    (btrfs_root_ref_name_len(leaf, ref) != name->len) || | ||||
| 		    memcmp_extent_buffer(leaf, name->name, ptr, name->len)) { | ||||
| 			ret = -ENOENT; | ||||
| 			goto out; | ||||
| 		} | ||||
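The root ref helpers now take the name as a struct fscrypt_str instead of a raw pointer/length pair, so the same calls can later carry encrypted names. For reference, fscrypt_str is essentially a counted byte buffer (per linux/fscrypt.h):

struct fscrypt_str {
	unsigned char *name;	/* name bytes, possibly ciphertext later on */
	u32 len;
};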
|  | @ -400,8 +404,8 @@ out: | |||
|  * Will return 0, -ENOMEM, or anything from the CoW path | ||||
|  */ | ||||
| int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id, | ||||
| 		       u64 ref_id, u64 dirid, u64 sequence, const char *name, | ||||
| 		       int name_len) | ||||
| 		       u64 ref_id, u64 dirid, u64 sequence, | ||||
| 		       const struct fscrypt_str *name) | ||||
| { | ||||
| 	struct btrfs_root *tree_root = trans->fs_info->tree_root; | ||||
| 	struct btrfs_key key; | ||||
|  | @ -420,7 +424,7 @@ int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id, | |||
| 	key.offset = ref_id; | ||||
| again: | ||||
| 	ret = btrfs_insert_empty_item(trans, tree_root, path, &key, | ||||
| 				      sizeof(*ref) + name_len); | ||||
| 				      sizeof(*ref) + name->len); | ||||
| 	if (ret) { | ||||
| 		btrfs_abort_transaction(trans, ret); | ||||
| 		btrfs_free_path(path); | ||||
|  | @ -431,9 +435,9 @@ again: | |||
| 	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); | ||||
| 	btrfs_set_root_ref_dirid(leaf, ref, dirid); | ||||
| 	btrfs_set_root_ref_sequence(leaf, ref, sequence); | ||||
| 	btrfs_set_root_ref_name_len(leaf, ref, name_len); | ||||
| 	btrfs_set_root_ref_name_len(leaf, ref, name->len); | ||||
| 	ptr = (unsigned long)(ref + 1); | ||||
| 	write_extent_buffer(leaf, name, ptr, name_len); | ||||
| 	write_extent_buffer(leaf, name->name, ptr, name->len); | ||||
| 	btrfs_mark_buffer_dirty(leaf); | ||||
| 
 | ||||
| 	if (key.type == BTRFS_ROOT_BACKREF_KEY) { | ||||
|  |  | |||
							
								
								
									
fs/btrfs/root-tree.h | 34 (new file)
|  | @ -0,0 +1,34 @@ | |||
| /* SPDX-License-Identifier: GPL-2.0 */ | ||||
| 
 | ||||
| #ifndef BTRFS_ROOT_TREE_H | ||||
| #define BTRFS_ROOT_TREE_H | ||||
| 
 | ||||
| int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, | ||||
| 				     struct btrfs_block_rsv *rsv, | ||||
| 				     int nitems, bool use_global_rsv); | ||||
| void btrfs_subvolume_release_metadata(struct btrfs_root *root, | ||||
| 				      struct btrfs_block_rsv *rsv); | ||||
| int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id, | ||||
| 		       u64 ref_id, u64 dirid, u64 sequence, | ||||
| 		       const struct fscrypt_str *name); | ||||
| int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id, | ||||
| 		       u64 ref_id, u64 dirid, u64 *sequence, | ||||
| 		       const struct fscrypt_str *name); | ||||
| int btrfs_del_root(struct btrfs_trans_handle *trans, const struct btrfs_key *key); | ||||
| int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, | ||||
| 		      const struct btrfs_key *key, | ||||
| 		      struct btrfs_root_item *item); | ||||
| int __must_check btrfs_update_root(struct btrfs_trans_handle *trans, | ||||
| 				   struct btrfs_root *root, | ||||
| 				   struct btrfs_key *key, | ||||
| 				   struct btrfs_root_item *item); | ||||
| int btrfs_find_root(struct btrfs_root *root, const struct btrfs_key *search_key, | ||||
| 		    struct btrfs_path *path, struct btrfs_root_item *root_item, | ||||
| 		    struct btrfs_key *root_key); | ||||
| int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info); | ||||
| void btrfs_set_root_node(struct btrfs_root_item *item, | ||||
| 			 struct extent_buffer *node); | ||||
| void btrfs_check_and_init_root_item(struct btrfs_root_item *item); | ||||
| void btrfs_update_root_times(struct btrfs_trans_handle *trans, struct btrfs_root *root); | ||||
| 
 | ||||
| #endif | ||||
|  | @ -17,10 +17,13 @@ | |||
| #include "extent_io.h" | ||||
| #include "dev-replace.h" | ||||
| #include "check-integrity.h" | ||||
| #include "rcu-string.h" | ||||
| #include "raid56.h" | ||||
| #include "block-group.h" | ||||
| #include "zoned.h" | ||||
| #include "fs.h" | ||||
| #include "accessors.h" | ||||
| #include "file-item.h" | ||||
| #include "scrub.h" | ||||
| 
 | ||||
| /*
 | ||||
|  * This is only the first step towards a full-featured scrub. It reads all | ||||
|  | @ -56,6 +59,17 @@ struct scrub_ctx; | |||
| 
 | ||||
| #define SCRUB_MAX_PAGES			(DIV_ROUND_UP(BTRFS_MAX_METADATA_BLOCKSIZE, PAGE_SIZE)) | ||||
| 
 | ||||
| /*
 | ||||
|  * Maximum number of mirrors that can be available for all profiles counting | ||||
|  * the target device of dev-replace as one. During an active device replace | ||||
|  * procedure, the target device of the copy operation is a mirror for the | ||||
|  * procedure, the target device of the copy operation is also a mirror for | ||||
|  * the filesystem data, one that can be used to read data in order to repair | ||||
|  * | ||||
|  * Current value is derived from RAID1C4 with 4 copies. | ||||
|  */ | ||||
| #define BTRFS_MAX_MIRRORS (4 + 1) | ||||
| 
 | ||||
| struct scrub_recover { | ||||
| 	refcount_t		refs; | ||||
| 	struct btrfs_io_context	*bioc; | ||||
|  | @ -284,7 +298,7 @@ static struct scrub_block *alloc_scrub_block(struct scrub_ctx *sctx, | |||
|  * Will also allocate new pages for @sblock if needed. | ||||
|  */ | ||||
| static struct scrub_sector *alloc_scrub_sector(struct scrub_block *sblock, | ||||
| 					       u64 logical, gfp_t gfp) | ||||
| 					       u64 logical) | ||||
| { | ||||
| 	const pgoff_t page_index = (logical - sblock->logical) >> PAGE_SHIFT; | ||||
| 	struct scrub_sector *ssector; | ||||
|  | @ -292,7 +306,7 @@ static struct scrub_sector *alloc_scrub_sector(struct scrub_block *sblock, | |||
| 	/* We must never have scrub_block exceed U32_MAX in size. */ | ||||
| 	ASSERT(logical - sblock->logical < U32_MAX); | ||||
| 
 | ||||
| 	ssector = kzalloc(sizeof(*ssector), gfp); | ||||
| 	ssector = kzalloc(sizeof(*ssector), GFP_KERNEL); | ||||
| 	if (!ssector) | ||||
| 		return NULL; | ||||
| 
 | ||||
|  | @ -300,7 +314,7 @@ static struct scrub_sector *alloc_scrub_sector(struct scrub_block *sblock, | |||
| 	if (!sblock->pages[page_index]) { | ||||
| 		int ret; | ||||
| 
 | ||||
| 		sblock->pages[page_index] = alloc_page(gfp); | ||||
| 		sblock->pages[page_index] = alloc_page(GFP_KERNEL); | ||||
| 		if (!sblock->pages[page_index]) { | ||||
| 			kfree(ssector); | ||||
| 			return NULL; | ||||
|  | @ -794,8 +808,8 @@ nomem: | |||
| 	return ERR_PTR(-ENOMEM); | ||||
| } | ||||
| 
 | ||||
| static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, | ||||
| 				     void *warn_ctx) | ||||
| static int scrub_print_warning_inode(u64 inum, u64 offset, u64 num_bytes, | ||||
| 				     u64 root, void *warn_ctx) | ||||
| { | ||||
| 	u32 nlink; | ||||
| 	int ret; | ||||
|  | @ -862,7 +876,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, | |||
| 		btrfs_warn_in_rcu(fs_info, | ||||
| "%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu, length %u, links %u (path: %s)", | ||||
| 				  swarn->errstr, swarn->logical, | ||||
| 				  rcu_str_deref(swarn->dev->name), | ||||
| 				  btrfs_dev_name(swarn->dev), | ||||
| 				  swarn->physical, | ||||
| 				  root, inum, offset, | ||||
| 				  fs_info->sectorsize, nlink, | ||||
|  | @ -876,7 +890,7 @@ err: | |||
| 	btrfs_warn_in_rcu(fs_info, | ||||
| 			  "%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu: path resolving failed with ret=%d", | ||||
| 			  swarn->errstr, swarn->logical, | ||||
| 			  rcu_str_deref(swarn->dev->name), | ||||
| 			  btrfs_dev_name(swarn->dev), | ||||
| 			  swarn->physical, | ||||
| 			  root, inum, offset, ret); | ||||
| 
 | ||||
|  | @ -894,7 +908,6 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) | |||
| 	struct btrfs_extent_item *ei; | ||||
| 	struct scrub_warning swarn; | ||||
| 	unsigned long ptr = 0; | ||||
| 	u64 extent_item_pos; | ||||
| 	u64 flags = 0; | ||||
| 	u64 ref_root; | ||||
| 	u32 item_size; | ||||
|  | @ -908,8 +921,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) | |||
| 	/* Super block error, no need to search extent tree. */ | ||||
| 	if (sblock->sectors[0]->flags & BTRFS_EXTENT_FLAG_SUPER) { | ||||
| 		btrfs_warn_in_rcu(fs_info, "%s on device %s, physical %llu", | ||||
| 			errstr, rcu_str_deref(dev->name), | ||||
| 			sblock->physical); | ||||
| 			errstr, btrfs_dev_name(dev), sblock->physical); | ||||
| 		return; | ||||
| 	} | ||||
| 	path = btrfs_alloc_path(); | ||||
|  | @ -926,7 +938,6 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) | |||
| 	if (ret < 0) | ||||
| 		goto out; | ||||
| 
 | ||||
| 	extent_item_pos = swarn.logical - found_key.objectid; | ||||
| 	swarn.extent_item_size = found_key.offset; | ||||
| 
 | ||||
| 	eb = path->nodes[0]; | ||||
|  | @ -941,7 +952,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) | |||
| 			btrfs_warn_in_rcu(fs_info, | ||||
| "%s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu", | ||||
| 				errstr, swarn.logical, | ||||
| 				rcu_str_deref(dev->name), | ||||
| 				btrfs_dev_name(dev), | ||||
| 				swarn.physical, | ||||
| 				ref_level ? "node" : "leaf", | ||||
| 				ret < 0 ? -1 : ref_level, | ||||
|  | @ -949,12 +960,18 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) | |||
| 		} while (ret != 1); | ||||
| 		btrfs_release_path(path); | ||||
| 	} else { | ||||
| 		struct btrfs_backref_walk_ctx ctx = { 0 }; | ||||
| 
 | ||||
| 		btrfs_release_path(path); | ||||
| 
 | ||||
| 		ctx.bytenr = found_key.objectid; | ||||
| 		ctx.extent_item_pos = swarn.logical - found_key.objectid; | ||||
| 		ctx.fs_info = fs_info; | ||||
| 
 | ||||
| 		swarn.path = path; | ||||
| 		swarn.dev = dev; | ||||
| 		iterate_extent_inodes(fs_info, found_key.objectid, | ||||
| 					extent_item_pos, 1, | ||||
| 					scrub_print_warning_inode, &swarn, false); | ||||
| 
 | ||||
| 		iterate_extent_inodes(&ctx, true, scrub_print_warning_inode, &swarn); | ||||
| 	} | ||||
| 
 | ||||
| out: | ||||
|  | @ -1358,7 +1375,7 @@ corrected_error: | |||
| 			spin_unlock(&sctx->stat_lock); | ||||
| 			btrfs_err_rl_in_rcu(fs_info, | ||||
| 				"fixed up error at logical %llu on dev %s", | ||||
| 				logical, rcu_str_deref(dev->name)); | ||||
| 				logical, btrfs_dev_name(dev)); | ||||
| 		} | ||||
| 	} else { | ||||
| did_not_correct_error: | ||||
|  | @ -1367,7 +1384,7 @@ did_not_correct_error: | |||
| 		spin_unlock(&sctx->stat_lock); | ||||
| 		btrfs_err_rl_in_rcu(fs_info, | ||||
| 			"unable to fixup (regular) error at logical %llu on dev %s", | ||||
| 			logical, rcu_str_deref(dev->name)); | ||||
| 			logical, btrfs_dev_name(dev)); | ||||
| 	} | ||||
| 
 | ||||
| out: | ||||
|  | @ -1480,7 +1497,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock, | |||
| 			return -EIO; | ||||
| 		} | ||||
| 
 | ||||
| 		recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS); | ||||
| 		recover = kzalloc(sizeof(struct scrub_recover), GFP_KERNEL); | ||||
| 		if (!recover) { | ||||
| 			btrfs_put_bioc(bioc); | ||||
| 			btrfs_bio_counter_dec(fs_info); | ||||
|  | @ -1503,7 +1520,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock, | |||
| 			sblock = sblocks_for_recheck[mirror_index]; | ||||
| 			sblock->sctx = sctx; | ||||
| 
 | ||||
| 			sector = alloc_scrub_sector(sblock, logical, GFP_NOFS); | ||||
| 			sector = alloc_scrub_sector(sblock, logical); | ||||
| 			if (!sector) { | ||||
| 				spin_lock(&sctx->stat_lock); | ||||
| 				sctx->stat.malloc_errors++; | ||||
|  | @ -2313,14 +2330,14 @@ static void scrub_missing_raid56_worker(struct work_struct *work) | |||
| 		spin_unlock(&sctx->stat_lock); | ||||
| 		btrfs_err_rl_in_rcu(fs_info, | ||||
| 			"IO error rebuilding logical %llu for dev %s", | ||||
| 			logical, rcu_str_deref(dev->name)); | ||||
| 			logical, btrfs_dev_name(dev)); | ||||
| 	} else if (sblock->header_error || sblock->checksum_error) { | ||||
| 		spin_lock(&sctx->stat_lock); | ||||
| 		sctx->stat.uncorrectable_errors++; | ||||
| 		spin_unlock(&sctx->stat_lock); | ||||
| 		btrfs_err_rl_in_rcu(fs_info, | ||||
| 			"failed to rebuild valid logical %llu for dev %s", | ||||
| 			logical, rcu_str_deref(dev->name)); | ||||
| 			logical, btrfs_dev_name(dev)); | ||||
| 	} else { | ||||
| 		scrub_write_block_to_dev_replace(sblock); | ||||
| 	} | ||||
|  | @ -2425,7 +2442,7 @@ static int scrub_sectors(struct scrub_ctx *sctx, u64 logical, u32 len, | |||
| 		 */ | ||||
| 		u32 l = min(sectorsize, len); | ||||
| 
 | ||||
| 		sector = alloc_scrub_sector(sblock, logical, GFP_KERNEL); | ||||
| 		sector = alloc_scrub_sector(sblock, logical); | ||||
| 		if (!sector) { | ||||
| 			spin_lock(&sctx->stat_lock); | ||||
| 			sctx->stat.malloc_errors++; | ||||
|  | @ -2756,7 +2773,7 @@ static int scrub_sectors_for_parity(struct scrub_parity *sparity, | |||
| 	for (index = 0; len > 0; index++) { | ||||
| 		struct scrub_sector *sector; | ||||
| 
 | ||||
| 		sector = alloc_scrub_sector(sblock, logical, GFP_KERNEL); | ||||
| 		sector = alloc_scrub_sector(sblock, logical); | ||||
| 		if (!sector) { | ||||
| 			spin_lock(&sctx->stat_lock); | ||||
| 			sctx->stat.malloc_errors++; | ||||
|  | @ -3221,9 +3238,9 @@ static int scrub_raid56_data_stripe_for_parity(struct scrub_ctx *sctx, | |||
| 		extent_dev = bioc->stripes[0].dev; | ||||
| 		btrfs_put_bioc(bioc); | ||||
| 
 | ||||
| 		ret = btrfs_lookup_csums_range(csum_root, extent_start, | ||||
| 					       extent_start + extent_size - 1, | ||||
| 					       &sctx->csum_list, 1, false); | ||||
| 		ret = btrfs_lookup_csums_list(csum_root, extent_start, | ||||
| 					      extent_start + extent_size - 1, | ||||
| 					      &sctx->csum_list, 1, false); | ||||
| 		if (ret) { | ||||
| 			scrub_parity_mark_sectors_error(sparity, extent_start, | ||||
| 							extent_size); | ||||
|  | @ -3447,7 +3464,7 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx, | |||
| 			    cur_logical; | ||||
| 
 | ||||
| 		if (extent_flags & BTRFS_EXTENT_FLAG_DATA) { | ||||
| 			ret = btrfs_lookup_csums_range(csum_root, cur_logical, | ||||
| 			ret = btrfs_lookup_csums_list(csum_root, cur_logical, | ||||
| 					cur_logical + scrub_len - 1, | ||||
| 					&sctx->csum_list, 1, false); | ||||
| 			if (ret) | ||||
|  | @ -4284,7 +4301,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, | |||
| 		mutex_unlock(&fs_info->fs_devices->device_list_mutex); | ||||
| 		btrfs_err_in_rcu(fs_info, | ||||
| 			"scrub on devid %llu: filesystem on %s is not writable", | ||||
| 				 devid, rcu_str_deref(dev->name)); | ||||
| 				 devid, btrfs_dev_name(dev)); | ||||
| 		ret = -EROFS; | ||||
| 		goto out; | ||||
| 	} | ||||
|  |  | |||
							
								
								
									
fs/btrfs/scrub.h | 16 (new file)
|  | @ -0,0 +1,16 @@ | |||
| /* SPDX-License-Identifier: GPL-2.0 */ | ||||
| 
 | ||||
| #ifndef BTRFS_SCRUB_H | ||||
| #define BTRFS_SCRUB_H | ||||
| 
 | ||||
| int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, | ||||
| 		    u64 end, struct btrfs_scrub_progress *progress, | ||||
| 		    int readonly, int is_dev_replace); | ||||
| void btrfs_scrub_pause(struct btrfs_fs_info *fs_info); | ||||
| void btrfs_scrub_continue(struct btrfs_fs_info *fs_info); | ||||
| int btrfs_scrub_cancel(struct btrfs_fs_info *info); | ||||
| int btrfs_scrub_cancel_dev(struct btrfs_device *dev); | ||||
| int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid, | ||||
| 			 struct btrfs_scrub_progress *progress); | ||||
| 
 | ||||
| #endif | ||||
							
								
								
									
fs/btrfs/send.c | 496
|  | @ -27,6 +27,11 @@ | |||
| #include "compression.h" | ||||
| #include "xattr.h" | ||||
| #include "print-tree.h" | ||||
| #include "accessors.h" | ||||
| #include "dir-item.h" | ||||
| #include "file-item.h" | ||||
| #include "ioctl.h" | ||||
| #include "verity.h" | ||||
| 
 | ||||
| /*
 | ||||
|  * Maximum number of references an extent can have in order for us to attempt to | ||||
|  | @ -34,7 +39,7 @@ | |||
|  * avoid hitting limitations of the backreference walking code (taking a lot of | ||||
|  * time and using too much memory for extents with a large number of references). | ||||
|  */ | ||||
| #define SEND_MAX_EXTENT_REFS	64 | ||||
| #define SEND_MAX_EXTENT_REFS	1024 | ||||
| 
 | ||||
| /*
 | ||||
|  * A fs_path is a helper to dynamically build path names with unknown size. | ||||
|  | @ -71,13 +76,46 @@ struct clone_root { | |||
| 	struct btrfs_root *root; | ||||
| 	u64 ino; | ||||
| 	u64 offset; | ||||
| 
 | ||||
| 	u64 found_refs; | ||||
| 	u64 num_bytes; | ||||
| 	bool found_ref; | ||||
| }; | ||||
| 
 | ||||
| #define SEND_CTX_MAX_NAME_CACHE_SIZE 128 | ||||
| #define SEND_CTX_NAME_CACHE_CLEAN_SIZE (SEND_CTX_MAX_NAME_CACHE_SIZE * 2) | ||||
| 
 | ||||
| /*
 | ||||
|  * Limit the root_ids array of struct backref_cache_entry to 12 elements. | ||||
|  * This makes the size of a cache entry to be exactly 128 bytes on x86_64. | ||||
|  * The most common case is to have a single root for cloning, which corresponds | ||||
|  * to the send root. Having the user specify more than 11 clone roots is not | ||||
|  * common, and in such rare cases we simply don't use caching if the number of | ||||
|  * cloning roots that lead down to a leaf is more than 12. | ||||
|  */ | ||||
| #define SEND_MAX_BACKREF_CACHE_ROOTS 12 | ||||
| 
 | ||||
| /*
 | ||||
|  * Max number of entries in the cache. | ||||
|  * With SEND_MAX_BACKREF_CACHE_ROOTS as 12, the size in bytes, excluding | ||||
|  * maple tree's internal nodes, is 16K. | ||||
|  */ | ||||
| #define SEND_MAX_BACKREF_CACHE_SIZE 128 | ||||
| 
 | ||||
| /*
 | ||||
|  * A backref cache entry maps a leaf to a list of IDs of roots from which the | ||||
|  * leaf is accessible and which we can use for clone operations. | ||||
|  * With SEND_MAX_BACKREF_CACHE_ROOTS as 12, each cache entry is 128 bytes (on | ||||
|  * x86_64). | ||||
|  */ | ||||
| struct backref_cache_entry { | ||||
| 	/* List to link to the cache's lru list. */ | ||||
| 	struct list_head list; | ||||
| 	/* The key for this entry in the cache. */ | ||||
| 	u64 key; | ||||
| 	u64 root_ids[SEND_MAX_BACKREF_CACHE_ROOTS]; | ||||
| 	/* Number of valid elements in the root_ids array. */ | ||||
| 	int num_roots; | ||||
| }; | ||||
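The 128-byte claim is easy to check on x86_64: 16 bytes of list_head + 8 for key + 12 * 8 = 96 for root_ids + 4 for num_roots, plus 4 bytes of tail padding for u64 alignment, gives 16 + 8 + 96 + 4 + 4 = 128. At SEND_MAX_BACKREF_CACHE_SIZE = 128 entries, that is the 16K mentioned above, excluding the maple tree's own nodes. A compile-time guard along these lines would pin the budget down (a sketch, not part of the patch):

/* Holds on LP64 targets such as x86_64, per the sizing comment above. */
static_assert(sizeof(struct backref_cache_entry) == 128,
	      "backref cache entry outgrew its 128-byte budget");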
| 
 | ||||
| struct send_ctx { | ||||
| 	struct file *send_filp; | ||||
| 	loff_t send_off; | ||||
|  | @ -246,6 +284,14 @@ struct send_ctx { | |||
| 
 | ||||
| 	struct rb_root rbtree_new_refs; | ||||
| 	struct rb_root rbtree_deleted_refs; | ||||
| 
 | ||||
| 	struct { | ||||
| 		u64 last_reloc_trans; | ||||
| 		struct list_head lru_list; | ||||
| 		struct maple_tree entries; | ||||
| 		/* Number of entries stored in the cache. */ | ||||
| 		int size; | ||||
| 	} backref_cache; | ||||
| }; | ||||
| 
 | ||||
| struct pending_dir_move { | ||||
|  | @ -1093,7 +1139,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, | |||
| 		data_len = btrfs_dir_data_len(eb, di); | ||||
| 		btrfs_dir_item_key_to_cpu(eb, di, &di_key); | ||||
| 
 | ||||
| 		if (btrfs_dir_type(eb, di) == BTRFS_FT_XATTR) { | ||||
| 		if (btrfs_dir_ftype(eb, di) == BTRFS_FT_XATTR) { | ||||
| 			if (name_len > XATTR_NAME_MAX) { | ||||
| 				ret = -ENAMETOOLONG; | ||||
| 				goto out; | ||||
|  | @ -1236,8 +1282,12 @@ struct backref_ctx { | |||
| 	/* may be truncated in case it's the last extent in a file */ | ||||
| 	u64 extent_len; | ||||
| 
 | ||||
| 	/* Just to check for bugs in backref resolving */ | ||||
| 	int found_itself; | ||||
| 	/* The bytenr the file extent item we are processing refers to. */ | ||||
| 	u64 bytenr; | ||||
| 	/* The owner (root id) of the data backref for the current extent. */ | ||||
| 	u64 backref_owner; | ||||
| 	/* The offset of the data backref for the current extent. */ | ||||
| 	u64 backref_offset; | ||||
| }; | ||||
| 
 | ||||
| static int __clone_root_cmp_bsearch(const void *key, const void *elt) | ||||
|  | @ -1266,32 +1316,33 @@ static int __clone_root_cmp_sort(const void *e1, const void *e2) | |||
| 
 | ||||
| /*
 | ||||
|  * Called for every backref that is found for the current extent. | ||||
|  * Results are collected in sctx->clone_roots->ino/offset/found_refs | ||||
|  * Results are collected in sctx->clone_roots->ino/offset. | ||||
|  */ | ||||
| static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_) | ||||
| static int iterate_backrefs(u64 ino, u64 offset, u64 num_bytes, u64 root_id, | ||||
| 			    void *ctx_) | ||||
| { | ||||
| 	struct backref_ctx *bctx = ctx_; | ||||
| 	struct clone_root *found; | ||||
| 	struct clone_root *clone_root; | ||||
| 
 | ||||
| 	/* First check if the root is in the list of accepted clone sources */ | ||||
| 	found = bsearch((void *)(uintptr_t)root, bctx->sctx->clone_roots, | ||||
| 			bctx->sctx->clone_roots_cnt, | ||||
| 			sizeof(struct clone_root), | ||||
| 			__clone_root_cmp_bsearch); | ||||
| 	if (!found) | ||||
| 	clone_root = bsearch((void *)(uintptr_t)root_id, bctx->sctx->clone_roots, | ||||
| 			     bctx->sctx->clone_roots_cnt, | ||||
| 			     sizeof(struct clone_root), | ||||
| 			     __clone_root_cmp_bsearch); | ||||
| 	if (!clone_root) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	if (found->root == bctx->sctx->send_root && | ||||
| 	/* This is our own reference, bail out as we can't clone from it. */ | ||||
| 	if (clone_root->root == bctx->sctx->send_root && | ||||
| 	    ino == bctx->cur_objectid && | ||||
| 	    offset == bctx->cur_offset) { | ||||
| 		bctx->found_itself = 1; | ||||
| 	} | ||||
| 	    offset == bctx->cur_offset) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Make sure we don't consider clones from send_root that are | ||||
| 	 * behind the current inode/offset. | ||||
| 	 */ | ||||
| 	if (found->root == bctx->sctx->send_root) { | ||||
| 	if (clone_root->root == bctx->sctx->send_root) { | ||||
| 		/*
 | ||||
| 		 * If the source inode was not yet processed we can't issue a | ||||
| 		 * clone operation, as the source extent does not exist yet at | ||||
|  | @ -1312,21 +1363,217 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_) | |||
| 	} | ||||
| 
 | ||||
| 	bctx->found++; | ||||
| 	found->found_refs++; | ||||
| 	if (ino < found->ino) { | ||||
| 		found->ino = ino; | ||||
| 		found->offset = offset; | ||||
| 	} else if (found->ino == ino) { | ||||
| 	clone_root->found_ref = true; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * If the given backref refers to a file extent item with a larger | ||||
| 	 * number of bytes than what we found before, use the new one so that | ||||
| 	 * we clone more optimally and end up doing fewer writes and getting | ||||
| 	 * fewer exclusive, non-shared extents at the destination. | ||||
| 	 */ | ||||
| 	if (num_bytes > clone_root->num_bytes) { | ||||
| 		clone_root->ino = ino; | ||||
| 		clone_root->offset = offset; | ||||
| 		clone_root->num_bytes = num_bytes; | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * same extent found more than once in the same file. | ||||
| 		 * Found a perfect candidate, so there's no need to continue | ||||
| 		 * backref walking. | ||||
| 		 */ | ||||
| 		if (found->offset > offset + bctx->extent_len) | ||||
| 			found->offset = offset; | ||||
| 		if (num_bytes >= bctx->extent_len) | ||||
| 			return BTRFS_ITERATE_EXTENT_INODES_STOP; | ||||
| 	} | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
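Returning BTRFS_ITERATE_EXTENT_INODES_STOP lets the callback cut the potentially expensive backref walk short as soon as a perfect candidate shows up, without turning that into an error. Presumably the iterator core separates the two cases along these lines (assumed semantics, not shown in this diff):

ret = callback(ino, offset, num_bytes, root_id, user_ctx);
if (ret == BTRFS_ITERATE_EXTENT_INODES_STOP) {
	ret = 0;	/* early stop requested by the callback */
	break;
}
if (ret < 0)
	break;		/* real failure, propagate to the caller */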
| 
 | ||||
| static void empty_backref_cache(struct send_ctx *sctx) | ||||
| { | ||||
| 	struct backref_cache_entry *entry; | ||||
| 	struct backref_cache_entry *tmp; | ||||
| 
 | ||||
| 	list_for_each_entry_safe(entry, tmp, &sctx->backref_cache.lru_list, list) | ||||
| 		kfree(entry); | ||||
| 
 | ||||
| 	INIT_LIST_HEAD(&sctx->backref_cache.lru_list); | ||||
| 	mtree_destroy(&sctx->backref_cache.entries); | ||||
| 	sctx->backref_cache.size = 0; | ||||
| } | ||||
| 
 | ||||
| static bool lookup_backref_cache(u64 leaf_bytenr, void *ctx, | ||||
| 				 const u64 **root_ids_ret, int *root_count_ret) | ||||
| { | ||||
| 	struct backref_ctx *bctx = ctx; | ||||
| 	struct send_ctx *sctx = bctx->sctx; | ||||
| 	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info; | ||||
| 	const u64 key = leaf_bytenr >> fs_info->sectorsize_bits; | ||||
| 	struct backref_cache_entry *entry; | ||||
| 
 | ||||
| 	if (sctx->backref_cache.size == 0) | ||||
| 		return false; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * If relocation happened since we first filled the cache, then we must | ||||
| 	 * empty the cache and can not use it, because even though we operate on | ||||
| 	 * read-only roots, their leaves and nodes may have been reallocated and | ||||
| 	 * now be used for different nodes/leaves of the same tree or some other | ||||
| 	 * tree. | ||||
| 	 * | ||||
| 	 * We are called from iterate_extent_inodes() while either holding a | ||||
| 	 * transaction handle or holding fs_info->commit_root_sem, so no need | ||||
| 	 * to take any lock here. | ||||
| 	 */ | ||||
| 	if (fs_info->last_reloc_trans > sctx->backref_cache.last_reloc_trans) { | ||||
| 		empty_backref_cache(sctx); | ||||
| 		return false; | ||||
| 	} | ||||
| 
 | ||||
| 	entry = mtree_load(&sctx->backref_cache.entries, key); | ||||
| 	if (!entry) | ||||
| 		return false; | ||||
| 
 | ||||
| 	*root_ids_ret = entry->root_ids; | ||||
| 	*root_count_ret = entry->num_roots; | ||||
| 	list_move_tail(&entry->list, &sctx->backref_cache.lru_list); | ||||
| 
 | ||||
| 	return true; | ||||
| } | ||||
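The cache keys its maple tree by the leaf bytenr shifted down by sectorsize_bits, which keeps the keys dense. The mtree_* calls used above are the plain pointer-store API from linux/maple_tree.h; a minimal self-contained sketch of the three operations:

#include <linux/gfp.h>
#include <linux/maple_tree.h>

static struct maple_tree mt;

static void maple_demo(void *payload)
{
	void *entry;

	mt_init(&mt);
	if (mtree_insert(&mt, 42, payload, GFP_NOFS) == 0) {	/* -EEXIST if taken */
		entry = mtree_load(&mt, 42);	/* lookup, NULL when absent */
		entry = mtree_erase(&mt, 42);	/* remove, returns the old entry */
	}
	mtree_destroy(&mt);
}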
| 
 | ||||
| static void store_backref_cache(u64 leaf_bytenr, const struct ulist *root_ids, | ||||
| 				void *ctx) | ||||
| { | ||||
| 	struct backref_ctx *bctx = ctx; | ||||
| 	struct send_ctx *sctx = bctx->sctx; | ||||
| 	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info; | ||||
| 	struct backref_cache_entry *new_entry; | ||||
| 	struct ulist_iterator uiter; | ||||
| 	struct ulist_node *node; | ||||
| 	int ret; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * We're called while holding a transaction handle or while holding | ||||
| 	 * fs_info->commit_root_sem (at iterate_extent_inodes()), so we must do a | ||||
| 	 * NOFS allocation. | ||||
| 	 */ | ||||
| 	new_entry = kmalloc(sizeof(struct backref_cache_entry), GFP_NOFS); | ||||
| 	/* No worries, cache is optional. */ | ||||
| 	if (!new_entry) | ||||
| 		return; | ||||
| 
 | ||||
| 	new_entry->key = leaf_bytenr >> fs_info->sectorsize_bits; | ||||
| 	new_entry->num_roots = 0; | ||||
| 	ULIST_ITER_INIT(&uiter); | ||||
| 	while ((node = ulist_next(root_ids, &uiter)) != NULL) { | ||||
| 		const u64 root_id = node->val; | ||||
| 		struct clone_root *root; | ||||
| 
 | ||||
| 		root = bsearch((void *)(uintptr_t)root_id, sctx->clone_roots, | ||||
| 			       sctx->clone_roots_cnt, sizeof(struct clone_root), | ||||
| 			       __clone_root_cmp_bsearch); | ||||
| 		if (!root) | ||||
| 			continue; | ||||
| 
 | ||||
| 		/* Too many roots, just exit, no worries as caching is optional. */ | ||||
| 		if (new_entry->num_roots >= SEND_MAX_BACKREF_CACHE_ROOTS) { | ||||
| 			kfree(new_entry); | ||||
| 			return; | ||||
| 		} | ||||
| 
 | ||||
| 		new_entry->root_ids[new_entry->num_roots] = root_id; | ||||
| 		new_entry->num_roots++; | ||||
| 	} | ||||
| 
 | ||||
| 	/*
| 	 * We may not have added any roots to the new cache entry, which means | ||||
| 	 * We may have not added any roots to the new cache entry, which means | ||||
| 	 * none of the roots is part of the list of roots from which we are | ||||
| 	 * allowed to clone. Cache the new entry as it's still useful to avoid | ||||
| 	 * backref walking to determine which roots have a path to the leaf. | ||||
| 	 */ | ||||
| 
 | ||||
| 	if (sctx->backref_cache.size >= SEND_MAX_BACKREF_CACHE_SIZE) { | ||||
| 		struct backref_cache_entry *lru_entry; | ||||
| 		struct backref_cache_entry *mt_entry; | ||||
| 
 | ||||
| 		lru_entry = list_first_entry(&sctx->backref_cache.lru_list, | ||||
| 					     struct backref_cache_entry, list); | ||||
| 		mt_entry = mtree_erase(&sctx->backref_cache.entries, lru_entry->key); | ||||
| 		ASSERT(mt_entry == lru_entry); | ||||
| 		list_del(&mt_entry->list); | ||||
| 		kfree(mt_entry); | ||||
| 		sctx->backref_cache.size--; | ||||
| 	} | ||||
| 
 | ||||
| 	ret = mtree_insert(&sctx->backref_cache.entries, new_entry->key, | ||||
| 			   new_entry, GFP_NOFS); | ||||
| 	ASSERT(ret == 0 || ret == -ENOMEM); | ||||
| 	if (ret) { | ||||
| 		/* Caching is optional, no worries. */ | ||||
| 		kfree(new_entry); | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| 	list_add_tail(&new_entry->list, &sctx->backref_cache.lru_list); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * We are called from iterate_extent_inodes() while either holding a | ||||
| 	 * transaction handle or holding fs_info->commit_root_sem, so no need | ||||
| 	 * to take any lock here. | ||||
| 	 */ | ||||
| 	if (sctx->backref_cache.size == 0) | ||||
| 		sctx->backref_cache.last_reloc_trans = fs_info->last_reloc_trans; | ||||
| 
 | ||||
| 	sctx->backref_cache.size++; | ||||
| } | ||||
| 
 | ||||
| static int check_extent_item(u64 bytenr, const struct btrfs_extent_item *ei, | ||||
| 			     const struct extent_buffer *leaf, void *ctx) | ||||
| { | ||||
| 	const u64 refs = btrfs_extent_refs(leaf, ei); | ||||
| 	const struct backref_ctx *bctx = ctx; | ||||
| 	const struct send_ctx *sctx = bctx->sctx; | ||||
| 
 | ||||
| 	if (bytenr == bctx->bytenr) { | ||||
| 		const u64 flags = btrfs_extent_flags(leaf, ei); | ||||
| 
 | ||||
| 		if (WARN_ON(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) | ||||
| 			return -EUCLEAN; | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * If we have only one reference and only the send root as a | ||||
| 		 * clone source - meaning no clone roots were given in the | ||||
| 		 * struct btrfs_ioctl_send_args passed to the send ioctl - then | ||||
| 		 * it's our reference and there's no point in doing backref | ||||
| 		 * walking which is expensive, so exit early. | ||||
| 		 */ | ||||
| 		if (refs == 1 && sctx->clone_roots_cnt == 1) | ||||
| 			return -ENOENT; | ||||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Backreference walking (iterate_extent_inodes() below) is currently | ||||
| 	 * too expensive when an extent has a large number of references, both | ||||
| 	 * in time spent and used memory. So for now just fall back to write | ||||
| 	 * operations instead of clone operations when an extent has more than | ||||
| 	 * a certain amount of references. | ||||
| 	 */ | ||||
| 	if (refs > SEND_MAX_EXTENT_REFS) | ||||
| 		return -ENOENT; | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static bool skip_self_data_ref(u64 root, u64 ino, u64 offset, void *ctx) | ||||
| { | ||||
| 	const struct backref_ctx *bctx = ctx; | ||||
| 
 | ||||
| 	if (ino == bctx->cur_objectid && | ||||
| 	    root == bctx->backref_owner && | ||||
| 	    offset == bctx->backref_offset) | ||||
| 		return true; | ||||
| 
 | ||||
| 	return false; | ||||
| } | ||||
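Neither check_extent_item() nor skip_self_data_ref() is called directly from send; going by how find_extent_clone() below fills a btrfs_backref_walk_ctx, they are presumably wired into the walk context next to the cache hooks, roughly like this (field names assumed from this series, not verified against the header):

struct btrfs_backref_walk_ctx ctx = { 0 };

ctx.fs_info = fs_info;
ctx.bytenr = disk_byte;
/* Backref cache hooks shown earlier. */
ctx.cache_lookup = lookup_backref_cache;
ctx.cache_store = store_backref_cache;
/* Early-exit filters defined above. */
ctx.check_extent_item = check_extent_item;
ctx.skip_data_ref = skip_self_data_ref;

ret = iterate_extent_inodes(&ctx, true, iterate_backrefs, &backref_ctx);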
| 
 | ||||
/*
 * Given an inode, offset and extent item, it finds a good clone for a clone
 * instruction. Returns -ENOENT when none could be found. The function makes
@@ -1348,78 +1595,35 @@ static int find_extent_clone(struct send_ctx *sctx,
	u64 logical;
	u64 disk_byte;
	u64 num_bytes;
	u64 extent_item_pos;
	u64 flags = 0;
	struct btrfs_file_extent_item *fi;
	struct extent_buffer *eb = path->nodes[0];
	struct backref_ctx backref_ctx = {0};
	struct backref_ctx backref_ctx = { 0 };
	struct btrfs_backref_walk_ctx backref_walk_ctx = { 0 };
	struct clone_root *cur_clone_root;
	struct btrfs_key found_key;
	struct btrfs_path *tmp_path;
	struct btrfs_extent_item *ei;
	int compressed;
	u32 i;

	tmp_path = alloc_path_for_send();
	if (!tmp_path)
		return -ENOMEM;

	/* We only use this path under the commit sem */
	tmp_path->need_commit_sem = 0;

	if (data_offset >= ino_size) {
		/*
		 * There may be extents that lie behind the file's size.
		 * I at least had this in combination with snapshotting while
		 * writing large files.
		 */
		ret = 0;
		goto out;
	}

	fi = btrfs_item_ptr(eb, path->slots[0],
			struct btrfs_file_extent_item);
	extent_type = btrfs_file_extent_type(eb, fi);
	if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
		ret = -ENOENT;
		goto out;
	}
	compressed = btrfs_file_extent_compression(eb, fi);

	num_bytes = btrfs_file_extent_num_bytes(eb, fi);
	disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
	if (disk_byte == 0) {
		ret = -ENOENT;
		goto out;
	}
	logical = disk_byte + btrfs_file_extent_offset(eb, fi);

	down_read(&fs_info->commit_root_sem);
	ret = extent_from_logical(fs_info, disk_byte, tmp_path,
				  &found_key, &flags);
	up_read(&fs_info->commit_root_sem);

	if (ret < 0)
		goto out;
	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		ret = -EIO;
		goto out;
	}

	ei = btrfs_item_ptr(tmp_path->nodes[0], tmp_path->slots[0],
			    struct btrfs_extent_item);
	/*
	 * Backreference walking (iterate_extent_inodes() below) is currently
	 * too expensive when an extent has a large number of references, both
	 * in time spent and used memory. So for now just fallback to write
	 * operations instead of clone operations when an extent has more than
	 * a certain amount of references.
	 * With fallocate we can get prealloc extents beyond the inode's i_size,
	 * so we don't do anything here because clone operations can not clone
	 * to a range beyond i_size without increasing the i_size of the
	 * destination inode.
	 */
	if (btrfs_extent_refs(tmp_path->nodes[0], ei) > SEND_MAX_EXTENT_REFS) {
		ret = -ENOENT;
		goto out;
	}
	btrfs_release_path(tmp_path);
	if (data_offset >= ino_size)
		return 0;

	fi = btrfs_item_ptr(eb, path->slots[0], struct btrfs_file_extent_item);
	extent_type = btrfs_file_extent_type(eb, fi);
	if (extent_type == BTRFS_FILE_EXTENT_INLINE)
		return -ENOENT;

	disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
	if (disk_byte == 0)
		return -ENOENT;

	compressed = btrfs_file_extent_compression(eb, fi);
	num_bytes = btrfs_file_extent_num_bytes(eb, fi);
	logical = disk_byte + btrfs_file_extent_offset(eb, fi);

	/*
	 * Setup the clone roots.
@@ -1428,37 +1632,59 @@ static int find_extent_clone(struct send_ctx *sctx,
		cur_clone_root = sctx->clone_roots + i;
		cur_clone_root->ino = (u64)-1;
		cur_clone_root->offset = 0;
		cur_clone_root->found_refs = 0;
		cur_clone_root->num_bytes = 0;
		cur_clone_root->found_ref = false;
	}

	backref_ctx.sctx = sctx;
	backref_ctx.found = 0;
	backref_ctx.cur_objectid = ino;
	backref_ctx.cur_offset = data_offset;
	backref_ctx.found_itself = 0;
	backref_ctx.extent_len = num_bytes;
	backref_ctx.bytenr = disk_byte;
	/*
	 * Use the header owner and not the send root's id, because in case of a
	 * snapshot we can have shared subtrees.
	 */
	backref_ctx.backref_owner = btrfs_header_owner(eb);
	backref_ctx.backref_offset = data_offset - btrfs_file_extent_offset(eb, fi);

	/*
	 * The last extent of a file may be too large due to page alignment.
	 * We need to adjust extent_len in this case so that the checks in
	 * __iterate_backrefs work.
	 * iterate_backrefs() work.
	 */
	if (data_offset + num_bytes >= ino_size)
		backref_ctx.extent_len = ino_size - data_offset;
	else
		backref_ctx.extent_len = num_bytes;

	/*
	 * Now collect all backrefs.
	 */
	backref_walk_ctx.bytenr = disk_byte;
	if (compressed == BTRFS_COMPRESS_NONE)
		extent_item_pos = logical - found_key.objectid;
	else
		extent_item_pos = 0;
	ret = iterate_extent_inodes(fs_info, found_key.objectid,
				    extent_item_pos, 1, __iterate_backrefs,
				    &backref_ctx, false);
		backref_walk_ctx.extent_item_pos = btrfs_file_extent_offset(eb, fi);
	backref_walk_ctx.fs_info = fs_info;
	backref_walk_ctx.cache_lookup = lookup_backref_cache;
	backref_walk_ctx.cache_store = store_backref_cache;
	backref_walk_ctx.indirect_ref_iterator = iterate_backrefs;
	backref_walk_ctx.check_extent_item = check_extent_item;
	backref_walk_ctx.user_ctx = &backref_ctx;

	/*
	 * If have a single clone root, then it's the send root and we can tell
	 * the backref walking code to skip our own backref and not resolve it,
	 * since we can not use it for cloning - the source and destination
	 * ranges can't overlap and in case the leaf is shared through a subtree
	 * due to snapshots, we can't use those other roots since they are not
	 * in the list of clone roots.
	 */
	if (sctx->clone_roots_cnt == 1)
		backref_walk_ctx.skip_data_ref = skip_self_data_ref;

	ret = iterate_extent_inodes(&backref_walk_ctx, true, iterate_backrefs,
				    &backref_ctx);
	if (ret < 0)
		goto out;
		return ret;

	down_read(&fs_info->commit_root_sem);
	if (fs_info->last_reloc_trans > sctx->last_reloc_trans) {
@@ -1475,37 +1701,42 @@ static int find_extent_clone(struct send_ctx *sctx,
		 * was already reallocated after the relocation.
		 */
		up_read(&fs_info->commit_root_sem);
		ret = -ENOENT;
		goto out;
		return -ENOENT;
	}
	up_read(&fs_info->commit_root_sem);

	if (!backref_ctx.found_itself) {
		/* found a bug in backref code? */
		ret = -EIO;
		btrfs_err(fs_info,
			  "did not find backref in send_root. inode=%llu, offset=%llu, disk_byte=%llu found extent=%llu",
			  ino, data_offset, disk_byte, found_key.objectid);
		goto out;
	}

	btrfs_debug(fs_info,
		    "find_extent_clone: data_offset=%llu, ino=%llu, num_bytes=%llu, logical=%llu",
		    data_offset, ino, num_bytes, logical);

	if (!backref_ctx.found)
	if (!backref_ctx.found) {
		btrfs_debug(fs_info, "no clones found");
		return -ENOENT;
	}

	cur_clone_root = NULL;
	for (i = 0; i < sctx->clone_roots_cnt; i++) {
		if (sctx->clone_roots[i].found_refs) {
			if (!cur_clone_root)
				cur_clone_root = sctx->clone_roots + i;
			else if (sctx->clone_roots[i].root == sctx->send_root)
				/* prefer clones from send_root over others */
				cur_clone_root = sctx->clone_roots + i;
		}
		struct clone_root *clone_root = &sctx->clone_roots[i];

		if (!clone_root->found_ref)
			continue;

		/*
		 * Choose the root from which we can clone more bytes, to
		 * minimize write operations and therefore have more extent
		 * sharing at the destination (the same as in the source).
		 */
		if (!cur_clone_root ||
		    clone_root->num_bytes > cur_clone_root->num_bytes) {
			cur_clone_root = clone_root;

			/*
			 * We found an optimal clone candidate (any inode from
			 * any root is fine), so we're done.
			 */
			if (clone_root->num_bytes >= backref_ctx.extent_len)
				break;
		}
	}

	if (cur_clone_root) {
@@ -1515,8 +1746,6 @@ static int find_extent_clone(struct send_ctx *sctx,
		ret = -ENOENT;
	}

out:
	btrfs_free_path(tmp_path);
	return ret;
}

@@ -1596,13 +1825,17 @@ static int gen_unique_name(struct send_ctx *sctx,
		return -ENOMEM;

	while (1) {
		struct fscrypt_str tmp_name;

		len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu",
				ino, gen, idx);
		ASSERT(len < sizeof(tmp));
		tmp_name.name = tmp;
		tmp_name.len = strlen(tmp);

		di = btrfs_lookup_dir_item(NULL, sctx->send_root,
				path, BTRFS_FIRST_FREE_OBJECTID,
				tmp, strlen(tmp), 0);
				&tmp_name, 0);
		btrfs_release_path(path);
		if (IS_ERR(di)) {
			ret = PTR_ERR(di);
@@ -1622,7 +1855,7 @@ static int gen_unique_name(struct send_ctx *sctx,

		di = btrfs_lookup_dir_item(NULL, sctx->parent_root,
				path, BTRFS_FIRST_FREE_OBJECTID,
				tmp, strlen(tmp), 0);
				&tmp_name, 0);
		btrfs_release_path(path);
		if (IS_ERR(di)) {
			ret = PTR_ERR(di);
@@ -1752,13 +1985,13 @@ static int lookup_dir_item_inode(struct btrfs_root *root,
	struct btrfs_dir_item *di;
	struct btrfs_key key;
	struct btrfs_path *path;
	struct fscrypt_str name_str = FSTR_INIT((char *)name, name_len);

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	di = btrfs_lookup_dir_item(NULL, root, path,
			dir, name, name_len, 0);
	di = btrfs_lookup_dir_item(NULL, root, path, dir, &name_str, 0);
	if (IS_ERR_OR_NULL(di)) {
		ret = di ? PTR_ERR(di) : -ENOENT;
		goto out;
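Both hunks above swap raw (name, len) pairs for struct fscrypt_str, part of the fscrypt groundwork in this pull. A small hedged illustration of the two initialization styles used here follows; it assumes only FSTR_INIT() and struct fscrypt_str from <linux/fscrypt.h>, and the names are made up, not taken from the patch.

/* Hypothetical example of building a struct fscrypt_str both ways. */
#include <linux/fscrypt.h>
#include <linux/string.h>

static void fscrypt_str_demo(void)
{
	/* Static init, as in lookup_dir_item_inode() above. */
	struct fscrypt_str name = FSTR_INIT("backup", 6);

	/* Field by field, as in gen_unique_name()'s loop. */
	char tmp[64] = "o257-10-0";	/* "o<ino>-<gen>-<idx>" */
	struct fscrypt_str tmp_name;

	tmp_name.name = tmp;
	tmp_name.len = strlen(tmp);

	(void)name;
	(void)tmp_name;
}
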
@@ -7863,6 +8096,9 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg)
	INIT_RADIX_TREE(&sctx->name_cache, GFP_KERNEL);
	INIT_LIST_HEAD(&sctx->name_cache_list);

	INIT_LIST_HEAD(&sctx->backref_cache.lru_list);
	mt_init(&sctx->backref_cache.entries);

	sctx->flags = arg->flags;

	if (arg->flags & BTRFS_SEND_FLAG_VERSION) {
@@ -7901,7 +8137,7 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg)
	if (sctx->proto >= 2) {
		u32 send_buf_num_pages;

		sctx->send_max_size = ALIGN(SZ_16K + BTRFS_MAX_COMPRESSED, PAGE_SIZE);
		sctx->send_max_size = BTRFS_SEND_BUF_SIZE_V2;
		sctx->send_buf = vmalloc(sctx->send_max_size);
		if (!sctx->send_buf) {
			ret = -ENOMEM;
@@ -8125,6 +8361,8 @@ out:

		close_current_inode(sctx);

		empty_backref_cache(sctx);

		kfree(sctx);
	}


@@ -18,10 +18,12 @@
#endif

/*
 * In send stream v1, no command is larger than 64K. In send stream v2, no limit
 * should be assumed.
 * In send stream v1, no command is larger than 64K. In send stream v2, no
 * limit should be assumed, the buffer size is set to be a header with
 * compressed extent size.
 */
#define BTRFS_SEND_BUF_SIZE_V1				SZ_64K
#define BTRFS_SEND_BUF_SIZE_V2	ALIGN(SZ_16K + BTRFS_MAX_COMPRESSED, PAGE_SIZE)

struct inode;
struct btrfs_ioctl_send_args;

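As a sanity check of the new define: assuming BTRFS_MAX_COMPRESSED is 128K and PAGE_SIZE is 4K (both are config/arch dependent, so treat these values as assumptions), the v2 buffer comes out to 144K, which is already page aligned. A quick userspace re-derivation:

/* Back-of-the-envelope check of BTRFS_SEND_BUF_SIZE_V2, assuming
 * BTRFS_MAX_COMPRESSED == 128K and PAGE_SIZE == 4K.
 */
#include <stdio.h>

#define ALIGN_UP(x, a)	(((x) + (a) - 1) / (a) * (a))

int main(void)
{
	unsigned long header = 16UL * 1024;		/* SZ_16K */
	unsigned long max_compressed = 128UL * 1024;	/* BTRFS_MAX_COMPRESSED */
	unsigned long page = 4096;			/* PAGE_SIZE */

	/* 16K + 128K = 144K, a multiple of 4K: prints 147456. */
	printf("%lu\n", ALIGN_UP(header + max_compressed, page));
	return 0;
}
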
@@ -10,6 +10,9 @@
#include "transaction.h"
#include "block-group.h"
#include "zoned.h"
#include "fs.h"
#include "accessors.h"
#include "extent-tree.h"

/*
 * HOW DOES SPACE RESERVATION WORK
@@ -856,7 +859,7 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
	u64 thresh;
	u64 used;

	thresh = div_factor_fine(total, 90);
	thresh = mult_perc(total, 90);

	lockdep_assert_held(&space_info->lock);

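The div_factor()/div_factor_fine() to mult_perc() conversions in this series are renames of the same math: the old helpers scaled by tenths and hundredths respectively, the new one by percent. A tiny userspace re-derivation of the two conversions visible in these hunks (simplified: the kernel helper presumably uses div_u64() for 32-bit safety, which is not modeled here):

#include <assert.h>
#include <stdint.h>

static uint64_t div_factor(uint64_t num, int factor)		/* tenths */
{
	return num * factor / 10;
}

static uint64_t div_factor_fine(uint64_t num, int factor)	/* hundredths */
{
	return num * factor / 100;
}

static uint64_t mult_perc(uint64_t num, uint32_t percent)
{
	return num * percent / 100;
}

int main(void)
{
	uint64_t total = 1000000, rsv = 500000;

	/* div_factor_fine(total, 90) -> mult_perc(total, 90): 90% */
	assert(div_factor_fine(total, 90) == mult_perc(total, 90));
	/* div_factor(rsv, 1) -> mult_perc(rsv, 10): 10% */
	assert(div_factor(rsv, 1) == mult_perc(rsv, 10));
	return 0;
}
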
@@ -974,7 +977,7 @@ static bool steal_from_global_rsv(struct btrfs_fs_info *fs_info,
		return false;

	spin_lock(&global_rsv->lock);
	min_bytes = div_factor(global_rsv->size, 1);
	min_bytes = mult_perc(global_rsv->size, 10);
	if (global_rsv->reserved < min_bytes + ticket->bytes) {
		spin_unlock(&global_rsv->lock);
		return false;
@@ -1490,8 +1493,8 @@ static void wait_reserve_ticket(struct btrfs_fs_info *fs_info,
	spin_unlock(&space_info->lock);
}

/**
 * Do the appropriate flushing and waiting for a ticket
/*
 * Do the appropriate flushing and waiting for a ticket.
 *
 * @fs_info:    the filesystem
 * @space_info: space info for the reservation
@@ -1583,8 +1586,18 @@ static inline bool can_steal(enum btrfs_reserve_flush_enum flush)
		flush == BTRFS_RESERVE_FLUSH_EVICT);
}

/**
 * Try to reserve bytes from the block_rsv's space
/*
 * NO_FLUSH and FLUSH_EMERGENCY don't want to create a ticket, they just want to
 * fail as quickly as possible.
 */
static inline bool can_ticket(enum btrfs_reserve_flush_enum flush)
{
	return (flush != BTRFS_RESERVE_NO_FLUSH &&
		flush != BTRFS_RESERVE_FLUSH_EMERGENCY);
}

/*
 * Try to reserve bytes from the block_rsv's space.
 *
 * @fs_info:    the filesystem
 * @space_info: space info we want to allocate from
@@ -1644,6 +1657,21 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
		ret = 0;
	}

	/*
	 * Things are dire, we need to make a reservation so we don't abort.  We
	 * will let this reservation go through as long as we have actual space
	 * left to allocate for the block.
	 */
	if (ret && unlikely(flush == BTRFS_RESERVE_FLUSH_EMERGENCY)) {
		used = btrfs_space_info_used(space_info, false);
		if (used + orig_bytes <=
		    writable_total_bytes(fs_info, space_info)) {
			btrfs_space_info_update_bytes_may_use(fs_info, space_info,
							      orig_bytes);
			ret = 0;
		}
	}

	/*
	 * If we couldn't make a reservation then setup our reservation ticket
	 * and kick the async worker if it's not already running.
@@ -1651,7 +1679,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
	 * If we are a priority flusher then we just need to add our ticket to
	 * the list and we will do our own flushing further down.
	 */
	if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
	if (ret && can_ticket(flush)) {
		ticket.bytes = orig_bytes;
		ticket.error = 0;
		space_info->reclaim_size += ticket.bytes;
@@ -1701,15 +1729,15 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
		}
	}
	spin_unlock(&space_info->lock);
	if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
	if (!ret || !can_ticket(flush))
		return ret;

	return handle_reserve_ticket(fs_info, space_info, &ticket, start_ns,
				     orig_bytes, flush);
}

/**
 *  Trye to reserve metadata bytes from the block_rsv's space
/*
 * Try to reserve metadata bytes from the block_rsv's space.
 *
 * @fs_info:    the filesystem
 * @block_rsv:  block_rsv we're allocating for
@@ -1743,8 +1771,8 @@ int btrfs_reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
	return ret;
}

/**
 * Try to reserve data bytes for an allocation
/*
 * Try to reserve data bytes for an allocation.
 *
 * @fs_info: the filesystem
 * @bytes:   number of bytes we need
@@ -1787,3 +1815,37 @@ __cold void btrfs_dump_space_info_for_trans_abort(struct btrfs_fs_info *fs_info)
	}
	dump_global_block_rsv(fs_info);
}

/*
 * Account the unused space of all the readonly block group in the space_info.
 * takes mirrors into account.
 */
u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
{
	struct btrfs_block_group *block_group;
	u64 free_bytes = 0;
	int factor;

	/* It's df, we don't care if it's racy */
	if (list_empty(&sinfo->ro_bgs))
		return 0;

	spin_lock(&sinfo->lock);
	list_for_each_entry(block_group, &sinfo->ro_bgs, ro_list) {
		spin_lock(&block_group->lock);

		if (!block_group->ro) {
			spin_unlock(&block_group->lock);
			continue;
		}

		factor = btrfs_bg_type_to_factor(block_group->flags);
		free_bytes += (block_group->length -
			       block_group->used) * factor;

		spin_unlock(&block_group->lock);
	}
	spin_unlock(&sinfo->lock);

	return free_bytes;
}

@@ -5,6 +5,83 @@

#include "volumes.h"

/*
 * Different levels for to flush space when doing space reservations.
 *
 * The higher the level, the more methods we try to reclaim space.
 */
enum btrfs_reserve_flush_enum {
	/* If we are in the transaction, we can't flush anything.*/
	BTRFS_RESERVE_NO_FLUSH,

	/*
	 * Flush space by:
	 * - Running delayed inode items
	 * - Allocating a new chunk
	 */
	BTRFS_RESERVE_FLUSH_LIMIT,

	/*
	 * Flush space by:
	 * - Running delayed inode items
	 * - Running delayed refs
	 * - Running delalloc and waiting for ordered extents
	 * - Allocating a new chunk
	 */
	BTRFS_RESERVE_FLUSH_EVICT,

	/*
	 * Flush space by above mentioned methods and by:
	 * - Running delayed iputs
	 * - Committing transaction
	 *
	 * Can be interrupted by a fatal signal.
	 */
	BTRFS_RESERVE_FLUSH_DATA,
	BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE,
	BTRFS_RESERVE_FLUSH_ALL,

	/*
	 * Pretty much the same as FLUSH_ALL, but can also steal space from
	 * global rsv.
	 *
	 * Can be interrupted by a fatal signal.
	 */
	BTRFS_RESERVE_FLUSH_ALL_STEAL,

	/*
	 * This is for btrfs_use_block_rsv only.  We have exhausted our block
	 * rsv and our global block rsv.  This can happen for things like
	 * delalloc where we are overwriting a lot of extents with a single
	 * extent and didn't reserve enough space.  Alternatively it can happen
	 * with delalloc where we reserve 1 extents worth for a large extent but
	 * fragmentation leads to multiple extents being created.  This will
	 * give us the reservation in the case of
	 *
	 * if (num_bytes < (space_info->total_bytes -
	 *		    btrfs_space_info_used(space_info, false))
	 *
	 * Which ignores bytes_may_use.  This is potentially dangerous, but our
	 * reservation system is generally pessimistic so is able to absorb this
	 * style of mistake.
	 */
	BTRFS_RESERVE_FLUSH_EMERGENCY,
};

enum btrfs_flush_state {
	FLUSH_DELAYED_ITEMS_NR	= 1,
	FLUSH_DELAYED_ITEMS	= 2,
	FLUSH_DELAYED_REFS_NR	= 3,
	FLUSH_DELAYED_REFS	= 4,
	FLUSH_DELALLOC		= 5,
	FLUSH_DELALLOC_WAIT	= 6,
	FLUSH_DELALLOC_FULL	= 7,
	ALLOC_CHUNK		= 8,
	ALLOC_CHUNK_FORCE	= 9,
	RUN_DELAYED_IPUTS	= 10,
	COMMIT_TRANS		= 11,
};

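To make the new emergency level concrete, here is a sketch of how a caller might fall back to it after a normal reservation fails. Per the comment above, btrfs_use_block_rsv() is the intended user; this exact function and call sequence are illustrative, not the kernel's actual code, though btrfs_reserve_metadata_bytes() with these arguments matches the signature documented earlier in this diff.

/* Hypothetical caller-side sketch, not from the patch. */
static int reserve_with_emergency_fallback(struct btrfs_fs_info *fs_info,
					   struct btrfs_block_rsv *block_rsv,
					   u64 blocksize)
{
	int ret;

	ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv, blocksize,
					   BTRFS_RESERVE_FLUSH_ALL);
	if (ret == -ENOSPC) {
		/*
		 * Last resort: no ticket and no flushing (see can_ticket()),
		 * succeeds only if raw unallocated space is still available.
		 */
		ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv, blocksize,
						   BTRFS_RESERVE_FLUSH_EMERGENCY);
	}
	return ret;
}
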
struct btrfs_space_info {
	spinlock_t lock;

@@ -159,5 +236,6 @@ int btrfs_reserve_data_bytes(struct btrfs_fs_info *fs_info, u64 bytes,
			     enum btrfs_reserve_flush_enum flush);
void btrfs_dump_space_info_for_trans_abort(struct btrfs_fs_info *fs_info);
void btrfs_init_async_reclaim_work(struct btrfs_fs_info *fs_info);
u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);

#endif /* BTRFS_SPACE_INFO_H */

@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0

#include <linux/slab.h>
#include "messages.h"
#include "ctree.h"
#include "subpage.h"
#include "btrfs_inode.h"

fs/btrfs/super.c (554 changed lines)
@@ -26,6 +26,7 @@
#include <linux/ratelimit.h>
#include <linux/crc32c.h>
#include <linux/btrfs.h>
#include "messages.h"
#include "delayed-inode.h"
#include "ctree.h"
#include "disk-io.h"
@@ -34,7 +35,7 @@
#include "print-tree.h"
#include "props.h"
#include "xattr.h"
#include "volumes.h"
#include "bio.h"
#include "export.h"
#include "compression.h"
#include "rcu-string.h"
@@ -49,6 +50,14 @@
#include "discard.h"
#include "qgroup.h"
#include "raid56.h"
#include "fs.h"
#include "accessors.h"
#include "defrag.h"
#include "dir-item.h"
#include "ioctl.h"
#include "scrub.h"
#include "verity.h"
#include "super.h"
#define CREATE_TRACE_POINTS
#include <trace/events/btrfs.h>

@@ -67,328 +76,6 @@ static struct file_system_type btrfs_root_fs_type;

static int btrfs_remount(struct super_block *sb, int *flags, char *data);

#ifdef CONFIG_PRINTK

#define STATE_STRING_PREFACE	": state "
#define STATE_STRING_BUF_LEN	(sizeof(STATE_STRING_PREFACE) + BTRFS_FS_STATE_COUNT)

/*
 * Characters to print to indicate error conditions or uncommon filesystem state.
 * RO is not an error.
 */
static const char fs_state_chars[] = {
	[BTRFS_FS_STATE_ERROR]			= 'E',
	[BTRFS_FS_STATE_REMOUNTING]		= 'M',
	[BTRFS_FS_STATE_RO]			= 0,
	[BTRFS_FS_STATE_TRANS_ABORTED]		= 'A',
	[BTRFS_FS_STATE_DEV_REPLACING]		= 'R',
	[BTRFS_FS_STATE_DUMMY_FS_INFO]		= 0,
	[BTRFS_FS_STATE_NO_CSUMS]		= 'C',
	[BTRFS_FS_STATE_LOG_CLEANUP_ERROR]	= 'L',
};

static void btrfs_state_to_string(const struct btrfs_fs_info *info, char *buf)
{
	unsigned int bit;
	bool states_printed = false;
	unsigned long fs_state = READ_ONCE(info->fs_state);
	char *curr = buf;

	memcpy(curr, STATE_STRING_PREFACE, sizeof(STATE_STRING_PREFACE));
	curr += sizeof(STATE_STRING_PREFACE) - 1;

	for_each_set_bit(bit, &fs_state, sizeof(fs_state)) {
		WARN_ON_ONCE(bit >= BTRFS_FS_STATE_COUNT);
		if ((bit < BTRFS_FS_STATE_COUNT) && fs_state_chars[bit]) {
			*curr++ = fs_state_chars[bit];
			states_printed = true;
		}
	}

	/* If no states were printed, reset the buffer */
	if (!states_printed)
		curr = buf;

	*curr++ = 0;
}
#endif

/*
 * Generally the error codes correspond to their respective errors, but there
 * are a few special cases.
 *
 * EUCLEAN: Any sort of corruption that we encounter.  The tree-checker for
 *          instance will return EUCLEAN if any of the blocks are corrupted in
 *          a way that is problematic.  We want to reserve EUCLEAN for these
 *          sort of corruptions.
 *
 * EROFS: If we check BTRFS_FS_STATE_ERROR and fail out with a return error, we
 *        need to use EROFS for this case.  We will have no idea of the
 *        original failure, that will have been reported at the time we tripped
 *        over the error.  Each subsequent error that doesn't have any context
 *        of the original error should use EROFS when handling BTRFS_FS_STATE_ERROR.
 */
const char * __attribute_const__ btrfs_decode_error(int errno)
{
	char *errstr = "unknown";

	switch (errno) {
	case -ENOENT:		/* -2 */
		errstr = "No such entry";
		break;
	case -EIO:		/* -5 */
		errstr = "IO failure";
		break;
	case -ENOMEM:		/* -12*/
		errstr = "Out of memory";
		break;
	case -EEXIST:		/* -17 */
		errstr = "Object already exists";
		break;
	case -ENOSPC:		/* -28 */
		errstr = "No space left";
		break;
	case -EROFS:		/* -30 */
		errstr = "Readonly filesystem";
		break;
	case -EOPNOTSUPP:	/* -95 */
		errstr = "Operation not supported";
		break;
	case -EUCLEAN:		/* -117 */
		errstr = "Filesystem corrupted";
		break;
	case -EDQUOT:		/* -122 */
		errstr = "Quota exceeded";
		break;
	}

	return errstr;
}

/*
 * __btrfs_handle_fs_error decodes expected errors from the caller and
 * invokes the appropriate error response.
 */
__cold
void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function,
		       unsigned int line, int errno, const char *fmt, ...)
{
	struct super_block *sb = fs_info->sb;
#ifdef CONFIG_PRINTK
	char statestr[STATE_STRING_BUF_LEN];
	const char *errstr;
#endif

	/*
	 * Special case: if the error is EROFS, and we're already
	 * under SB_RDONLY, then it is safe here.
	 */
	if (errno == -EROFS && sb_rdonly(sb))
		return;

#ifdef CONFIG_PRINTK
	errstr = btrfs_decode_error(errno);
	btrfs_state_to_string(fs_info, statestr);
	if (fmt) {
		struct va_format vaf;
		va_list args;

		va_start(args, fmt);
		vaf.fmt = fmt;
		vaf.va = &args;

		pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s (%pV)\n",
			sb->s_id, statestr, function, line, errno, errstr, &vaf);
		va_end(args);
	} else {
		pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s\n",
			sb->s_id, statestr, function, line, errno, errstr);
	}
#endif

	/*
	 * Today we only save the error info to memory.  Long term we'll
	 * also send it down to the disk
	 */
	set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);

	/* Don't go through full error handling during mount */
	if (!(sb->s_flags & SB_BORN))
		return;

	if (sb_rdonly(sb))
		return;

	btrfs_discard_stop(fs_info);

	/* btrfs handle error by forcing the filesystem readonly */
	btrfs_set_sb_rdonly(sb);
	btrfs_info(fs_info, "forced readonly");
	/*
	 * Note that a running device replace operation is not canceled here
	 * although there is no way to update the progress. It would add the
	 * risk of a deadlock, therefore the canceling is omitted. The only
	 * penalty is that some I/O remains active until the procedure
	 * completes. The next time when the filesystem is mounted writable
	 * again, the device replace operation continues.
	 */
}

#ifdef CONFIG_PRINTK
static const char * const logtypes[] = {
	"emergency",
	"alert",
	"critical",
	"error",
	"warning",
	"notice",
	"info",
	"debug",
};


/*
 * Use one ratelimit state per log level so that a flood of less important
 * messages doesn't cause more important ones to be dropped.
 */
static struct ratelimit_state printk_limits[] = {
	RATELIMIT_STATE_INIT(printk_limits[0], DEFAULT_RATELIMIT_INTERVAL, 100),
	RATELIMIT_STATE_INIT(printk_limits[1], DEFAULT_RATELIMIT_INTERVAL, 100),
	RATELIMIT_STATE_INIT(printk_limits[2], DEFAULT_RATELIMIT_INTERVAL, 100),
	RATELIMIT_STATE_INIT(printk_limits[3], DEFAULT_RATELIMIT_INTERVAL, 100),
	RATELIMIT_STATE_INIT(printk_limits[4], DEFAULT_RATELIMIT_INTERVAL, 100),
	RATELIMIT_STATE_INIT(printk_limits[5], DEFAULT_RATELIMIT_INTERVAL, 100),
	RATELIMIT_STATE_INIT(printk_limits[6], DEFAULT_RATELIMIT_INTERVAL, 100),
	RATELIMIT_STATE_INIT(printk_limits[7], DEFAULT_RATELIMIT_INTERVAL, 100),
};

void __cold _btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
{
	char lvl[PRINTK_MAX_SINGLE_HEADER_LEN + 1] = "\0";
	struct va_format vaf;
	va_list args;
	int kern_level;
	const char *type = logtypes[4];
	struct ratelimit_state *ratelimit = &printk_limits[4];

	va_start(args, fmt);

	while ((kern_level = printk_get_level(fmt)) != 0) {
		size_t size = printk_skip_level(fmt) - fmt;

		if (kern_level >= '0' && kern_level <= '7') {
			memcpy(lvl, fmt,  size);
			lvl[size] = '\0';
			type = logtypes[kern_level - '0'];
			ratelimit = &printk_limits[kern_level - '0'];
		}
		fmt += size;
	}

	vaf.fmt = fmt;
	vaf.va = &args;

	if (__ratelimit(ratelimit)) {
		if (fs_info) {
			char statestr[STATE_STRING_BUF_LEN];

			btrfs_state_to_string(fs_info, statestr);
			_printk("%sBTRFS %s (device %s%s): %pV\n", lvl, type,
				fs_info->sb->s_id, statestr, &vaf);
		} else {
			_printk("%sBTRFS %s: %pV\n", lvl, type, &vaf);
		}
	}

	va_end(args);
}
#endif

#if BITS_PER_LONG == 32
void __cold btrfs_warn_32bit_limit(struct btrfs_fs_info *fs_info)
{
	if (!test_and_set_bit(BTRFS_FS_32BIT_WARN, &fs_info->flags)) {
		btrfs_warn(fs_info, "reaching 32bit limit for logical addresses");
		btrfs_warn(fs_info,
"due to page cache limit on 32bit systems, btrfs can't access metadata at or beyond %lluT",
			   BTRFS_32BIT_MAX_FILE_SIZE >> 40);
		btrfs_warn(fs_info,
			   "please consider upgrading to 64bit kernel/hardware");
	}
}

void __cold btrfs_err_32bit_limit(struct btrfs_fs_info *fs_info)
{
	if (!test_and_set_bit(BTRFS_FS_32BIT_ERROR, &fs_info->flags)) {
		btrfs_err(fs_info, "reached 32bit limit for logical addresses");
		btrfs_err(fs_info,
"due to page cache limit on 32bit systems, metadata beyond %lluT can't be accessed",
			  BTRFS_32BIT_MAX_FILE_SIZE >> 40);
		btrfs_err(fs_info,
			   "please consider upgrading to 64bit kernel/hardware");
	}
}
#endif

/*
 * We only mark the transaction aborted and then set the file system read-only.
 * This will prevent new transactions from starting or trying to join this
 * one.
 *
 * This means that error recovery at the call site is limited to freeing
 * any local memory allocations and passing the error code up without
 * further cleanup. The transaction should complete as it normally would
 * in the call path but will return -EIO.
 *
 * We'll complete the cleanup in btrfs_end_transaction and
 * btrfs_commit_transaction.
 */
__cold
void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
			       const char *function,
			       unsigned int line, int errno, bool first_hit)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;

	WRITE_ONCE(trans->aborted, errno);
	WRITE_ONCE(trans->transaction->aborted, errno);
	if (first_hit && errno == -ENOSPC)
		btrfs_dump_space_info_for_trans_abort(fs_info);
	/* Wake up anybody who may be waiting on this transaction */
	wake_up(&fs_info->transaction_wait);
	wake_up(&fs_info->transaction_blocked_wait);
	__btrfs_handle_fs_error(fs_info, function, line, errno, NULL);
}
/*
 * __btrfs_panic decodes unexpected, fatal errors from the caller,
 * issues an alert, and either panics or BUGs, depending on mount options.
 */
__cold
void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
		   unsigned int line, int errno, const char *fmt, ...)
{
	char *s_id = "<unknown>";
	const char *errstr;
	struct va_format vaf = { .fmt = fmt };
	va_list args;

	if (fs_info)
		s_id = fs_info->sb->s_id;

	va_start(args, fmt);
	vaf.va = &args;

	errstr = btrfs_decode_error(errno);
	if (fs_info && (btrfs_test_opt(fs_info, PANIC_ON_FATAL_ERROR)))
		panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (errno=%d %s)\n",
			s_id, function, line, &vaf, errno, errstr);

	btrfs_crit(fs_info, "panic in %s:%d: %pV (errno=%d %s)",
		   function, line, &vaf, errno, errstr);
	va_end(args);
	/* Caller calls BUG() */
}

static void btrfs_put_super(struct super_block *sb)
{
	close_ctree(btrfs_sb(sb));
@@ -918,12 +605,14 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
				ret = -EINVAL;
				goto out;
			}
			btrfs_clear_opt(info->mount_opt, NODISCARD);
			break;
		case Opt_nodiscard:
			btrfs_clear_and_info(info, DISCARD_SYNC,
					     "turning off discard");
			btrfs_clear_and_info(info, DISCARD_ASYNC,
					     "turning off async discard");
			btrfs_set_opt(info->mount_opt, NODISCARD);
			break;
		case Opt_space_cache:
		case Opt_space_cache_version:
@@ -1394,6 +1083,7 @@ static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objectid)
	struct btrfs_dir_item *di;
	struct btrfs_path *path;
	struct btrfs_key location;
	struct fscrypt_str name = FSTR_INIT("default", 7);
	u64 dir_id;

	path = btrfs_alloc_path();
@@ -1406,7 +1096,7 @@ static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objectid)
	 * to mount.
	 */
	dir_id = btrfs_super_root_dir(fs_info->super_copy);
	di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0);
	di = btrfs_lookup_dir_item(NULL, root, path, dir_id, &name, 0);
	if (IS_ERR(di)) {
		btrfs_free_path(path);
		return PTR_ERR(di);
@@ -1507,7 +1197,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
			 * Exit unless we have some pending changes
			 * that need to go through commit
			 */
			if (fs_info->pending_changes == 0)
			if (!test_bit(BTRFS_FS_NEED_TRANS_COMMIT,
				      &fs_info->flags))
				return 0;
			/*
			 * A non-blocking test if the fs is frozen. We must not
@@ -2645,7 +2336,7 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
	 * the end of RCU grace period.
	 */
	rcu_read_lock();
	seq_escape(m, rcu_str_deref(fs_info->fs_devices->latest_dev->name), " \t\n\\");
	seq_escape(m, btrfs_dev_name(fs_info->fs_devices->latest_dev), " \t\n\\");
	rcu_read_unlock();

	return 0;
@@ -2694,7 +2385,7 @@ static __cold void btrfs_interface_exit(void)
	misc_deregister(&btrfs_misc);
}

static void __init btrfs_print_mod_info(void)
static int __init btrfs_print_mod_info(void)
{
	static const char options[] = ""
#ifdef CONFIG_BTRFS_DEBUG
@@ -2721,122 +2412,125 @@ static void __init btrfs_print_mod_info(void)
#endif
			;
	pr_info("Btrfs loaded, crc32c=%s%s\n", crc32c_impl(), options);
	return 0;
}

static int __init init_btrfs_fs(void)
static int register_btrfs(void)
{
	int err;
	return register_filesystem(&btrfs_fs_type);
}

	btrfs_props_init();
static void unregister_btrfs(void)
{
	unregister_filesystem(&btrfs_fs_type);
}

	err = btrfs_init_sysfs();
	if (err)
		return err;
/* Helper structure for long init/exit functions. */
struct init_sequence {
	int (*init_func)(void);
	/* Can be NULL if the init_func doesn't need cleanup. */
	void (*exit_func)(void);
};

	btrfs_init_compress();
static const struct init_sequence mod_init_seq[] = {
	{
		.init_func = btrfs_props_init,
		.exit_func = NULL,
	}, {
		.init_func = btrfs_init_sysfs,
		.exit_func = btrfs_exit_sysfs,
	}, {
		.init_func = btrfs_init_compress,
		.exit_func = btrfs_exit_compress,
	}, {
		.init_func = btrfs_init_cachep,
		.exit_func = btrfs_destroy_cachep,
	}, {
		.init_func = btrfs_transaction_init,
		.exit_func = btrfs_transaction_exit,
	}, {
		.init_func = btrfs_ctree_init,
		.exit_func = btrfs_ctree_exit,
	}, {
		.init_func = btrfs_free_space_init,
		.exit_func = btrfs_free_space_exit,
	}, {
		.init_func = extent_state_init_cachep,
		.exit_func = extent_state_free_cachep,
	}, {
		.init_func = extent_buffer_init_cachep,
		.exit_func = extent_buffer_free_cachep,
	}, {
		.init_func = btrfs_bioset_init,
		.exit_func = btrfs_bioset_exit,
	}, {
		.init_func = extent_map_init,
		.exit_func = extent_map_exit,
	}, {
		.init_func = ordered_data_init,
		.exit_func = ordered_data_exit,
	}, {
		.init_func = btrfs_delayed_inode_init,
		.exit_func = btrfs_delayed_inode_exit,
	}, {
		.init_func = btrfs_auto_defrag_init,
		.exit_func = btrfs_auto_defrag_exit,
	}, {
		.init_func = btrfs_delayed_ref_init,
		.exit_func = btrfs_delayed_ref_exit,
	}, {
		.init_func = btrfs_prelim_ref_init,
		.exit_func = btrfs_prelim_ref_exit,
	}, {
		.init_func = btrfs_interface_init,
		.exit_func = btrfs_interface_exit,
	}, {
		.init_func = btrfs_print_mod_info,
		.exit_func = NULL,
	}, {
		.init_func = btrfs_run_sanity_tests,
		.exit_func = NULL,
	}, {
		.init_func = register_btrfs,
		.exit_func = unregister_btrfs,
	}
};

	err = btrfs_init_cachep();
	if (err)
		goto free_compress;
static bool mod_init_result[ARRAY_SIZE(mod_init_seq)];

	err = extent_state_init_cachep();
	if (err)
		goto free_cachep;
static __always_inline void btrfs_exit_btrfs_fs(void)
{
	int i;

	err = extent_buffer_init_cachep();
	if (err)
		goto free_extent_cachep;

	err = btrfs_bioset_init();
	if (err)
		goto free_eb_cachep;

	err = extent_map_init();
	if (err)
		goto free_bioset;

	err = ordered_data_init();
	if (err)
		goto free_extent_map;

	err = btrfs_delayed_inode_init();
	if (err)
		goto free_ordered_data;

	err = btrfs_auto_defrag_init();
	if (err)
		goto free_delayed_inode;

	err = btrfs_delayed_ref_init();
	if (err)
		goto free_auto_defrag;

	err = btrfs_prelim_ref_init();
	if (err)
		goto free_delayed_ref;

	err = btrfs_interface_init();
	if (err)
		goto free_prelim_ref;

	btrfs_print_mod_info();

	err = btrfs_run_sanity_tests();
	if (err)
		goto unregister_ioctl;

	err = register_filesystem(&btrfs_fs_type);
	if (err)
		goto unregister_ioctl;

	return 0;

unregister_ioctl:
	btrfs_interface_exit();
free_prelim_ref:
	btrfs_prelim_ref_exit();
free_delayed_ref:
	btrfs_delayed_ref_exit();
free_auto_defrag:
	btrfs_auto_defrag_exit();
free_delayed_inode:
	btrfs_delayed_inode_exit();
free_ordered_data:
	ordered_data_exit();
free_extent_map:
	extent_map_exit();
free_bioset:
	btrfs_bioset_exit();
free_eb_cachep:
	extent_buffer_free_cachep();
free_extent_cachep:
	extent_state_free_cachep();
free_cachep:
	btrfs_destroy_cachep();
free_compress:
	btrfs_exit_compress();
	btrfs_exit_sysfs();

	return err;
	for (i = ARRAY_SIZE(mod_init_seq) - 1; i >= 0; i--) {
		if (!mod_init_result[i])
			continue;
		if (mod_init_seq[i].exit_func)
			mod_init_seq[i].exit_func();
		mod_init_result[i] = false;
	}
}

static void __exit exit_btrfs_fs(void)
{
	btrfs_destroy_cachep();
	btrfs_delayed_ref_exit();
	btrfs_auto_defrag_exit();
	btrfs_delayed_inode_exit();
	btrfs_prelim_ref_exit();
	ordered_data_exit();
	extent_map_exit();
	btrfs_bioset_exit();
	extent_state_free_cachep();
	extent_buffer_free_cachep();
	btrfs_interface_exit();
	unregister_filesystem(&btrfs_fs_type);
	btrfs_exit_sysfs();
	btrfs_cleanup_fs_uuids();
	btrfs_exit_compress();
	btrfs_exit_btrfs_fs();
}

static int __init init_btrfs_fs(void)
{
	int ret;
	int i;

	for (i = 0; i < ARRAY_SIZE(mod_init_seq); i++) {
		ASSERT(!mod_init_result[i]);
		ret = mod_init_seq[i].init_func();
		if (ret < 0) {
			btrfs_exit_btrfs_fs();
			return ret;
		}
		mod_init_result[i] = true;
	}
	return 0;
}

late_initcall(init_btrfs_fs);
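This rework replaces the long goto-unwind chain with the table-driven init/exit pattern from the pull notes ("module initialization converted to a table of function pointers run in a sequence"). A standalone, runnable userspace sketch of the same pattern follows; the subsystem names are made up for illustration.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

static int init_a(void) { puts("init a"); return 0; }
static void exit_a(void) { puts("exit a"); }
static int init_b(void) { puts("init b"); return -1; /* simulate failure */ }
static void exit_b(void) { puts("exit b"); }

struct init_sequence {
	int (*init_func)(void);
	void (*exit_func)(void);	/* may be NULL */
};

static const struct init_sequence seq[] = {
	{ init_a, exit_a },
	{ init_b, exit_b },
};

static bool done[ARRAY_SIZE(seq)];

static void exit_all(void)
{
	/* Unwind in reverse order, skipping steps that never ran. */
	for (int i = (int)ARRAY_SIZE(seq) - 1; i >= 0; i--) {
		if (done[i] && seq[i].exit_func)
			seq[i].exit_func();
		done[i] = false;
	}
}

int main(void)
{
	for (size_t i = 0; i < ARRAY_SIZE(seq); i++) {
		if (seq[i].init_func() < 0) {
			exit_all();	/* only "exit a" runs here */
			return 1;
		}
		done[i] = true;
	}
	exit_all();
	return 0;
}
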

fs/btrfs/super.h (new file, 29 lines)
/* SPDX-License-Identifier: GPL-2.0 */

#ifndef BTRFS_SUPER_H
#define BTRFS_SUPER_H

int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
			unsigned long new_flags);
int btrfs_sync_fs(struct super_block *sb, int wait);
char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
					  u64 subvol_objectid);

static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb)
{
	return sb->s_fs_info;
}

static inline void btrfs_set_sb_rdonly(struct super_block *sb)
{
	sb->s_flags |= SB_RDONLY;
	set_bit(BTRFS_FS_STATE_RO, &btrfs_sb(sb)->fs_state);
}

static inline void btrfs_clear_sb_rdonly(struct super_block *sb)
{
	sb->s_flags &= ~SB_RDONLY;
	clear_bit(BTRFS_FS_STATE_RO, &btrfs_sb(sb)->fs_state);
}

#endif
@@ -10,7 +10,7 @@
#include <linux/completion.h>
#include <linux/bug.h>
#include <crypto/hash.h>

#include "messages.h"
#include "ctree.h"
#include "discard.h"
#include "disk-io.h"
@@ -22,6 +22,8 @@
#include "block-group.h"
#include "qgroup.h"
#include "misc.h"
#include "fs.h"
#include "accessors.h"

/*
 * Structure name                       Path
@@ -248,7 +250,7 @@ static ssize_t btrfs_feature_attr_store(struct kobject *kobj,
	/*
	 * We don't want to do full transaction commit from inside sysfs
	 */
	btrfs_set_pending(fs_info, COMMIT);
	set_bit(BTRFS_FS_NEED_TRANS_COMMIT, &fs_info->flags);
	wake_up_process(fs_info->transaction_kthread);

	return count;
@@ -762,7 +764,7 @@ static ssize_t btrfs_chunk_size_store(struct kobject *kobj,
	val = min(val, BTRFS_MAX_DATA_CHUNK_SIZE);

	/* Limit stripe size to 10% of available space. */
	val = min(div_factor(fs_info->fs_devices->total_rw_bytes, 1), val);
	val = min(mult_perc(fs_info->fs_devices->total_rw_bytes, 10), val);

	/* Must be multiple of 256M. */
	val &= ~((u64)SZ_256M - 1);
@@ -959,7 +961,7 @@ static ssize_t btrfs_label_store(struct kobject *kobj,
	/*
	 * We don't want to do full transaction commit from inside sysfs
	 */
	btrfs_set_pending(fs_info, COMMIT);
	set_bit(BTRFS_FS_NEED_TRANS_COMMIT, &fs_info->flags);
	wake_up_process(fs_info->transaction_kthread);

	return len;
@@ -1160,16 +1162,16 @@ static ssize_t btrfs_read_policy_show(struct kobject *kobj,

	for (i = 0; i < BTRFS_NR_READ_POLICY; i++) {
		if (fs_devices->read_policy == i)
			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s[%s]",
			ret += sysfs_emit_at(buf, ret, "%s[%s]",
					 (ret == 0 ? "" : " "),
					 btrfs_read_policy_name[i]);
		else
			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%s",
			ret += sysfs_emit_at(buf, ret, "%s%s",
					 (ret == 0 ? "" : " "),
					 btrfs_read_policy_name[i]);
	}

	ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
	ret += sysfs_emit_at(buf, ret, "\n");

	return ret;
}

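The scnprintf-to-sysfs_emit_at conversion above drops the manual PAGE_SIZE - ret bookkeeping because sysfs_emit_at() bounds writes to the one-page sysfs buffer itself. A hedged sketch of a show() callback in the same style; the attribute and value names are illustrative, not btrfs's.

/* Hypothetical sysfs show() callback using sysfs_emit_at(). */
#include <linux/kobject.h>
#include <linux/sysfs.h>

static const char * const demo_names[] = { "pid", "fastest" };

static ssize_t demo_policy_show(struct kobject *kobj,
				struct kobj_attribute *attr, char *buf)
{
	int len = 0;
	int i;

	/* Space-separated list; sysfs_emit_at() clamps at PAGE_SIZE. */
	for (i = 0; i < ARRAY_SIZE(demo_names); i++)
		len += sysfs_emit_at(buf, len, "%s%s", i ? " " : "",
				     demo_names[i]);
	len += sysfs_emit_at(buf, len, "\n");

	return len;
}
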
@@ -16,6 +16,7 @@
#include "../disk-io.h"
#include "../qgroup.h"
#include "../block-group.h"
#include "../fs.h"

static struct vfsmount *test_mnt = NULL;

@@ -101,7 +102,7 @@ struct btrfs_device *btrfs_alloc_dummy_device(struct btrfs_fs_info *fs_info)
	if (!dev)
		return ERR_PTR(-ENOMEM);

	extent_io_tree_init(NULL, &dev->alloc_state, 0, NULL);
	extent_io_tree_init(NULL, &dev->alloc_state, 0);
	INIT_LIST_HEAD(&dev->dev_list);
	list_add(&dev->dev_list, &fs_info->fs_devices->devices);

@@ -8,6 +8,7 @@
#include "../ctree.h"
#include "../extent_io.h"
#include "../disk-io.h"
#include "../accessors.h"

static int test_btrfs_split_item(u32 sectorsize, u32 nodesize)
{

@@ -132,7 +132,7 @@ static int test_find_delalloc(u32 sectorsize)
	 * Passing NULL as we don't have fs_info but tracepoints are not used
	 * at this point
	 */
	extent_io_tree_init(NULL, tmp, IO_TREE_SELFTEST, NULL);
	extent_io_tree_init(NULL, tmp, IO_TREE_SELFTEST);

	/*
	 * First go through and create and mark all of our pages dirty, we pin
@@ -489,7 +489,7 @@ static int test_find_first_clear_extent_bit(void)

	test_msg("running find_first_clear_extent_bit test");

	extent_io_tree_init(NULL, &tree, IO_TREE_SELFTEST, NULL);
	extent_io_tree_init(NULL, &tree, IO_TREE_SELFTEST);

	/* Test correct handling of empty tree */
	find_first_clear_extent_bit(&tree, 0, &start, &end, CHUNK_TRIMMED);

@@ -10,6 +10,7 @@
#include "../free-space-tree.h"
#include "../transaction.h"
#include "../block-group.h"
#include "../accessors.h"

struct free_space_extent {
	u64 start;
@@ -470,7 +471,7 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize,
	}
	cache->bitmap_low_thresh = 0;
	cache->bitmap_high_thresh = (u32)-1;
	cache->needs_free_space = 1;
	set_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &cache->runtime_flags);
	cache->fs_info = root->fs_info;

	btrfs_init_dummy_trans(&trans, root->fs_info);

@@ -11,6 +11,7 @@
#include "../extent_io.h"
#include "../volumes.h"
#include "../compression.h"
#include "../accessors.h"

static void insert_extent(struct btrfs_root *root, u64 start, u64 len,
			  u64 ram_bytes, u64 offset, u64 disk_bytenr,
@@ -72,8 +73,8 @@ static void insert_inode_item_key(struct btrfs_root *root)
 * diagram of how the extents will look though this may not be possible we still
 * want to make sure everything acts normally (the last number is not inclusive)
 *
 * [0 - 5][5 -  6][     6 - 4096     ][ 4096 - 4100][4100 - 8195][8195 - 12291]
 * [hole ][inline][hole but no extent][  hole   ][   regular ][regular1 split]
 * [0  - 6][     6 - 4096     ][ 4096 - 4100][4100 - 8195][8195  -  12291]
 * [inline][hole but no extent][    hole    ][   regular ][regular1 split]
 *
 * [12291 - 16387][16387 - 24579][24579 - 28675][ 28675 - 32771][32771 - 36867 ]
 * [    hole    ][regular1 split][   prealloc ][   prealloc1  ][prealloc1 written]
@@ -90,19 +91,12 @@ static void setup_file_extents(struct btrfs_root *root, u32 sectorsize)
	u64 disk_bytenr = SZ_1M;
	u64 offset = 0;

	/* First we want a hole */
	insert_extent(root, offset, 5, 5, 0, 0, 0, BTRFS_FILE_EXTENT_REG, 0,
		      slot);
	slot++;
	offset += 5;

	/*
	 * Now we want an inline extent, I don't think this is possible but hey
	 * why not?  Also keep in mind if we have an inline extent it counts as
	 * the whole first page.  If we were to expand it we would have to cow
	 * and we wouldn't have an inline extent anymore.
	 * Tree-checker has strict limits on inline extents that they can only
	 * exist at file offset 0, thus we can only have one inline file extent
	 * at most.
	 */
	insert_extent(root, offset, 1, 1, 0, 0, 0, BTRFS_FILE_EXTENT_INLINE, 0,
	insert_extent(root, offset, 6, 6, 0, 0, 0, BTRFS_FILE_EXTENT_INLINE, 0,
		      slot);
	slot++;
	offset = sectorsize;
@@ -281,37 +275,25 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
		test_err("got an error when we shouldn't have");
		goto out;
	}
	if (em->block_start != EXTENT_MAP_HOLE) {
		test_err("expected a hole, got %llu", em->block_start);
		goto out;
	}
	if (em->start != 0 || em->len != 5) {
		test_err(
		"unexpected extent wanted start 0 len 5, got start %llu len %llu",
			em->start, em->len);
		goto out;
	}
	if (em->flags != 0) {
		test_err("unexpected flags set, want 0 have %lu", em->flags);
		goto out;
	}
	offset = em->start + em->len;
	free_extent_map(em);

	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
	if (IS_ERR(em)) {
		test_err("got an error when we shouldn't have");
		goto out;
	}
	if (em->block_start != EXTENT_MAP_INLINE) {
		test_err("expected an inline, got %llu", em->block_start);
		goto out;
	}

	if (em->start != offset || em->len != (sectorsize - 5)) {
	/*
	 * For inline extent, we always round up the em to sectorsize, as
	 * they are either:
	 *
	 * a) a hidden hole
	 *    The range will be zeroed at inline extent read time.
	 *
	 * b) a file extent with unaligned bytenr
	 *    Tree checker will reject it.
	 */
	if (em->start != 0 || em->len != sectorsize) {
		test_err(
	"unexpected extent wanted start %llu len 1, got start %llu len %llu",
			offset, em->start, em->len);
	"unexpected extent wanted start 0 len %u, got start %llu len %llu",
			sectorsize, em->start, em->len);
		goto out;
	}
	if (em->flags != 0) {

@@ -10,6 +10,8 @@
 #include "../disk-io.h"
 #include "../qgroup.h"
 #include "../backref.h"
+#include "../fs.h"
+#include "../accessors.h"
 
 static int insert_normal_tree_ref(struct btrfs_root *root, u64 bytenr,
 				  u64 num_bytes, u64 parent, u64 root_objectid)
@@ -203,6 +205,7 @@ static int remove_extent_ref(struct btrfs_root *root, u64 bytenr,
 static int test_no_shared_qgroup(struct btrfs_root *root,
 		u32 sectorsize, u32 nodesize)
 {
+	struct btrfs_backref_walk_ctx ctx = { 0 };
 	struct btrfs_trans_handle trans;
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct ulist *old_roots = NULL;
@@ -218,16 +221,22 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
 		return ret;
 	}
 
+	ctx.bytenr = nodesize;
+	ctx.trans = &trans;
+	ctx.fs_info = fs_info;
+
 	/*
 	 * Since the test trans doesn't have the complicated delayed refs,
 	 * we can only call btrfs_qgroup_account_extent() directly to test
 	 * quota.
 	 */
-	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false);
+	ret = btrfs_find_all_roots(&ctx, false);
 	if (ret) {
 		test_err("couldn't find old roots: %d", ret);
 		return ret;
 	}
+	old_roots = ctx.roots;
+	ctx.roots = NULL;
 
 	ret = insert_normal_tree_ref(root, nodesize, nodesize, 0,
 				BTRFS_FS_TREE_OBJECTID);
@@ -236,12 +245,14 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
 		return ret;
 	}
 
-	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false);
+	ret = btrfs_find_all_roots(&ctx, false);
 	if (ret) {
 		ulist_free(old_roots);
 		test_err("couldn't find old roots: %d", ret);
 		return ret;
 	}
+	new_roots = ctx.roots;
+	ctx.roots = NULL;
 
 	ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots,
 					  new_roots);
@@ -260,11 +271,13 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
 		return -EINVAL;
 	}
 
-	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false);
+	ret = btrfs_find_all_roots(&ctx, false);
 	if (ret) {
 		test_err("couldn't find old roots: %d", ret);
 		return ret;
 	}
+	old_roots = ctx.roots;
+	ctx.roots = NULL;
 
 	ret = remove_extent_item(root, nodesize, nodesize);
 	if (ret) {
@@ -272,12 +285,14 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
 		return -EINVAL;
 	}
 
-	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false);
+	ret = btrfs_find_all_roots(&ctx, false);
 	if (ret) {
 		ulist_free(old_roots);
 		test_err("couldn't find old roots: %d", ret);
 		return ret;
 	}
+	new_roots = ctx.roots;
+	ctx.roots = NULL;
 
 	ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots,
 					  new_roots);
@@ -302,6 +317,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
 static int test_multiple_refs(struct btrfs_root *root,
 		u32 sectorsize, u32 nodesize)
 {
+	struct btrfs_backref_walk_ctx ctx = { 0 };
 	struct btrfs_trans_handle trans;
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct ulist *old_roots = NULL;
@@ -322,11 +338,17 @@ static int test_multiple_refs(struct btrfs_root *root,
 		return ret;
 	}
 
-	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false);
+	ctx.bytenr = nodesize;
+	ctx.trans = &trans;
+	ctx.fs_info = fs_info;
+
+	ret = btrfs_find_all_roots(&ctx, false);
 	if (ret) {
 		test_err("couldn't find old roots: %d", ret);
 		return ret;
 	}
+	old_roots = ctx.roots;
+	ctx.roots = NULL;
 
 	ret = insert_normal_tree_ref(root, nodesize, nodesize, 0,
 				BTRFS_FS_TREE_OBJECTID);
@@ -335,12 +357,14 @@ static int test_multiple_refs(struct btrfs_root *root,
 		return ret;
 	}
 
-	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false);
+	ret = btrfs_find_all_roots(&ctx, false);
 	if (ret) {
 		ulist_free(old_roots);
 		test_err("couldn't find old roots: %d", ret);
 		return ret;
 	}
+	new_roots = ctx.roots;
+	ctx.roots = NULL;
 
 	ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots,
 					  new_roots);
@@ -355,11 +379,13 @@ static int test_multiple_refs(struct btrfs_root *root,
 		return -EINVAL;
 	}
 
-	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false);
+	ret = btrfs_find_all_roots(&ctx, false);
 	if (ret) {
 		test_err("couldn't find old roots: %d", ret);
 		return ret;
 	}
+	old_roots = ctx.roots;
+	ctx.roots = NULL;
 
 	ret = add_tree_ref(root, nodesize, nodesize, 0,
 			BTRFS_FIRST_FREE_OBJECTID);
@@ -368,12 +394,14 @@ static int test_multiple_refs(struct btrfs_root *root,
 		return ret;
 	}
 
-	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false);
+	ret = btrfs_find_all_roots(&ctx, false);
 	if (ret) {
 		ulist_free(old_roots);
 		test_err("couldn't find old roots: %d", ret);
 		return ret;
 	}
+	new_roots = ctx.roots;
+	ctx.roots = NULL;
 
 	ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots,
 					  new_roots);
@@ -394,11 +422,13 @@ static int test_multiple_refs(struct btrfs_root *root,
 		return -EINVAL;
 	}
 
-	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false);
+	ret = btrfs_find_all_roots(&ctx, false);
 	if (ret) {
 		test_err("couldn't find old roots: %d", ret);
 		return ret;
 	}
+	old_roots = ctx.roots;
+	ctx.roots = NULL;
 
 	ret = remove_extent_ref(root, nodesize, nodesize, 0,
 				BTRFS_FIRST_FREE_OBJECTID);
@@ -407,12 +437,14 @@ static int test_multiple_refs(struct btrfs_root *root,
 		return ret;
 	}
 
-	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false);
+	ret = btrfs_find_all_roots(&ctx, false);
 	if (ret) {
 		ulist_free(old_roots);
 		test_err("couldn't find old roots: %d", ret);
 		return ret;
 	}
+	new_roots = ctx.roots;
+	ctx.roots = NULL;
 
 	ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots,
 					  new_roots);
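Every call site above repeats the same three steps: seed the walk context, run btrfs_find_all_roots(), then take ownership of ctx.roots so the next walk does not free it. A sketch of that pattern as a test-local helper (hypothetical, for illustration only; it uses only the fields and signature shown in this diff):

	static int find_roots_for_test(struct btrfs_backref_walk_ctx *ctx,
				       struct btrfs_trans_handle *trans,
				       struct btrfs_fs_info *fs_info, u64 bytenr,
				       struct ulist **roots_ret)
	{
		int ret;

		ctx->bytenr = bytenr;
		ctx->trans = trans;
		ctx->fs_info = fs_info;

		ret = btrfs_find_all_roots(ctx, false);
		if (ret)
			return ret;

		/* Transfer ownership so a later walk does not free our ulist. */
		*roots_ret = ctx->roots;
		ctx->roots = NULL;
		return 0;
	}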
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -6,6 +6,7 @@
 #include <linux/fs.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
+#include <linux/sched/mm.h>
 #include <linux/writeback.h>
 #include <linux/pagemap.h>
 #include <linux/blkdev.h>
@@ -23,6 +24,18 @@
 #include "block-group.h"
 #include "space-info.h"
 #include "zoned.h"
+#include "fs.h"
+#include "accessors.h"
+#include "extent-tree.h"
+#include "root-tree.h"
+#include "defrag.h"
+#include "dir-item.h"
+#include "uuid-tree.h"
+#include "ioctl.h"
+#include "relocation.h"
+#include "scrub.h"
+
+static struct kmem_cache *btrfs_trans_handle_cachep;
 
 #define BTRFS_ROOT_TRANS_TAG 0
 
@@ -365,9 +378,9 @@ loop:
 	spin_lock_init(&cur_trans->releasing_ebs_lock);
 	list_add_tail(&cur_trans->list, &fs_info->trans_list);
 	extent_io_tree_init(fs_info, &cur_trans->dirty_pages,
-			IO_TREE_TRANS_DIRTY_PAGES, NULL);
+			IO_TREE_TRANS_DIRTY_PAGES);
 	extent_io_tree_init(fs_info, &cur_trans->pinned_extents,
-			IO_TREE_FS_PINNED_EXTENTS, NULL);
+			IO_TREE_FS_PINNED_EXTENTS);
 	fs_info->generation++;
 	cur_trans->transid = fs_info->generation;
 	fs_info->running_transaction = cur_trans;
@@ -936,7 +949,7 @@ static bool should_end_transaction(struct btrfs_trans_handle *trans)
 	if (btrfs_check_space_for_delayed_refs(fs_info))
 		return true;
 
-	return !!btrfs_block_rsv_check(&fs_info->global_block_rsv, 5);
+	return !!btrfs_block_rsv_check(&fs_info->global_block_rsv, 50);
}
 
 bool btrfs_should_end_transaction(struct btrfs_trans_handle *trans)
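The jump from 5 to 50 reads alarming but is a unit change rather than a policy change, assuming the second argument of btrfs_block_rsv_check() moved from a tenth-based factor to a percentage in this series; both values describe a "global reserve at least half full" threshold. Illustrative only, with the semantics stated above as an assumption:

	#include <linux/types.h>

	/* Old style: threshold expressed as a factor out of 10. */
	static bool reserve_meets_factor(u64 size, u64 reserved)
	{
		return reserved * 10 >= size * 5;
	}

	/* New style: the same threshold expressed as a percentage. */
	static bool reserve_meets_percent(u64 size, u64 reserved)
	{
		return reserved * 100 >= size * 50;
	}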
@@ -1607,10 +1620,9 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 	struct btrfs_root *root = pending->root;
 	struct btrfs_root *parent_root;
 	struct btrfs_block_rsv *rsv;
-	struct inode *parent_inode;
+	struct inode *parent_inode = pending->dir;
 	struct btrfs_path *path;
 	struct btrfs_dir_item *dir_item;
-	struct dentry *dentry;
 	struct extent_buffer *tmp;
 	struct extent_buffer *old;
 	struct timespec64 cur_time;
@@ -1619,6 +1631,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 	u64 index = 0;
 	u64 objectid;
 	u64 root_flags;
+	unsigned int nofs_flags;
+	struct fscrypt_name fname;
 
 	ASSERT(pending->path);
 	path = pending->path;
@@ -1626,9 +1640,22 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 	ASSERT(pending->root_item);
 	new_root_item = pending->root_item;
 
+	/*
+	 * We're inside a transaction and must make sure that any potential
+	 * allocations with GFP_KERNEL in fscrypt won't recurse back to the
+	 * filesystem.
+	 */
+	nofs_flags = memalloc_nofs_save();
+	pending->error = fscrypt_setup_filename(parent_inode,
+						&pending->dentry->d_name, 0,
+						&fname);
+	memalloc_nofs_restore(nofs_flags);
+	if (pending->error)
+		goto free_pending;
+
 	pending->error = btrfs_get_free_objectid(tree_root, &objectid);
 	if (pending->error)
-		goto no_free_objectid;
+		goto free_fname;
 
 	/*
 	 * Make qgroup skip the current new snapshot's qgroupid, as it is
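The memalloc_nofs_save()/memalloc_nofs_restore() pair added above is the standard scoped-NOFS pattern: every allocation between the two calls, including GFP_KERNEL ones made deep inside fscrypt, is implicitly demoted to GFP_NOFS and therefore cannot recurse into the filesystem while a transaction is open. A minimal self-contained sketch (helper names below are hypothetical):

	#include <linux/sched/mm.h>
	#include <linux/slab.h>

	/* Hypothetical callee: any allocation here is implicitly NOFS. */
	static int some_allocating_helper(void)
	{
		void *p = kmalloc(64, GFP_KERNEL); /* behaves as GFP_NOFS in scope */

		if (!p)
			return -ENOMEM;
		kfree(p);
		return 0;
	}

	static int do_work_inside_transaction(void)
	{
		unsigned int nofs_flags;
		int ret;

		nofs_flags = memalloc_nofs_save();	/* enter NOFS scope */
		ret = some_allocating_helper();
		memalloc_nofs_restore(nofs_flags);	/* leave NOFS scope */

		return ret;
	}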
@@ -1657,8 +1684,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 	trace_btrfs_space_reservation(fs_info, "transaction",
 				      trans->transid,
 				      trans->bytes_reserved, 1);
-	dentry = pending->dentry;
-	parent_inode = pending->dir;
 	parent_root = BTRFS_I(parent_inode)->root;
 	ret = record_root_in_trans(trans, parent_root, 0);
 	if (ret)
@@ -1674,8 +1699,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 	/* check if there is a file/dir which has the same name. */
 	dir_item = btrfs_lookup_dir_item(NULL, parent_root, path,
 					 btrfs_ino(BTRFS_I(parent_inode)),
-					 dentry->d_name.name,
-					 dentry->d_name.len, 0);
+					 &fname.disk_name, 0);
 	if (dir_item != NULL && !IS_ERR(dir_item)) {
 		pending->error = -EEXIST;
 		goto dir_item_existed;
@@ -1770,7 +1794,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 	ret = btrfs_add_root_ref(trans, objectid,
 				 parent_root->root_key.objectid,
 				 btrfs_ino(BTRFS_I(parent_inode)), index,
-				 dentry->d_name.name, dentry->d_name.len);
+				 &fname.disk_name);
 	if (ret) {
 		btrfs_abort_transaction(trans, ret);
 		goto fail;
@@ -1802,9 +1826,9 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 	if (ret < 0)
 		goto fail;
 
-	ret = btrfs_insert_dir_item(trans, dentry->d_name.name,
-				    dentry->d_name.len, BTRFS_I(parent_inode),
-				    &key, BTRFS_FT_DIR, index);
+	ret = btrfs_insert_dir_item(trans, &fname.disk_name,
+				    BTRFS_I(parent_inode), &key, BTRFS_FT_DIR,
+				    index);
 	/* We have checked the name at the beginning, so it is impossible. */
 	BUG_ON(ret == -EEXIST || ret == -EOVERFLOW);
 	if (ret) {
@@ -1813,7 +1837,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 	}
 
 	btrfs_i_size_write(BTRFS_I(parent_inode), parent_inode->i_size +
-					 dentry->d_name.len * 2);
+						  fname.disk_name.len * 2);
 	parent_inode->i_mtime = current_time(parent_inode);
 	parent_inode->i_ctime = parent_inode->i_mtime;
 	ret = btrfs_update_inode_fallback(trans, parent_root, BTRFS_I(parent_inode));
@@ -1845,7 +1869,9 @@ dir_item_existed:
 	trans->bytes_reserved = 0;
 clear_skip_qgroup:
 	btrfs_clear_skip_qgroup(trans);
-no_free_objectid:
+free_fname:
+	fscrypt_free_filename(&fname);
+free_pending:
 	kfree(new_root_item);
 	pending->root_item = NULL;
 	btrfs_free_path(path);
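The conversions in this function all trade dentry->d_name.name/.len for &fname.disk_name. In struct fscrypt_name (abridged here from include/linux/fscrypt.h; the remaining fields are omitted), disk_name is the form of the name that is searched for and stored on disk, which for an unencrypted directory is simply the user-supplied name:

	struct fscrypt_name {
		const struct qstr *usr_fname;	/* name as passed in by the user */
		struct fscrypt_str disk_name;	/* name to search for / store on disk */
		/* ... hash, minor_hash, crypto_buf, is_nokey_name ... */
	};

Routing every on-disk name through this one type is what lets dir items later carry an encryption status without touching each call site again.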
@@ -2101,6 +2127,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 	ASSERT(refcount_read(&trans->use_count) == 1);
 	btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START);
 
+	clear_bit(BTRFS_FS_NEED_TRANS_COMMIT, &fs_info->flags);
+
 	/* Stop the commit early if ->aborted is set */
 	if (TRANS_ABORTED(cur_trans)) {
 		ret = cur_trans->aborted;
@@ -2354,12 +2382,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 	if (ret)
 		goto unlock_reloc;
 
-	/*
-	 * Since the transaction is done, we can apply the pending changes
-	 * before the next transaction.
-	 */
-	btrfs_apply_pending_changes(fs_info);
-
 	/* commit_fs_roots gets rid of all the tree log roots, it is now
 	 * safe to free the root of tree log roots
 	 */
@@ -2582,21 +2604,17 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_fs_info *fs_info)
 	return (ret < 0) ? 0 : 1;
 }
 
-void btrfs_apply_pending_changes(struct btrfs_fs_info *fs_info)
+int __init btrfs_transaction_init(void)
 {
-	unsigned long prev;
-	unsigned long bit;
-
-	prev = xchg(&fs_info->pending_changes, 0);
-	if (!prev)
-		return;
-
-	bit = 1 << BTRFS_PENDING_COMMIT;
-	if (prev & bit)
-		btrfs_debug(fs_info, "pending commit done");
-	prev &= ~bit;
-
-	if (prev)
-		btrfs_warn(fs_info,
-			"unknown pending changes left 0x%lx, ignoring", prev);
+	btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle",
+			sizeof(struct btrfs_trans_handle), 0,
+			SLAB_TEMPORARY | SLAB_MEM_SPREAD, NULL);
+	if (!btrfs_trans_handle_cachep)
+		return -ENOMEM;
+	return 0;
+}
+
+void __cold btrfs_transaction_exit(void)
+{
+	kmem_cache_destroy(btrfs_trans_handle_cachep);
 }
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -10,6 +10,7 @@
 #include "btrfs_inode.h"
 #include "delayed-ref.h"
 #include "ctree.h"
+#include "misc.h"
 
 enum btrfs_trans_state {
 	TRANS_STATE_RUNNING,
@@ -98,14 +99,15 @@ struct btrfs_transaction {
 	struct list_head releasing_ebs;
 };
 
-#define __TRANS_FREEZABLE	(1U << 0)
-
-#define __TRANS_START		(1U << 9)
-#define __TRANS_ATTACH		(1U << 10)
-#define __TRANS_JOIN		(1U << 11)
-#define __TRANS_JOIN_NOLOCK	(1U << 12)
-#define __TRANS_DUMMY		(1U << 13)
-#define __TRANS_JOIN_NOSTART	(1U << 14)
+enum {
+	ENUM_BIT(__TRANS_FREEZABLE),
+	ENUM_BIT(__TRANS_START),
+	ENUM_BIT(__TRANS_ATTACH),
+	ENUM_BIT(__TRANS_JOIN),
+	ENUM_BIT(__TRANS_JOIN_NOLOCK),
+	ENUM_BIT(__TRANS_DUMMY),
+	ENUM_BIT(__TRANS_JOIN_NOSTART),
+};
 
 #define TRANS_START		(__TRANS_START | __TRANS_FREEZABLE)
 #define TRANS_ATTACH		(__TRANS_ATTACH)
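ENUM_BIT comes from misc.h, newly included above. fs/btrfs/misc.h defines it along these lines (quoted from memory, so treat this as a sketch rather than the authoritative definition): each name gets a hidden sequence counter and a single-bit value derived from it, so consecutive entries yield consecutive power-of-two flags:

	#define ENUM_BIT(name)					\
		__ ## name ## _BIT,				\
		name = (1U << __ ## name ## _BIT),		\
		__ ## name ## _SEQ = __ ## name ## _BIT

Note that the conversion packs the flags from bit 0 upward (__TRANS_FREEZABLE = 1U << 0, __TRANS_START = 1U << 1, and so on) instead of preserving the old gap at bits 1-8; the flags are purely in-memory transaction-type bits, so their numeric values are free to change.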
@@ -231,9 +233,11 @@ int btrfs_wait_tree_log_extents(struct btrfs_root *root, int mark);
 int btrfs_transaction_blocked(struct btrfs_fs_info *info);
 int btrfs_transaction_in_commit(struct btrfs_fs_info *info);
 void btrfs_put_transaction(struct btrfs_transaction *transaction);
-void btrfs_apply_pending_changes(struct btrfs_fs_info *fs_info);
 void btrfs_add_dropped_root(struct btrfs_trans_handle *trans,
 			    struct btrfs_root *root);
 void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans);
 
+int __init btrfs_transaction_init(void);
+void __cold btrfs_transaction_exit(void);
+
 #endif
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -18,6 +18,7 @@
 #include <linux/types.h>
 #include <linux/stddef.h>
 #include <linux/error-injection.h>
+#include "messages.h"
 #include "ctree.h"
 #include "tree-checker.h"
 #include "disk-io.h"
@@ -25,6 +26,9 @@
 #include "volumes.h"
 #include "misc.h"
 #include "btrfs_inode.h"
+#include "fs.h"
+#include "accessors.h"
+#include "file-item.h"
 
 /*
  * Error messages should follow this format:
@@ -528,7 +532,7 @@ static int check_dir_item(struct extent_buffer *leaf,
 		}
 
 		/* dir type check */
-		dir_type = btrfs_dir_type(leaf, di);
+		dir_type = btrfs_dir_ftype(leaf, di);
 		if (unlikely(dir_type >= BTRFS_FT_MAX)) {
 			dir_item_err(leaf, slot,
 			"invalid dir item type, have %u expect [0, %u)",
@@ -1780,10 +1784,10 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
 
 		/* Also check if the item pointer overlaps with the btrfs item. */
 		if (unlikely(btrfs_item_ptr_offset(leaf, slot) <
-			     btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item))) {
+			     btrfs_item_nr_offset(leaf, slot) + sizeof(struct btrfs_item))) {
 			generic_err(leaf, slot,
 		"slot overlaps with its data, item end %lu data start %lu",
-				btrfs_item_nr_offset(slot) +
+				btrfs_item_nr_offset(leaf, slot) +
 				sizeof(struct btrfs_item),
 				btrfs_item_ptr_offset(leaf, slot));
 			return -EUCLEAN;
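The overlap test above enforces the btrfs leaf layout invariant: item headers grow forward from the leaf header while item data grows backward from the end of the leaf, so each item's data must begin at or after the end of its own header slot. Sketched as a comment plus a hypothetical helper mirroring the check (not actual kernel code):

	/*
	 * | leaf header | item 0 | item 1 | ... free ... | data 1 | data 0 |
	 *               ^ headers grow ->        <- data grows    ^
	 *
	 * item end  = btrfs_item_nr_offset(leaf, slot) + sizeof(struct btrfs_item)
	 * data start = btrfs_item_ptr_offset(leaf, slot)
	 */
	static inline bool item_data_ok(unsigned long item_end,
					unsigned long data_start)
	{
		return data_start >= item_end;
	}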
--- a/fs/btrfs/tree-checker.h
+++ b/fs/btrfs/tree-checker.h
@@ -6,8 +6,39 @@
 #ifndef BTRFS_TREE_CHECKER_H
 #define BTRFS_TREE_CHECKER_H
 
-#include "ctree.h"
-#include "extent_io.h"
+#include <uapi/linux/btrfs_tree.h>
+
+struct extent_buffer;
+struct btrfs_chunk;
+
+/* All the extra info needed to verify the parentness of a tree block. */
+struct btrfs_tree_parent_check {
+	/*
+	 * The owner check against the tree block.
+	 *
+	 * Can be 0 to skip the owner check.
+	 */
+	u64 owner_root;
+
+	/*
+	 * Expected transid. Can be 0 to skip the check, but such a skip
+	 * should only be utilized for backref walk related code.
+	 */
+	u64 transid;
+
+	/*
+	 * The expected first key.
+	 *
+	 * This check can be skipped if @has_first_key is false; such a skip
+	 * happens when we don't have the parent node key, e.g. when reading
+	 * the tree root or doing a backref walk.
+	 */
+	struct btrfs_key first_key;
+	bool has_first_key;
+
+	/* The expected level. Should always be set. */
+	u8 level;
+};
 
 /*
  * Comprehensive leaf checker.
(Some files were not shown because too many files have changed in this diff.)