fs/bcachefs/fs-io-pagecache.c
mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-10-31 08:44:41 +00:00, commit 7554a8bb6d (823 lines, 19 KiB, C)
This adds a new helper, bch2_folio_reservation_get_partial(), which reserves
as many blocks as possible and may return partial success.
__bch2_buffered_write() is switched to the new helper - this fixes fstests
generic/275, the "write until -ENOSPC" test. generic/230 now fails: this
appears to be a test bug, where xfs_io isn't looping after a partial write to
get the error code.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
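For context, a minimal sketch of how a caller might consume partial success
from the new helper (hypothetical caller code, not the actual
__bch2_buffered_write(), which lives elsewhere in the tree):

	/* Hypothetical caller: shorten the write to what was reserved. */
	ssize_t got = bch2_folio_reservation_get_partial(c, inode, folio, &res,
							 offset, len);
	if (got < 0)
		return got;	/* nothing reserved: hard error, e.g. -ENOSPC */
	if ((size_t) got < len)
		len = got;	/* partial success: write only what we reserved */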
// SPDX-License-Identifier: GPL-2.0
#ifndef NO_BCACHEFS_FS

#include "bcachefs.h"
#include "btree_iter.h"
#include "extents.h"
#include "fs-io.h"
#include "fs-io-pagecache.h"
#include "subvolume.h"

#include <linux/pagevec.h>
#include <linux/writeback.h>

int bch2_filemap_get_contig_folios_d(struct address_space *mapping,
				     loff_t start, u64 end,
				     fgf_t fgp_flags, gfp_t gfp,
				     folios *fs)
{
	struct folio *f;
	u64 pos = start;
	int ret = 0;

	while (pos < end) {
		if ((u64) pos >= (u64) start + (1ULL << 20))
			fgp_flags &= ~FGP_CREAT;

		ret = darray_make_room_gfp(fs, 1, gfp & GFP_KERNEL);
		if (ret)
			break;

		f = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, fgp_flags, gfp);
		if (IS_ERR_OR_NULL(f))
			break;

		BUG_ON(fs->nr && folio_pos(f) != pos);

		pos = folio_end_pos(f);
		darray_push(fs, f);
	}

	if (!fs->nr && !ret && (fgp_flags & FGP_CREAT))
		ret = -ENOMEM;

	return fs->nr ? 0 : ret;
}

/* pagecache_block must be held */
int bch2_write_invalidate_inode_pages_range(struct address_space *mapping,
					    loff_t start, loff_t end)
{
	int ret;

	/*
	 * XXX: the way this is currently implemented, we can spin if a process
	 * is continually redirtying a specific page
	 */
	do {
		if (!mapping->nrpages)
			return 0;

		ret = filemap_write_and_wait_range(mapping, start, end);
		if (ret)
			break;

		if (!mapping->nrpages)
			return 0;

		ret = invalidate_inode_pages2_range(mapping,
				start >> PAGE_SHIFT,
				end >> PAGE_SHIFT);
	} while (ret == -EBUSY);

	return ret;
}

#if 0
/* Useful for debug tracing: */
static const char * const bch2_folio_sector_states[] = {
#define x(n)	#n,
	BCH_FOLIO_SECTOR_STATE()
#undef x
	NULL
};
#endif

static inline enum bch_folio_sector_state
folio_sector_dirty(enum bch_folio_sector_state state)
{
	switch (state) {
	case SECTOR_unallocated:
		return SECTOR_dirty;
	case SECTOR_reserved:
		return SECTOR_dirty_reserved;
	default:
		return state;
	}
}

static inline enum bch_folio_sector_state
folio_sector_undirty(enum bch_folio_sector_state state)
{
	switch (state) {
	case SECTOR_dirty:
		return SECTOR_unallocated;
	case SECTOR_dirty_reserved:
		return SECTOR_reserved;
	default:
		return state;
	}
}

static inline enum bch_folio_sector_state
folio_sector_reserve(enum bch_folio_sector_state state)
{
	switch (state) {
	case SECTOR_unallocated:
		return SECTOR_reserved;
	case SECTOR_dirty:
		return SECTOR_dirty_reserved;
	default:
		return state;
	}
}
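
/*
 * Sector state transitions implemented by the three helpers above:
 *
 *   dirty:	unallocated -> dirty,		reserved -> dirty_reserved
 *   undirty:	dirty -> unallocated,		dirty_reserved -> reserved
 *   reserve:	unallocated -> reserved,	dirty -> dirty_reserved
 *
 * All other states (e.g. SECTOR_allocated) are left unchanged.
 */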

/* for newly allocated folios: */
struct bch_folio *__bch2_folio_create(struct folio *folio, gfp_t gfp)
{
	struct bch_folio *s;

	s = kzalloc(sizeof(*s) +
		    sizeof(struct bch_folio_sector) *
		    folio_sectors(folio), gfp);
	if (!s)
		return NULL;

	spin_lock_init(&s->lock);
	folio_attach_private(folio, s);
	return s;
}

struct bch_folio *bch2_folio_create(struct folio *folio, gfp_t gfp)
{
	return bch2_folio(folio) ?: __bch2_folio_create(folio, gfp);
}

static unsigned bkey_to_sector_state(struct bkey_s_c k)
{
	if (bkey_extent_is_reservation(k))
		return SECTOR_reserved;
	if (bkey_extent_is_allocation(k.k))
		return SECTOR_allocated;
	return SECTOR_unallocated;
}

static void __bch2_folio_set(struct folio *folio,
			     unsigned pg_offset, unsigned pg_len,
			     unsigned nr_ptrs, unsigned state)
{
	struct bch_folio *s = bch2_folio(folio);
	unsigned i, sectors = folio_sectors(folio);

	BUG_ON(pg_offset >= sectors);
	BUG_ON(pg_offset + pg_len > sectors);

	spin_lock(&s->lock);

	for (i = pg_offset; i < pg_offset + pg_len; i++) {
		s->s[i].nr_replicas	= nr_ptrs;
		bch2_folio_sector_set(folio, s, i, state);
	}

	if (i == sectors)
		s->uptodate = true;

	spin_unlock(&s->lock);
}

/*
 * Initialize bch_folio state (allocated/unallocated, nr_replicas) from the
 * extents btree:
 */
int bch2_folio_set(struct bch_fs *c, subvol_inum inum,
		   struct folio **fs, unsigned nr_folios)
{
	struct btree_trans *trans;
	struct btree_iter iter;
	struct bkey_s_c k;
	struct bch_folio *s;
	u64 offset = folio_sector(fs[0]);
	unsigned folio_idx;
	u32 snapshot;
	bool need_set = false;
	int ret;

	for (folio_idx = 0; folio_idx < nr_folios; folio_idx++) {
		s = bch2_folio_create(fs[folio_idx], GFP_KERNEL);
		if (!s)
			return -ENOMEM;

		need_set |= !s->uptodate;
	}

	if (!need_set)
		return 0;

	folio_idx = 0;
	trans = bch2_trans_get(c);
retry:
	bch2_trans_begin(trans);

	ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
	if (ret)
		goto err;

	for_each_btree_key_norestart(trans, iter, BTREE_ID_extents,
			   SPOS(inum.inum, offset, snapshot),
			   BTREE_ITER_slots, k, ret) {
		unsigned nr_ptrs = bch2_bkey_nr_ptrs_fully_allocated(k);
		unsigned state = bkey_to_sector_state(k);

		while (folio_idx < nr_folios) {
			struct folio *folio = fs[folio_idx];
			u64 folio_start	= folio_sector(folio);
			u64 folio_end	= folio_end_sector(folio);
			unsigned folio_offset = max(bkey_start_offset(k.k), folio_start) -
				folio_start;
			unsigned folio_len = min(k.k->p.offset, folio_end) -
				folio_offset - folio_start;

			BUG_ON(k.k->p.offset < folio_start);
			BUG_ON(bkey_start_offset(k.k) > folio_end);

			if (!bch2_folio(folio)->uptodate)
				__bch2_folio_set(folio, folio_offset, folio_len, nr_ptrs, state);

			if (k.k->p.offset < folio_end)
				break;
			folio_idx++;
		}

		if (folio_idx == nr_folios)
			break;
	}

	offset = iter.pos.offset;
	bch2_trans_iter_exit(trans, &iter);
err:
	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
		goto retry;
	bch2_trans_put(trans);

	return ret;
}

void bch2_bio_page_state_set(struct bio *bio, struct bkey_s_c k)
{
	struct bvec_iter iter;
	struct folio_vec fv;
	unsigned nr_ptrs = k.k->type == KEY_TYPE_reflink_v
		? 0 : bch2_bkey_nr_ptrs_fully_allocated(k);
	unsigned state = bkey_to_sector_state(k);

	bio_for_each_folio(fv, bio, iter)
		__bch2_folio_set(fv.fv_folio,
				 fv.fv_offset >> 9,
				 fv.fv_len >> 9,
				 nr_ptrs, state);
}

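/*
 * Mark a range of pagecache (in sectors) as no longer backed by allocated
 * extents: clear each sector's nr_replicas, so that writes here will need
 * new disk reservations:
 */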
void bch2_mark_pagecache_unallocated(struct bch_inode_info *inode,
				     u64 start, u64 end)
{
	pgoff_t index = start >> PAGE_SECTORS_SHIFT;
	pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT;
	struct folio_batch fbatch;
	unsigned i, j;

	if (end <= start)
		return;

	folio_batch_init(&fbatch);

	while (filemap_get_folios(inode->v.i_mapping,
				  &index, end_index, &fbatch)) {
		for (i = 0; i < folio_batch_count(&fbatch); i++) {
			struct folio *folio = fbatch.folios[i];
			u64 folio_start = folio_sector(folio);
			u64 folio_end = folio_end_sector(folio);
			unsigned folio_offset = max(start, folio_start) - folio_start;
			unsigned folio_len = min(end, folio_end) - folio_offset - folio_start;
			struct bch_folio *s;

			BUG_ON(end <= folio_start);

			folio_lock(folio);
			s = bch2_folio(folio);

			if (s) {
				spin_lock(&s->lock);
				for (j = folio_offset; j < folio_offset + folio_len; j++)
					s->s[j].nr_replicas = 0;
				spin_unlock(&s->lock);
			}

			folio_unlock(folio);
		}
		folio_batch_release(&fbatch);
		cond_resched();
	}
}

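/*
 * Mark a range of pagecache as reserved: each sector moves to its reserved
 * state (see folio_sector_reserve()), and i_sectors is adjusted for sectors
 * that were SECTOR_dirty. *start is advanced as folios are processed, so a
 * caller that sees -EAGAIN in nonblocking mode can resume where it left off:
 */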
int bch2_mark_pagecache_reserved(struct bch_inode_info *inode,
				 u64 *start, u64 end,
				 bool nonblocking)
{
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	pgoff_t index = *start >> PAGE_SECTORS_SHIFT;
	pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT;
	struct folio_batch fbatch;
	s64 i_sectors_delta = 0;
	int ret = 0;

	if (end <= *start)
		return 0;

	folio_batch_init(&fbatch);

	while (filemap_get_folios(inode->v.i_mapping,
				  &index, end_index, &fbatch)) {
		for (unsigned i = 0; i < folio_batch_count(&fbatch); i++) {
			struct folio *folio = fbatch.folios[i];

			if (!nonblocking)
				folio_lock(folio);
			else if (!folio_trylock(folio)) {
				folio_batch_release(&fbatch);
				ret = -EAGAIN;
				break;
			}

			u64 folio_start = folio_sector(folio);
			u64 folio_end = folio_end_sector(folio);

			BUG_ON(end <= folio_start);

			*start = min(end, folio_end);

			struct bch_folio *s = bch2_folio(folio);
			if (s) {
				unsigned folio_offset = max(*start, folio_start) - folio_start;
				unsigned folio_len = min(end, folio_end) - folio_offset - folio_start;

				spin_lock(&s->lock);
				for (unsigned j = folio_offset; j < folio_offset + folio_len; j++) {
					i_sectors_delta -= s->s[j].state == SECTOR_dirty;
					bch2_folio_sector_set(folio, s, j,
						folio_sector_reserve(s->s[j].state));
				}
				spin_unlock(&s->lock);
			}

			folio_unlock(folio);
		}
		folio_batch_release(&fbatch);
		cond_resched();
	}

	bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
	return ret;
}

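/*
 * How many more sectors' worth of replicas must be reserved to bring this
 * sector up to @nr_replicas, given what's already allocated and reserved:
 */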
static inline unsigned sectors_to_reserve(struct bch_folio_sector *s,
					  unsigned nr_replicas)
{
	return max(0, (int) nr_replicas -
		   s->nr_replicas -
		   s->replicas_reserved);
}

int bch2_get_folio_disk_reservation(struct bch_fs *c,
				struct bch_inode_info *inode,
				struct folio *folio, bool check_enospc)
{
	struct bch_folio *s = bch2_folio_create(folio, 0);
	unsigned nr_replicas = inode_nr_replicas(c, inode);
	struct disk_reservation disk_res = { 0 };
	unsigned i, sectors = folio_sectors(folio), disk_res_sectors = 0;
	int ret;

	if (!s)
		return -ENOMEM;

	for (i = 0; i < sectors; i++)
		disk_res_sectors += sectors_to_reserve(&s->s[i], nr_replicas);

	if (!disk_res_sectors)
		return 0;

	ret = bch2_disk_reservation_get(c, &disk_res,
					disk_res_sectors, 1,
					!check_enospc
					? BCH_DISK_RESERVATION_NOFAIL
					: 0);
	if (unlikely(ret))
		return ret;

	for (i = 0; i < sectors; i++)
		s->s[i].replicas_reserved +=
			sectors_to_reserve(&s->s[i], nr_replicas);

	return 0;
}

void bch2_folio_reservation_put(struct bch_fs *c,
			struct bch_inode_info *inode,
			struct bch2_folio_reservation *res)
{
	bch2_disk_reservation_put(c, &res->disk);
	bch2_quota_reservation_put(c, inode, &res->quota);
}

int bch2_folio_reservation_get(struct bch_fs *c,
			struct bch_inode_info *inode,
			struct folio *folio,
			struct bch2_folio_reservation *res,
			size_t offset, size_t len)
{
	struct bch_folio *s = bch2_folio_create(folio, 0);
	unsigned i, disk_sectors = 0, quota_sectors = 0;
	int ret;

	if (!s)
		return -ENOMEM;

	BUG_ON(!s->uptodate);

	for (i = round_down(offset, block_bytes(c)) >> 9;
	     i < round_up(offset + len, block_bytes(c)) >> 9;
	     i++) {
		disk_sectors += sectors_to_reserve(&s->s[i], res->disk.nr_replicas);
		quota_sectors += s->s[i].state == SECTOR_unallocated;
	}

	if (disk_sectors) {
		ret = bch2_disk_reservation_add(c, &res->disk, disk_sectors, 0);
		if (unlikely(ret))
			return ret;
	}

	if (quota_sectors) {
		ret = bch2_quota_reservation_add(c, inode, &res->quota, quota_sectors, true);
		if (unlikely(ret)) {
			struct disk_reservation tmp = { .sectors = disk_sectors };

			bch2_disk_reservation_put(c, &tmp);
			res->disk.sectors -= disk_sectors;
			return ret;
		}
	}

	return 0;
}

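/*
 * Like bch2_folio_reservation_get(), but may return partial success: try the
 * full remaining length, and on failure halve it and retry, down to a single
 * block. Returns bytes reserved, or a negative error if not even one block
 * could be reserved:
 */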
ssize_t bch2_folio_reservation_get_partial(struct bch_fs *c,
			struct bch_inode_info *inode,
			struct folio *folio,
			struct bch2_folio_reservation *res,
			size_t offset, size_t len)
{
	size_t l, reserved = 0;
	int ret;

	while ((l = len - reserved)) {
		while ((ret = bch2_folio_reservation_get(c, inode, folio, res, offset, l))) {
			if ((offset & (block_bytes(c) - 1)) + l <= block_bytes(c))
				return reserved ?: ret;

			len = reserved + l;
			l /= 2;
		}

		offset += l;
		reserved += l;
	}

	return reserved;
}

static void bch2_clear_folio_bits(struct folio *folio)
{
	struct bch_inode_info *inode = to_bch_ei(folio->mapping->host);
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	struct bch_folio *s = bch2_folio(folio);
	struct disk_reservation disk_res = { 0 };
	int i, sectors = folio_sectors(folio), dirty_sectors = 0;

	if (!s)
		return;

	EBUG_ON(!folio_test_locked(folio));
	EBUG_ON(folio_test_writeback(folio));

	for (i = 0; i < sectors; i++) {
		disk_res.sectors += s->s[i].replicas_reserved;
		s->s[i].replicas_reserved = 0;

		dirty_sectors -= s->s[i].state == SECTOR_dirty;
		bch2_folio_sector_set(folio, s, i, folio_sector_undirty(s->s[i].state));
	}

	bch2_disk_reservation_put(c, &disk_res);

	bch2_i_sectors_acct(c, inode, NULL, dirty_sectors);

	bch2_folio_release(folio);
}

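/*
 * Move reservation from @res to the folio's sectors and mark them dirty;
 * sectors that were SECTOR_unallocated are charged to i_sectors and quota:
 */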
void bch2_set_folio_dirty(struct bch_fs *c,
			  struct bch_inode_info *inode,
			  struct folio *folio,
			  struct bch2_folio_reservation *res,
			  unsigned offset, unsigned len)
{
	struct bch_folio *s = bch2_folio(folio);
	unsigned i, dirty_sectors = 0;

	WARN_ON((u64) folio_pos(folio) + offset + len >
		round_up((u64) i_size_read(&inode->v), block_bytes(c)));

	BUG_ON(!s->uptodate);

	spin_lock(&s->lock);

	for (i = round_down(offset, block_bytes(c)) >> 9;
	     i < round_up(offset + len, block_bytes(c)) >> 9;
	     i++) {
		unsigned sectors = sectors_to_reserve(&s->s[i],
						res->disk.nr_replicas);

		/*
		 * This can happen if we race with the error path in
		 * bch2_writepage_io_done():
		 */
		sectors = min_t(unsigned, sectors, res->disk.sectors);

		s->s[i].replicas_reserved += sectors;
		res->disk.sectors -= sectors;

		dirty_sectors += s->s[i].state == SECTOR_unallocated;

		bch2_folio_sector_set(folio, s, i, folio_sector_dirty(s->s[i].state));
	}

	spin_unlock(&s->lock);

	bch2_i_sectors_acct(c, inode, &res->quota, dirty_sectors);

	if (!folio_test_dirty(folio))
		filemap_dirty_folio(inode->v.i_mapping, folio);
}

vm_fault_t bch2_page_fault(struct vm_fault *vmf)
{
	struct file *file = vmf->vma->vm_file;
	struct address_space *mapping = file->f_mapping;
	struct address_space *fdm = faults_disabled_mapping();
	struct bch_inode_info *inode = file_bch_inode(file);
	vm_fault_t ret;

	if (fdm == mapping)
		return VM_FAULT_SIGBUS;

	/*
	 * Lock ordering: pagecache locks are ordered by address. fdm is a
	 * mapping this task already holds a pagecache block lock on; if it
	 * orders after @mapping we can't block on our add lock while holding
	 * it, so trylock, and on failure drop fdm's lock, cycle our add lock,
	 * retake fdm's lock, and tell the dio path to retry:
	 */
	if (fdm > mapping) {
		struct bch_inode_info *fdm_host = to_bch_ei(fdm->host);

		if (bch2_pagecache_add_tryget(inode))
			goto got_lock;

		bch2_pagecache_block_put(fdm_host);

		bch2_pagecache_add_get(inode);
		bch2_pagecache_add_put(inode);

		bch2_pagecache_block_get(fdm_host);

		/* Signal that lock has been dropped: */
		set_fdm_dropped_locks();
		return VM_FAULT_SIGBUS;
	}

	bch2_pagecache_add_get(inode);
got_lock:
	ret = filemap_fault(vmf);
	bch2_pagecache_add_put(inode);

	return ret;
}

vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
{
	struct folio *folio = page_folio(vmf->page);
	struct file *file = vmf->vma->vm_file;
	struct bch_inode_info *inode = file_bch_inode(file);
	struct address_space *mapping = file->f_mapping;
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	struct bch2_folio_reservation res;
	unsigned len;
	loff_t isize;
	vm_fault_t ret;

	bch2_folio_reservation_init(c, inode, &res);

	sb_start_pagefault(inode->v.i_sb);
	file_update_time(file);

	/*
	 * Not strictly necessary, but helps avoid dio writes livelocking in
	 * bch2_write_invalidate_inode_pages_range() - can drop this if/when we get
	 * a bch2_write_invalidate_inode_pages_range() that works without dropping
	 * page lock before invalidating page
	 */
	bch2_pagecache_add_get(inode);

	folio_lock(folio);
	isize = i_size_read(&inode->v);

	if (folio->mapping != mapping || folio_pos(folio) >= isize) {
		folio_unlock(folio);
		ret = VM_FAULT_NOPAGE;
		goto out;
	}

	len = min_t(loff_t, folio_size(folio), isize - folio_pos(folio));

	if (bch2_folio_set(c, inode_inum(inode), &folio, 1) ?:
	    bch2_folio_reservation_get(c, inode, folio, &res, 0, len)) {
		folio_unlock(folio);
		ret = VM_FAULT_SIGBUS;
		goto out;
	}

	bch2_set_folio_dirty(c, inode, folio, &res, 0, len);
	bch2_folio_reservation_put(c, inode, &res);

	folio_wait_stable(folio);
	ret = VM_FAULT_LOCKED;
out:
	bch2_pagecache_add_put(inode);
	sb_end_pagefault(inode->v.i_sb);

	return ret;
}

void bch2_invalidate_folio(struct folio *folio, size_t offset, size_t length)
{
	if (offset || length < folio_size(folio))
		return;

	bch2_clear_folio_bits(folio);
}

bool bch2_release_folio(struct folio *folio, gfp_t gfp_mask)
{
	if (folio_test_dirty(folio) || folio_test_writeback(folio))
		return false;

	bch2_clear_folio_bits(folio);
	return true;
}

/* fseek: */

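/*
 * Return the byte offset within @folio of the first sector at or after @pos
 * that has data (state >= SECTOR_dirty) with at least @min_replicas replicas,
 * or -1 if there is none:
 */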
static int folio_data_offset(struct folio *folio, loff_t pos,
			     unsigned min_replicas)
{
	struct bch_folio *s = bch2_folio(folio);
	unsigned i, sectors = folio_sectors(folio);

	if (s)
		for (i = folio_pos_to_s(folio, pos); i < sectors; i++)
			if (s->s[i].state >= SECTOR_dirty &&
			    s->s[i].nr_replicas + s->s[i].replicas_reserved >= min_replicas)
				return i << SECTOR_SHIFT;

	return -1;
}

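/*
 * Scan the pagecache for the first data at or after @start_offset; returns
 * @end_offset if no data was found, or -EAGAIN if @nonblock is set and a
 * folio couldn't be trylocked:
 */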
loff_t bch2_seek_pagecache_data(struct inode *vinode,
				loff_t start_offset,
				loff_t end_offset,
				unsigned min_replicas,
				bool nonblock)
{
	struct folio_batch fbatch;
	pgoff_t start_index	= start_offset >> PAGE_SHIFT;
	pgoff_t end_index	= end_offset >> PAGE_SHIFT;
	pgoff_t index		= start_index;
	unsigned i;
	loff_t ret;
	int offset;

	folio_batch_init(&fbatch);

	while (filemap_get_folios(vinode->i_mapping,
				  &index, end_index, &fbatch)) {
		for (i = 0; i < folio_batch_count(&fbatch); i++) {
			struct folio *folio = fbatch.folios[i];

			if (!nonblock) {
				folio_lock(folio);
			} else if (!folio_trylock(folio)) {
				folio_batch_release(&fbatch);
				return -EAGAIN;
			}

			offset = folio_data_offset(folio,
					max(folio_pos(folio), start_offset),
					min_replicas);
			if (offset >= 0) {
				ret = clamp(folio_pos(folio) + offset,
					    start_offset, end_offset);
				folio_unlock(folio);
				folio_batch_release(&fbatch);
				return ret;
			}
			folio_unlock(folio);
		}
		folio_batch_release(&fbatch);
		cond_resched();
	}

	return end_offset;
}

/*
 * Search for a hole in a folio.
 *
 * The filemap layer returns -ENOENT if no folio exists, so reuse the same error
 * code to indicate a pagecache hole exists at the returned offset. Otherwise
 * return 0 if the folio is filled with data, or an error code. This function
 * can return -EAGAIN if nonblock is specified.
 */
static int folio_hole_offset(struct address_space *mapping, loff_t *offset,
			      unsigned min_replicas, bool nonblock)
{
	struct folio *folio;
	struct bch_folio *s;
	unsigned i, sectors;
	int ret = -ENOENT;

	folio = __filemap_get_folio(mapping, *offset >> PAGE_SHIFT,
				    FGP_LOCK|(nonblock ? FGP_NOWAIT : 0), 0);
	if (IS_ERR(folio))
		return PTR_ERR(folio);

	s = bch2_folio(folio);
	if (!s)
		goto unlock;

	sectors = folio_sectors(folio);
	for (i = folio_pos_to_s(folio, *offset); i < sectors; i++)
		if (s->s[i].state < SECTOR_dirty ||
		    s->s[i].nr_replicas + s->s[i].replicas_reserved < min_replicas) {
			*offset = max(*offset,
				      folio_pos(folio) + (i << SECTOR_SHIFT));
			goto unlock;
		}

	*offset = folio_end_pos(folio);
	ret = 0;
unlock:
	folio_unlock(folio);
	folio_put(folio);
	return ret;
}

loff_t bch2_seek_pagecache_hole(struct inode *vinode,
				loff_t start_offset,
				loff_t end_offset,
				unsigned min_replicas,
				bool nonblock)
{
	struct address_space *mapping = vinode->i_mapping;
	loff_t offset = start_offset;
	loff_t ret = 0;

	while (!ret && offset < end_offset)
		ret = folio_hole_offset(mapping, &offset, min_replicas, nonblock);

	if (ret && ret != -ENOENT)
		return ret;
	return min(offset, end_offset);
}

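/*
 * Shrink a candidate hole (in 512-byte sectors) against the pagecache:
 * advance *hole_start to the first pagecache hole, then pull *hole_end back
 * to the next cached data, so that dirty-but-unwritten pagecache isn't
 * reported as a hole:
 */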
int bch2_clamp_data_hole(struct inode *inode,
			 u64 *hole_start,
			 u64 *hole_end,
			 unsigned min_replicas,
			 bool nonblock)
{
	loff_t ret;

	ret = bch2_seek_pagecache_hole(inode,
		*hole_start << 9, *hole_end << 9, min_replicas, nonblock) >> 9;
	if (ret < 0)
		return ret;

	*hole_start = ret;

	if (*hole_start == *hole_end)
		return 0;

	ret = bch2_seek_pagecache_data(inode,
		*hole_start << 9, *hole_end << 9, min_replicas, nonblock) >> 9;
	if (ret < 0)
		return ret;

	*hole_end = ret;
	return 0;
}

#endif /* NO_BCACHEFS_FS */