mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-10-31 16:54:21 +00:00 
			
		
		
		
	 9a87907de3
			
		
	
	
		9a87907de3
		
	
	
	
	
		
			
			Combine inode creation with opening a file. There are six separate objects that are being set up: the backing inode, dentry and file, and the overlay inode, dentry and file. Cleanup in case of an error is a bit of a challenge and is difficult to test, so careful review is needed. All tmpfile testcases except generic/509 now run/pass, and no regressions are observed with full xfstests. Signed-off-by: Miklos Szeredi <mszeredi@redhat.com> Reviewed-by: Amir Goldstein <amir73il@gmail.com>
		
			
				
	
	
		
			357 lines
		
	
	
	
		
			8.3 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			357 lines
		
	
	
	
		
			8.3 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| // SPDX-License-Identifier: GPL-2.0-only
 | |
| /*
 | |
|  * Common helpers for stackable filesystems and backing files.
 | |
|  *
 | |
|  * Forked from fs/overlayfs/file.c.
 | |
|  *
 | |
|  * Copyright (C) 2017 Red Hat, Inc.
 | |
|  * Copyright (C) 2023 CTERA Networks.
 | |
|  */
 | |
| 
 | |
| #include <linux/fs.h>
 | |
| #include <linux/backing-file.h>
 | |
| #include <linux/splice.h>
 | |
| #include <linux/mm.h>
 | |
| 
 | |
| #include "internal.h"
 | |
| 
 | |
| /**
 | |
|  * backing_file_open - open a backing file for kernel internal use
 | |
|  * @user_path:	path that the user reuqested to open
 | |
|  * @flags:	open flags
 | |
|  * @real_path:	path of the backing file
 | |
|  * @cred:	credentials for open
 | |
|  *
 | |
|  * Open a backing file for a stackable filesystem (e.g., overlayfs).
 | |
|  * @user_path may be on the stackable filesystem and @real_path on the
 | |
|  * underlying filesystem.  In this case, we want to be able to return the
 | |
|  * @user_path of the stackable filesystem. This is done by embedding the
 | |
|  * returned file into a container structure that also stores the stacked
 | |
|  * file's path, which can be retrieved using backing_file_user_path().
 | |
|  */
 | |
| struct file *backing_file_open(const struct path *user_path, int flags,
 | |
| 			       const struct path *real_path,
 | |
| 			       const struct cred *cred)
 | |
| {
 | |
| 	struct file *f;
 | |
| 	int error;
 | |
| 
 | |
| 	f = alloc_empty_backing_file(flags, cred);
 | |
| 	if (IS_ERR(f))
 | |
| 		return f;
 | |
| 
 | |
| 	path_get(user_path);
 | |
| 	*backing_file_user_path(f) = *user_path;
 | |
| 	error = vfs_open(real_path, f);
 | |
| 	if (error) {
 | |
| 		fput(f);
 | |
| 		f = ERR_PTR(error);
 | |
| 	}
 | |
| 
 | |
| 	return f;
 | |
| }
 | |
| EXPORT_SYMBOL_GPL(backing_file_open);
 | |
| 
 | |
| struct file *backing_tmpfile_open(const struct path *user_path, int flags,
 | |
| 				  const struct path *real_parentpath,
 | |
| 				  umode_t mode, const struct cred *cred)
 | |
| {
 | |
| 	struct mnt_idmap *real_idmap = mnt_idmap(real_parentpath->mnt);
 | |
| 	struct file *f;
 | |
| 	int error;
 | |
| 
 | |
| 	f = alloc_empty_backing_file(flags, cred);
 | |
| 	if (IS_ERR(f))
 | |
| 		return f;
 | |
| 
 | |
| 	path_get(user_path);
 | |
| 	*backing_file_user_path(f) = *user_path;
 | |
| 	error = vfs_tmpfile(real_idmap, real_parentpath, f, mode);
 | |
| 	if (error) {
 | |
| 		fput(f);
 | |
| 		f = ERR_PTR(error);
 | |
| 	}
 | |
| 	return f;
 | |
| }
 | |
| EXPORT_SYMBOL(backing_tmpfile_open);
 | |
| 
 | |
| struct backing_aio {
 | |
| 	struct kiocb iocb;
 | |
| 	refcount_t ref;
 | |
| 	struct kiocb *orig_iocb;
 | |
| 	/* used for aio completion */
 | |
| 	void (*end_write)(struct file *);
 | |
| 	struct work_struct work;
 | |
| 	long res;
 | |
| };
 | |
| 
 | |
| static struct kmem_cache *backing_aio_cachep;
 | |
| 
 | |
| #define BACKING_IOCB_MASK \
 | |
| 	(IOCB_NOWAIT | IOCB_HIPRI | IOCB_DSYNC | IOCB_SYNC | IOCB_APPEND)
 | |
| 
 | |
| static rwf_t iocb_to_rw_flags(int flags)
 | |
| {
 | |
| 	return (__force rwf_t)(flags & BACKING_IOCB_MASK);
 | |
| }
 | |
| 
 | |
| static void backing_aio_put(struct backing_aio *aio)
 | |
| {
 | |
| 	if (refcount_dec_and_test(&aio->ref)) {
 | |
| 		fput(aio->iocb.ki_filp);
 | |
| 		kmem_cache_free(backing_aio_cachep, aio);
 | |
| 	}
 | |
| }
 | |
| 
 | |
| static void backing_aio_cleanup(struct backing_aio *aio, long res)
 | |
| {
 | |
| 	struct kiocb *iocb = &aio->iocb;
 | |
| 	struct kiocb *orig_iocb = aio->orig_iocb;
 | |
| 
 | |
| 	if (aio->end_write)
 | |
| 		aio->end_write(orig_iocb->ki_filp);
 | |
| 
 | |
| 	orig_iocb->ki_pos = iocb->ki_pos;
 | |
| 	backing_aio_put(aio);
 | |
| }
 | |
| 
 | |
| static void backing_aio_rw_complete(struct kiocb *iocb, long res)
 | |
| {
 | |
| 	struct backing_aio *aio = container_of(iocb, struct backing_aio, iocb);
 | |
| 	struct kiocb *orig_iocb = aio->orig_iocb;
 | |
| 
 | |
| 	if (iocb->ki_flags & IOCB_WRITE)
 | |
| 		kiocb_end_write(iocb);
 | |
| 
 | |
| 	backing_aio_cleanup(aio, res);
 | |
| 	orig_iocb->ki_complete(orig_iocb, res);
 | |
| }
 | |
| 
 | |
| static void backing_aio_complete_work(struct work_struct *work)
 | |
| {
 | |
| 	struct backing_aio *aio = container_of(work, struct backing_aio, work);
 | |
| 
 | |
| 	backing_aio_rw_complete(&aio->iocb, aio->res);
 | |
| }
 | |
| 
 | |
| static void backing_aio_queue_completion(struct kiocb *iocb, long res)
 | |
| {
 | |
| 	struct backing_aio *aio = container_of(iocb, struct backing_aio, iocb);
 | |
| 
 | |
| 	/*
 | |
| 	 * Punt to a work queue to serialize updates of mtime/size.
 | |
| 	 */
 | |
| 	aio->res = res;
 | |
| 	INIT_WORK(&aio->work, backing_aio_complete_work);
 | |
| 	queue_work(file_inode(aio->orig_iocb->ki_filp)->i_sb->s_dio_done_wq,
 | |
| 		   &aio->work);
 | |
| }
 | |
| 
 | |
| static int backing_aio_init_wq(struct kiocb *iocb)
 | |
| {
 | |
| 	struct super_block *sb = file_inode(iocb->ki_filp)->i_sb;
 | |
| 
 | |
| 	if (sb->s_dio_done_wq)
 | |
| 		return 0;
 | |
| 
 | |
| 	return sb_init_dio_done_wq(sb);
 | |
| }
 | |
| 
 | |
| 
 | |
| ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter,
 | |
| 			       struct kiocb *iocb, int flags,
 | |
| 			       struct backing_file_ctx *ctx)
 | |
| {
 | |
| 	struct backing_aio *aio = NULL;
 | |
| 	const struct cred *old_cred;
 | |
| 	ssize_t ret;
 | |
| 
 | |
| 	if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)))
 | |
| 		return -EIO;
 | |
| 
 | |
| 	if (!iov_iter_count(iter))
 | |
| 		return 0;
 | |
| 
 | |
| 	if (iocb->ki_flags & IOCB_DIRECT &&
 | |
| 	    !(file->f_mode & FMODE_CAN_ODIRECT))
 | |
| 		return -EINVAL;
 | |
| 
 | |
| 	old_cred = override_creds(ctx->cred);
 | |
| 	if (is_sync_kiocb(iocb)) {
 | |
| 		rwf_t rwf = iocb_to_rw_flags(flags);
 | |
| 
 | |
| 		ret = vfs_iter_read(file, iter, &iocb->ki_pos, rwf);
 | |
| 	} else {
 | |
| 		ret = -ENOMEM;
 | |
| 		aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL);
 | |
| 		if (!aio)
 | |
| 			goto out;
 | |
| 
 | |
| 		aio->orig_iocb = iocb;
 | |
| 		kiocb_clone(&aio->iocb, iocb, get_file(file));
 | |
| 		aio->iocb.ki_complete = backing_aio_rw_complete;
 | |
| 		refcount_set(&aio->ref, 2);
 | |
| 		ret = vfs_iocb_iter_read(file, &aio->iocb, iter);
 | |
| 		backing_aio_put(aio);
 | |
| 		if (ret != -EIOCBQUEUED)
 | |
| 			backing_aio_cleanup(aio, ret);
 | |
| 	}
 | |
| out:
 | |
| 	revert_creds(old_cred);
 | |
| 
 | |
| 	if (ctx->accessed)
 | |
| 		ctx->accessed(ctx->user_file);
 | |
| 
 | |
| 	return ret;
 | |
| }
 | |
| EXPORT_SYMBOL_GPL(backing_file_read_iter);
 | |
| 
 | |
| ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter,
 | |
| 				struct kiocb *iocb, int flags,
 | |
| 				struct backing_file_ctx *ctx)
 | |
| {
 | |
| 	const struct cred *old_cred;
 | |
| 	ssize_t ret;
 | |
| 
 | |
| 	if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)))
 | |
| 		return -EIO;
 | |
| 
 | |
| 	if (!iov_iter_count(iter))
 | |
| 		return 0;
 | |
| 
 | |
| 	ret = file_remove_privs(ctx->user_file);
 | |
| 	if (ret)
 | |
| 		return ret;
 | |
| 
 | |
| 	if (iocb->ki_flags & IOCB_DIRECT &&
 | |
| 	    !(file->f_mode & FMODE_CAN_ODIRECT))
 | |
| 		return -EINVAL;
 | |
| 
 | |
| 	/*
 | |
| 	 * Stacked filesystems don't support deferred completions, don't copy
 | |
| 	 * this property in case it is set by the issuer.
 | |
| 	 */
 | |
| 	flags &= ~IOCB_DIO_CALLER_COMP;
 | |
| 
 | |
| 	old_cred = override_creds(ctx->cred);
 | |
| 	if (is_sync_kiocb(iocb)) {
 | |
| 		rwf_t rwf = iocb_to_rw_flags(flags);
 | |
| 
 | |
| 		ret = vfs_iter_write(file, iter, &iocb->ki_pos, rwf);
 | |
| 		if (ctx->end_write)
 | |
| 			ctx->end_write(ctx->user_file);
 | |
| 	} else {
 | |
| 		struct backing_aio *aio;
 | |
| 
 | |
| 		ret = backing_aio_init_wq(iocb);
 | |
| 		if (ret)
 | |
| 			goto out;
 | |
| 
 | |
| 		ret = -ENOMEM;
 | |
| 		aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL);
 | |
| 		if (!aio)
 | |
| 			goto out;
 | |
| 
 | |
| 		aio->orig_iocb = iocb;
 | |
| 		aio->end_write = ctx->end_write;
 | |
| 		kiocb_clone(&aio->iocb, iocb, get_file(file));
 | |
| 		aio->iocb.ki_flags = flags;
 | |
| 		aio->iocb.ki_complete = backing_aio_queue_completion;
 | |
| 		refcount_set(&aio->ref, 2);
 | |
| 		ret = vfs_iocb_iter_write(file, &aio->iocb, iter);
 | |
| 		backing_aio_put(aio);
 | |
| 		if (ret != -EIOCBQUEUED)
 | |
| 			backing_aio_cleanup(aio, ret);
 | |
| 	}
 | |
| out:
 | |
| 	revert_creds(old_cred);
 | |
| 
 | |
| 	return ret;
 | |
| }
 | |
| EXPORT_SYMBOL_GPL(backing_file_write_iter);
 | |
| 
 | |
| ssize_t backing_file_splice_read(struct file *in, loff_t *ppos,
 | |
| 				 struct pipe_inode_info *pipe, size_t len,
 | |
| 				 unsigned int flags,
 | |
| 				 struct backing_file_ctx *ctx)
 | |
| {
 | |
| 	const struct cred *old_cred;
 | |
| 	ssize_t ret;
 | |
| 
 | |
| 	if (WARN_ON_ONCE(!(in->f_mode & FMODE_BACKING)))
 | |
| 		return -EIO;
 | |
| 
 | |
| 	old_cred = override_creds(ctx->cred);
 | |
| 	ret = vfs_splice_read(in, ppos, pipe, len, flags);
 | |
| 	revert_creds(old_cred);
 | |
| 
 | |
| 	if (ctx->accessed)
 | |
| 		ctx->accessed(ctx->user_file);
 | |
| 
 | |
| 	return ret;
 | |
| }
 | |
| EXPORT_SYMBOL_GPL(backing_file_splice_read);
 | |
| 
 | |
| ssize_t backing_file_splice_write(struct pipe_inode_info *pipe,
 | |
| 				  struct file *out, loff_t *ppos, size_t len,
 | |
| 				  unsigned int flags,
 | |
| 				  struct backing_file_ctx *ctx)
 | |
| {
 | |
| 	const struct cred *old_cred;
 | |
| 	ssize_t ret;
 | |
| 
 | |
| 	if (WARN_ON_ONCE(!(out->f_mode & FMODE_BACKING)))
 | |
| 		return -EIO;
 | |
| 
 | |
| 	ret = file_remove_privs(ctx->user_file);
 | |
| 	if (ret)
 | |
| 		return ret;
 | |
| 
 | |
| 	old_cred = override_creds(ctx->cred);
 | |
| 	file_start_write(out);
 | |
| 	ret = iter_file_splice_write(pipe, out, ppos, len, flags);
 | |
| 	file_end_write(out);
 | |
| 	revert_creds(old_cred);
 | |
| 
 | |
| 	if (ctx->end_write)
 | |
| 		ctx->end_write(ctx->user_file);
 | |
| 
 | |
| 	return ret;
 | |
| }
 | |
| EXPORT_SYMBOL_GPL(backing_file_splice_write);
 | |
| 
 | |
| int backing_file_mmap(struct file *file, struct vm_area_struct *vma,
 | |
| 		      struct backing_file_ctx *ctx)
 | |
| {
 | |
| 	const struct cred *old_cred;
 | |
| 	int ret;
 | |
| 
 | |
| 	if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)) ||
 | |
| 	    WARN_ON_ONCE(ctx->user_file != vma->vm_file))
 | |
| 		return -EIO;
 | |
| 
 | |
| 	if (!file->f_op->mmap)
 | |
| 		return -ENODEV;
 | |
| 
 | |
| 	vma_set_file(vma, file);
 | |
| 
 | |
| 	old_cred = override_creds(ctx->cred);
 | |
| 	ret = call_mmap(vma->vm_file, vma);
 | |
| 	revert_creds(old_cred);
 | |
| 
 | |
| 	if (ctx->accessed)
 | |
| 		ctx->accessed(ctx->user_file);
 | |
| 
 | |
| 	return ret;
 | |
| }
 | |
| EXPORT_SYMBOL_GPL(backing_file_mmap);
 | |
| 
 | |
| static int __init backing_aio_init(void)
 | |
| {
 | |
| 	backing_aio_cachep = KMEM_CACHE(backing_aio, SLAB_HWCACHE_ALIGN);
 | |
| 	if (!backing_aio_cachep)
 | |
| 		return -ENOMEM;
 | |
| 
 | |
| 	return 0;
 | |
| }
 | |
| fs_initcall(backing_aio_init);
 |