mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-11-01 09:13:37 +00:00 
			
		
		
		
	vfs: avoid duplicating creds in faccessat if possible
access(2) remains commonly used, for example on exec:
access("/etc/ld.so.preload", R_OK)
or when running gcc: strace -c gcc empty.c
  % time     seconds  usecs/call     calls    errors syscall
  ------ ----------- ----------- --------- --------- ----------------
    0.00    0.000000           0        42        26 access
It falls through to do_faccessat without the AT_EACCESS flag, which in turn
results in allocation of new creds in order to modify fsuid/fsgid and
caps.  This is very expensive single-threaded and, most notably,
multi-threaded, with numerous structures getting refed and then unrefed
when the new creds are destroyed shortly afterwards.
It turns out that for typical consumers the resulting creds would be
identical, and this can be checked upfront, avoiding the hard work.
An access benchmark plugged into will-it-scale running on Cascade Lake
shows:
    test     proc     before       after
    access1     1    1310582     2908735    (+121%) # distinct files
    access1    24    4716491    63822173   (+1353%) # distinct files
    access2    24    2378041     5370335    (+125%) # same file
The above benchmarks are not integrated into will-it-scale, but can be
found in a pull request:
  https://github.com/antonblanchard/will-it-scale/pull/36/files
Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
			
			
This commit is contained in:
		
							parent
							
								
									a4eecbae09
								
							
						
					
					
						commit
						981ee95cc1
					
				
					 1 changed files with 37 additions and 1 deletions
				
			
		
							
								
								
									
										38
									
								
								fs/open.c
									
										
									
									
									
								
							
							
						
						
									
										38
									
								
								fs/open.c
									
										
									
									
									
								
							| 
						 | 
				
			
			@ -368,7 +368,37 @@ COMPAT_SYSCALL_DEFINE6(fallocate, int, fd, int, mode, compat_arg_u64_dual(offset
 | 
			
		|||
 * access() needs to use the real uid/gid, not the effective uid/gid.
 | 
			
		||||
 * We do this by temporarily clearing all FS-related capabilities and
 | 
			
		||||
 * switching the fsuid/fsgid around to the real ones.
 | 
			
		||||
 *
 | 
			
		||||
 * Creating new credentials is expensive, so we try to skip doing it,
 | 
			
		||||
 * which we can if the result would match what we already got.
 | 
			
		||||
 */
 | 
			
		||||
/*
 * Decide whether do_faccessat() has to install overridden credentials
 * via access_override_creds() before performing the permission check.
 *
 * Returns true when an override is required, false when the current
 * credentials can be used as they are.
 */
static bool access_need_override_creds(int flags)
 | 
			
		||||
{
 | 
			
		||||
	const struct cred *cred;
 | 
			
		||||
 | 
			
		||||
	/* AT_EACCESS requests never override credentials. */
	if (flags & AT_EACCESS)
 | 
			
		||||
		return false;
 | 
			
		||||
 | 
			
		||||
	cred = current_cred();
 | 
			
		||||
	/*
	 * access_override_creds() sets fsuid/fsgid to uid/gid; that only
	 * changes anything if they do not already match.
	 */
	if (!uid_eq(cred->fsuid, cred->uid) ||
 | 
			
		||||
	    !gid_eq(cred->fsgid, cred->gid))
 | 
			
		||||
		return true;
 | 
			
		||||
 | 
			
		||||
	/*
	 * NOTE(review): the capability handling of access_override_creds()
	 * is not visible here; these checks presumably mirror it. Override
	 * is requested when a non-root uid has a non-empty effective set,
	 * or when root's effective set differs from its permitted set.
	 * Confirm both routines stay in sync.
	 */
	if (!issecure(SECURE_NO_SETUID_FIXUP)) {
 | 
			
		||||
		kuid_t root_uid = make_kuid(cred->user_ns, 0);
 | 
			
		||||
		if (!uid_eq(cred->uid, root_uid)) {
 | 
			
		||||
			if (!cap_isclear(cred->cap_effective))
 | 
			
		||||
				return true;
 | 
			
		||||
		} else {
 | 
			
		||||
			if (!cap_isidentical(cred->cap_effective,
 | 
			
		||||
			    cred->cap_permitted))
 | 
			
		||||
				return true;
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/* None of the checks above asked for an override; skip it. */
	return false;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static const struct cred *access_override_creds(void)
 | 
			
		||||
{
 | 
			
		||||
	const struct cred *old_cred;
 | 
			
		||||
| 
						 | 
				
			
			@ -378,6 +408,12 @@ static const struct cred *access_override_creds(void)
 | 
			
		|||
	if (!override_cred)
 | 
			
		||||
		return NULL;
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * XXX access_need_override_creds performs checks in hopes of skipping
 | 
			
		||||
	 * this work. Make sure it stays in sync if making any changes in this
 | 
			
		||||
	 * routine.
 | 
			
		||||
	 */
 | 
			
		||||
 | 
			
		||||
	override_cred->fsuid = override_cred->uid;
 | 
			
		||||
	override_cred->fsgid = override_cred->gid;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -437,7 +473,7 @@ static long do_faccessat(int dfd, const char __user *filename, int mode, int fla
 | 
			
		|||
	if (flags & AT_EMPTY_PATH)
 | 
			
		||||
		lookup_flags |= LOOKUP_EMPTY;
 | 
			
		||||
 | 
			
		||||
	if (!(flags & AT_EACCESS)) {
 | 
			
		||||
	if (access_need_override_creds(flags)) {
 | 
			
		||||
		old_cred = access_override_creds();
 | 
			
		||||
		if (!old_cred)
 | 
			
		||||
			return -ENOMEM;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		
		Reference in a new issue