-----BEGIN PGP SIGNATURE-----
 
 iQEzBAABCAAdFiEEq1nRK9aeMoq1VSgcnJ2qBz9kQNkFAmg4dIsACgkQnJ2qBz9k
 QNlf7ggAycYHUp9GEkIKtM+kDxSwjcOjJ581/wA3zi3HsgGt/lDDhgeYmJObvoSq
 g2XcScoMo3ZwjmsO9W5xmr+M9F42y6JIU3ZS4HxD8+TEelRDpL7134+ZIYll2Mdu
 Z+6TUknX5ve+caNPmJBE6fGYd0TiqKJknrZE4XB5g+1RF0J6/oFbwlW7n83/uM60
 MRzj5FyNAkYpL+qijAfXE/tZ4MCIvoi1aZoyQQ9bytRG8VJF4WBxPCWNlchceZoW
 ncLvXfiHm4W6wsyO5RHbtbyiEVPU//V/BH0blXyy9xDvPUDT50yplzR6XSlypxqO
 k67z7PG8Bm0afivqM5Yv8DNFnK/0gQ==
 =LuNr
 -----END PGP SIGNATURE-----

Merge tag 'fsnotify_for_v6.16-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs

Pull fsnotify updates from Jan Kara:
 "Two fanotify cleanups and support for watching namespace-owned
  filesystems by namespace admins (most useful for being able to watch
  for new mounts / unmounts happening within a user namespace)"

* tag 'fsnotify_for_v6.16-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs:
  fanotify: support watching filesystems and mounts inside userns
  fanotify: remove redundant permission checks
  fanotify: Drop use of flex array in fanotify_fh
This commit is contained in:
Linus Torvalds 2025-05-29 10:34:26 -07:00
commit db340159f1
5 changed files with 38 additions and 30 deletions

View file

@ -415,7 +415,7 @@ static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode,
{
int dwords, type = 0;
char *ext_buf = NULL;
void *buf = fh->buf;
void *buf = fh + 1;
int err;
fh->type = FILEID_ROOT;
@ -1009,6 +1009,7 @@ finish:
static void fanotify_free_group_priv(struct fsnotify_group *group)
{
put_user_ns(group->user_ns);
kfree(group->fanotify_data.merge_hash);
if (group->fanotify_data.ucounts)
dec_ucount(group->fanotify_data.ucounts,

View file

@ -25,7 +25,7 @@ enum {
* stored in either the first or last 2 dwords.
*/
#define FANOTIFY_INLINE_FH_LEN (3 << 2)
#define FANOTIFY_FH_HDR_LEN offsetof(struct fanotify_fh, buf)
#define FANOTIFY_FH_HDR_LEN sizeof(struct fanotify_fh)
/* Fixed size struct for file handle */
struct fanotify_fh {
@ -34,7 +34,6 @@ struct fanotify_fh {
#define FANOTIFY_FH_FLAG_EXT_BUF 1
u8 flags;
u8 pad;
unsigned char buf[];
} __aligned(4);
/* Variable size struct for dir file handle + child file handle + name */
@ -92,7 +91,7 @@ static inline char **fanotify_fh_ext_buf_ptr(struct fanotify_fh *fh)
BUILD_BUG_ON(FANOTIFY_FH_HDR_LEN % 4);
BUILD_BUG_ON(__alignof__(char *) - 4 + sizeof(char *) >
FANOTIFY_INLINE_FH_LEN);
return (char **)ALIGN((unsigned long)(fh->buf), __alignof__(char *));
return (char **)ALIGN((unsigned long)(fh + 1), __alignof__(char *));
}
static inline void *fanotify_fh_ext_buf(struct fanotify_fh *fh)
@ -102,7 +101,7 @@ static inline void *fanotify_fh_ext_buf(struct fanotify_fh *fh)
static inline void *fanotify_fh_buf(struct fanotify_fh *fh)
{
return fanotify_fh_has_ext_buf(fh) ? fanotify_fh_ext_buf(fh) : fh->buf;
return fanotify_fh_has_ext_buf(fh) ? fanotify_fh_ext_buf(fh) : fh + 1;
}
static inline int fanotify_info_dir_fh_len(struct fanotify_info *info)
@ -278,7 +277,7 @@ static inline void fanotify_init_event(struct fanotify_event *event,
#define FANOTIFY_INLINE_FH(name, size) \
struct { \
struct fanotify_fh name; \
/* Space for object_fh.buf[] - access with fanotify_fh_buf() */ \
/* Space for filehandle - access with fanotify_fh_buf() */ \
unsigned char _inline_fh_buf[size]; \
}

View file

@ -1334,6 +1334,7 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
* A group with FAN_UNLIMITED_MARKS does not contribute to mark count
* in the limited groups account.
*/
BUILD_BUG_ON(!(FANOTIFY_ADMIN_INIT_FLAGS & FAN_UNLIMITED_MARKS));
if (!FAN_GROUP_FLAG(group, FAN_UNLIMITED_MARKS) &&
!inc_ucount(ucounts->ns, ucounts->uid, UCOUNT_FANOTIFY_MARKS))
return ERR_PTR(-ENOSPC);
@ -1498,6 +1499,7 @@ static struct hlist_head *fanotify_alloc_merge_hash(void)
/* fanotify syscalls */
SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
{
struct user_namespace *user_ns = current_user_ns();
struct fsnotify_group *group;
int f_flags, fd;
unsigned int fid_mode = flags & FANOTIFY_FID_BITS;
@ -1512,10 +1514,11 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
/*
* An unprivileged user can setup an fanotify group with
* limited functionality - an unprivileged group is limited to
* notification events with file handles and it cannot use
* unlimited queue/marks.
* notification events with file handles or mount ids and it
* cannot use unlimited queue/marks.
*/
if ((flags & FANOTIFY_ADMIN_INIT_FLAGS) || !fid_mode)
if ((flags & FANOTIFY_ADMIN_INIT_FLAGS) ||
!(flags & (FANOTIFY_FID_BITS | FAN_REPORT_MNT)))
return -EPERM;
/*
@ -1594,8 +1597,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
}
/* Enforce groups limits per user in all containing user ns */
group->fanotify_data.ucounts = inc_ucount(current_user_ns(),
current_euid(),
group->fanotify_data.ucounts = inc_ucount(user_ns, current_euid(),
UCOUNT_FANOTIFY_GROUPS);
if (!group->fanotify_data.ucounts) {
fd = -EMFILE;
@ -1604,6 +1606,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
group->fanotify_data.flags = flags | internal_flags;
group->memcg = get_mem_cgroup_from_mm(current->mm);
group->user_ns = get_user_ns(user_ns);
group->fanotify_data.merge_hash = fanotify_alloc_merge_hash();
if (!group->fanotify_data.merge_hash) {
@ -1637,21 +1640,13 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
goto out_destroy_group;
}
BUILD_BUG_ON(!(FANOTIFY_ADMIN_INIT_FLAGS & FAN_UNLIMITED_QUEUE));
if (flags & FAN_UNLIMITED_QUEUE) {
fd = -EPERM;
if (!capable(CAP_SYS_ADMIN))
goto out_destroy_group;
group->max_events = UINT_MAX;
} else {
group->max_events = fanotify_max_queued_events;
}
if (flags & FAN_UNLIMITED_MARKS) {
fd = -EPERM;
if (!capable(CAP_SYS_ADMIN))
goto out_destroy_group;
}
if (flags & FAN_ENABLE_AUDIT) {
fd = -EPERM;
if (!capable(CAP_AUDIT_WRITE))
@ -1811,6 +1806,8 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
struct fsnotify_group *group;
struct path path;
struct fan_fsid __fsid, *fsid = NULL;
struct user_namespace *user_ns = NULL;
struct mnt_namespace *mntns;
u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS;
unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS;
@ -1904,12 +1901,10 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
}
/*
* An unprivileged user is not allowed to setup mount nor filesystem
* marks. This also includes setting up such marks by a group that
* was initialized by an unprivileged user.
* A user is allowed to set up sb/mount/mntns marks only if it is
* capable in the user ns where the group was created.
*/
if ((!capable(CAP_SYS_ADMIN) ||
FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV)) &&
if (!ns_capable(group->user_ns, CAP_SYS_ADMIN) &&
mark_type != FAN_MARK_INODE)
return -EPERM;
@ -1988,18 +1983,31 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
fsid = &__fsid;
}
/* inode held in place by reference to path; group by fget on fd */
/*
* In addition to being capable in the user ns where group was created,
* the user also needs to be capable in the user ns associated with
* the filesystem or in the user ns associated with the mntns
* (when marking mntns).
*/
if (obj_type == FSNOTIFY_OBJ_TYPE_INODE) {
inode = path.dentry->d_inode;
obj = inode;
} else if (obj_type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
user_ns = path.mnt->mnt_sb->s_user_ns;
obj = path.mnt;
} else if (obj_type == FSNOTIFY_OBJ_TYPE_SB) {
user_ns = path.mnt->mnt_sb->s_user_ns;
obj = path.mnt->mnt_sb;
} else if (obj_type == FSNOTIFY_OBJ_TYPE_MNTNS) {
obj = mnt_ns_from_dentry(path.dentry);
mntns = mnt_ns_from_dentry(path.dentry);
user_ns = mntns->user_ns;
obj = mntns;
}
ret = -EPERM;
if (user_ns && !ns_capable(user_ns, CAP_SYS_ADMIN))
goto path_put_and_out;
ret = -EINVAL;
if (!obj)
goto path_put_and_out;

View file

@ -38,8 +38,7 @@
FAN_REPORT_PIDFD | \
FAN_REPORT_FD_ERROR | \
FAN_UNLIMITED_QUEUE | \
FAN_UNLIMITED_MARKS | \
FAN_REPORT_MNT)
FAN_UNLIMITED_MARKS)
/*
* fanotify_init() flags that are allowed for user without CAP_SYS_ADMIN.
@ -48,7 +47,7 @@
* so one of the flags for reporting file handles or mount ids is required.
*/
#define FANOTIFY_USER_INIT_FLAGS (FAN_CLASS_NOTIF | \
FANOTIFY_FID_BITS | \
FANOTIFY_FID_BITS | FAN_REPORT_MNT | \
FAN_CLOEXEC | FAN_NONBLOCK)
#define FANOTIFY_INIT_FLAGS (FANOTIFY_ADMIN_INIT_FLAGS | \

View file

@ -250,6 +250,7 @@ struct fsnotify_group {
* full */
struct mem_cgroup *memcg; /* memcg to charge allocations */
struct user_namespace *user_ns; /* user ns where group was created */
/* groups can define private fields here or use the void *private */
union {