-----BEGIN PGP SIGNATURE-----
 
 iQEzBAABCAAdFiEEq1nRK9aeMoq1VSgcnJ2qBz9kQNkFAmg4dIsACgkQnJ2qBz9k
 QNlf7ggAycYHUp9GEkIKtM+kDxSwjcOjJ581/wA3zi3HsgGt/lDDhgeYmJObvoSq
 g2XcScoMo3ZwjmsO9W5xmr+M9F42y6JIU3ZS4HxD8+TEelRDpL7134+ZIYll2Mdu
 Z+6TUknX5ve+caNPmJBE6fGYd0TiqKJknrZE4XB5g+1RF0J6/oFbwlW7n83/uM60
 MRzj5FyNAkYpL+qijAfXE/tZ4MCIvoi1aZoyQQ9bytRG8VJF4WBxPCWNlchceZoW
 ncLvXfiHm4W6wsyO5RHbtbyiEVPU//V/BH0blXyy9xDvPUDT50yplzR6XSlypxqO
 k67z7PG8Bm0afivqM5Yv8DNFnK/0gQ==
 =LuNr
 -----END PGP SIGNATURE-----

Merge tag 'fsnotify_for_v6.16-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs

Pull fsnotify updates from Jan Kara:
 "Two fanotify cleanups and support for watching namespace-owned
  filesystems by namespace admins (most useful for being able to watch
  for new mounts / unmounts happening within a user namespace)"

* tag 'fsnotify_for_v6.16-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs:
  fanotify: support watching filesystems and mounts inside userns
  fanotify: remove redundant permission checks
  fanotify: Drop use of flex array in fanotify_fh
This commit is contained in:
Linus Torvalds 2025-05-29 10:34:26 -07:00
commit db340159f1
5 changed files with 38 additions and 30 deletions

View file

@ -415,7 +415,7 @@ static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode,
{ {
int dwords, type = 0; int dwords, type = 0;
char *ext_buf = NULL; char *ext_buf = NULL;
void *buf = fh->buf; void *buf = fh + 1;
int err; int err;
fh->type = FILEID_ROOT; fh->type = FILEID_ROOT;
@ -1009,6 +1009,7 @@ finish:
static void fanotify_free_group_priv(struct fsnotify_group *group) static void fanotify_free_group_priv(struct fsnotify_group *group)
{ {
put_user_ns(group->user_ns);
kfree(group->fanotify_data.merge_hash); kfree(group->fanotify_data.merge_hash);
if (group->fanotify_data.ucounts) if (group->fanotify_data.ucounts)
dec_ucount(group->fanotify_data.ucounts, dec_ucount(group->fanotify_data.ucounts,

View file

@ -25,7 +25,7 @@ enum {
* stored in either the first or last 2 dwords. * stored in either the first or last 2 dwords.
*/ */
#define FANOTIFY_INLINE_FH_LEN (3 << 2) #define FANOTIFY_INLINE_FH_LEN (3 << 2)
#define FANOTIFY_FH_HDR_LEN offsetof(struct fanotify_fh, buf) #define FANOTIFY_FH_HDR_LEN sizeof(struct fanotify_fh)
/* Fixed size struct for file handle */ /* Fixed size struct for file handle */
struct fanotify_fh { struct fanotify_fh {
@ -34,7 +34,6 @@ struct fanotify_fh {
#define FANOTIFY_FH_FLAG_EXT_BUF 1 #define FANOTIFY_FH_FLAG_EXT_BUF 1
u8 flags; u8 flags;
u8 pad; u8 pad;
unsigned char buf[];
} __aligned(4); } __aligned(4);
/* Variable size struct for dir file handle + child file handle + name */ /* Variable size struct for dir file handle + child file handle + name */
@ -92,7 +91,7 @@ static inline char **fanotify_fh_ext_buf_ptr(struct fanotify_fh *fh)
BUILD_BUG_ON(FANOTIFY_FH_HDR_LEN % 4); BUILD_BUG_ON(FANOTIFY_FH_HDR_LEN % 4);
BUILD_BUG_ON(__alignof__(char *) - 4 + sizeof(char *) > BUILD_BUG_ON(__alignof__(char *) - 4 + sizeof(char *) >
FANOTIFY_INLINE_FH_LEN); FANOTIFY_INLINE_FH_LEN);
return (char **)ALIGN((unsigned long)(fh->buf), __alignof__(char *)); return (char **)ALIGN((unsigned long)(fh + 1), __alignof__(char *));
} }
static inline void *fanotify_fh_ext_buf(struct fanotify_fh *fh) static inline void *fanotify_fh_ext_buf(struct fanotify_fh *fh)
@ -102,7 +101,7 @@ static inline void *fanotify_fh_ext_buf(struct fanotify_fh *fh)
static inline void *fanotify_fh_buf(struct fanotify_fh *fh) static inline void *fanotify_fh_buf(struct fanotify_fh *fh)
{ {
return fanotify_fh_has_ext_buf(fh) ? fanotify_fh_ext_buf(fh) : fh->buf; return fanotify_fh_has_ext_buf(fh) ? fanotify_fh_ext_buf(fh) : fh + 1;
} }
static inline int fanotify_info_dir_fh_len(struct fanotify_info *info) static inline int fanotify_info_dir_fh_len(struct fanotify_info *info)
@ -278,7 +277,7 @@ static inline void fanotify_init_event(struct fanotify_event *event,
#define FANOTIFY_INLINE_FH(name, size) \ #define FANOTIFY_INLINE_FH(name, size) \
struct { \ struct { \
struct fanotify_fh name; \ struct fanotify_fh name; \
/* Space for object_fh.buf[] - access with fanotify_fh_buf() */ \ /* Space for filehandle - access with fanotify_fh_buf() */ \
unsigned char _inline_fh_buf[size]; \ unsigned char _inline_fh_buf[size]; \
} }

View file

@ -1334,6 +1334,7 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
* A group with FAN_UNLIMITED_MARKS does not contribute to mark count * A group with FAN_UNLIMITED_MARKS does not contribute to mark count
* in the limited groups account. * in the limited groups account.
*/ */
BUILD_BUG_ON(!(FANOTIFY_ADMIN_INIT_FLAGS & FAN_UNLIMITED_MARKS));
if (!FAN_GROUP_FLAG(group, FAN_UNLIMITED_MARKS) && if (!FAN_GROUP_FLAG(group, FAN_UNLIMITED_MARKS) &&
!inc_ucount(ucounts->ns, ucounts->uid, UCOUNT_FANOTIFY_MARKS)) !inc_ucount(ucounts->ns, ucounts->uid, UCOUNT_FANOTIFY_MARKS))
return ERR_PTR(-ENOSPC); return ERR_PTR(-ENOSPC);
@ -1498,6 +1499,7 @@ static struct hlist_head *fanotify_alloc_merge_hash(void)
/* fanotify syscalls */ /* fanotify syscalls */
SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
{ {
struct user_namespace *user_ns = current_user_ns();
struct fsnotify_group *group; struct fsnotify_group *group;
int f_flags, fd; int f_flags, fd;
unsigned int fid_mode = flags & FANOTIFY_FID_BITS; unsigned int fid_mode = flags & FANOTIFY_FID_BITS;
@ -1512,10 +1514,11 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
/* /*
* An unprivileged user can setup an fanotify group with * An unprivileged user can setup an fanotify group with
* limited functionality - an unprivileged group is limited to * limited functionality - an unprivileged group is limited to
* notification events with file handles and it cannot use * notification events with file handles or mount ids and it
* unlimited queue/marks. * cannot use unlimited queue/marks.
*/ */
if ((flags & FANOTIFY_ADMIN_INIT_FLAGS) || !fid_mode) if ((flags & FANOTIFY_ADMIN_INIT_FLAGS) ||
!(flags & (FANOTIFY_FID_BITS | FAN_REPORT_MNT)))
return -EPERM; return -EPERM;
/* /*
@ -1594,8 +1597,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
} }
/* Enforce groups limits per user in all containing user ns */ /* Enforce groups limits per user in all containing user ns */
group->fanotify_data.ucounts = inc_ucount(current_user_ns(), group->fanotify_data.ucounts = inc_ucount(user_ns, current_euid(),
current_euid(),
UCOUNT_FANOTIFY_GROUPS); UCOUNT_FANOTIFY_GROUPS);
if (!group->fanotify_data.ucounts) { if (!group->fanotify_data.ucounts) {
fd = -EMFILE; fd = -EMFILE;
@ -1604,6 +1606,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
group->fanotify_data.flags = flags | internal_flags; group->fanotify_data.flags = flags | internal_flags;
group->memcg = get_mem_cgroup_from_mm(current->mm); group->memcg = get_mem_cgroup_from_mm(current->mm);
group->user_ns = get_user_ns(user_ns);
group->fanotify_data.merge_hash = fanotify_alloc_merge_hash(); group->fanotify_data.merge_hash = fanotify_alloc_merge_hash();
if (!group->fanotify_data.merge_hash) { if (!group->fanotify_data.merge_hash) {
@ -1637,21 +1640,13 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
goto out_destroy_group; goto out_destroy_group;
} }
BUILD_BUG_ON(!(FANOTIFY_ADMIN_INIT_FLAGS & FAN_UNLIMITED_QUEUE));
if (flags & FAN_UNLIMITED_QUEUE) { if (flags & FAN_UNLIMITED_QUEUE) {
fd = -EPERM;
if (!capable(CAP_SYS_ADMIN))
goto out_destroy_group;
group->max_events = UINT_MAX; group->max_events = UINT_MAX;
} else { } else {
group->max_events = fanotify_max_queued_events; group->max_events = fanotify_max_queued_events;
} }
if (flags & FAN_UNLIMITED_MARKS) {
fd = -EPERM;
if (!capable(CAP_SYS_ADMIN))
goto out_destroy_group;
}
if (flags & FAN_ENABLE_AUDIT) { if (flags & FAN_ENABLE_AUDIT) {
fd = -EPERM; fd = -EPERM;
if (!capable(CAP_AUDIT_WRITE)) if (!capable(CAP_AUDIT_WRITE))
@ -1811,6 +1806,8 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
struct fsnotify_group *group; struct fsnotify_group *group;
struct path path; struct path path;
struct fan_fsid __fsid, *fsid = NULL; struct fan_fsid __fsid, *fsid = NULL;
struct user_namespace *user_ns = NULL;
struct mnt_namespace *mntns;
u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS; u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS;
unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS; unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS;
@ -1904,12 +1901,10 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
} }
/* /*
* An unprivileged user is not allowed to setup mount nor filesystem * A user is allowed to setup sb/mount/mntns marks only if it is
* marks. This also includes setting up such marks by a group that * capable in the user ns where the group was created.
* was initialized by an unprivileged user.
*/ */
if ((!capable(CAP_SYS_ADMIN) || if (!ns_capable(group->user_ns, CAP_SYS_ADMIN) &&
FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV)) &&
mark_type != FAN_MARK_INODE) mark_type != FAN_MARK_INODE)
return -EPERM; return -EPERM;
@ -1988,18 +1983,31 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
fsid = &__fsid; fsid = &__fsid;
} }
/* inode held in place by reference to path; group by fget on fd */ /*
* In addition to being capable in the user ns where group was created,
* the user also needs to be capable in the user ns associated with
* the filesystem or in the user ns associated with the mntns
* (when marking mntns).
*/
if (obj_type == FSNOTIFY_OBJ_TYPE_INODE) { if (obj_type == FSNOTIFY_OBJ_TYPE_INODE) {
inode = path.dentry->d_inode; inode = path.dentry->d_inode;
obj = inode; obj = inode;
} else if (obj_type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) { } else if (obj_type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
user_ns = path.mnt->mnt_sb->s_user_ns;
obj = path.mnt; obj = path.mnt;
} else if (obj_type == FSNOTIFY_OBJ_TYPE_SB) { } else if (obj_type == FSNOTIFY_OBJ_TYPE_SB) {
user_ns = path.mnt->mnt_sb->s_user_ns;
obj = path.mnt->mnt_sb; obj = path.mnt->mnt_sb;
} else if (obj_type == FSNOTIFY_OBJ_TYPE_MNTNS) { } else if (obj_type == FSNOTIFY_OBJ_TYPE_MNTNS) {
obj = mnt_ns_from_dentry(path.dentry); mntns = mnt_ns_from_dentry(path.dentry);
user_ns = mntns->user_ns;
obj = mntns;
} }
ret = -EPERM;
if (user_ns && !ns_capable(user_ns, CAP_SYS_ADMIN))
goto path_put_and_out;
ret = -EINVAL; ret = -EINVAL;
if (!obj) if (!obj)
goto path_put_and_out; goto path_put_and_out;

View file

@ -38,8 +38,7 @@
FAN_REPORT_PIDFD | \ FAN_REPORT_PIDFD | \
FAN_REPORT_FD_ERROR | \ FAN_REPORT_FD_ERROR | \
FAN_UNLIMITED_QUEUE | \ FAN_UNLIMITED_QUEUE | \
FAN_UNLIMITED_MARKS | \ FAN_UNLIMITED_MARKS)
FAN_REPORT_MNT)
/* /*
* fanotify_init() flags that are allowed for user without CAP_SYS_ADMIN. * fanotify_init() flags that are allowed for user without CAP_SYS_ADMIN.
@ -48,7 +47,7 @@
* so one of the flags for reporting file handles is required. * so one of the flags for reporting file handles is required.
*/ */
#define FANOTIFY_USER_INIT_FLAGS (FAN_CLASS_NOTIF | \ #define FANOTIFY_USER_INIT_FLAGS (FAN_CLASS_NOTIF | \
FANOTIFY_FID_BITS | \ FANOTIFY_FID_BITS | FAN_REPORT_MNT | \
FAN_CLOEXEC | FAN_NONBLOCK) FAN_CLOEXEC | FAN_NONBLOCK)
#define FANOTIFY_INIT_FLAGS (FANOTIFY_ADMIN_INIT_FLAGS | \ #define FANOTIFY_INIT_FLAGS (FANOTIFY_ADMIN_INIT_FLAGS | \

View file

@ -250,6 +250,7 @@ struct fsnotify_group {
* full */ * full */
struct mem_cgroup *memcg; /* memcg to charge allocations */ struct mem_cgroup *memcg; /* memcg to charge allocations */
struct user_namespace *user_ns; /* user ns where group was created */
/* groups can define private fields here or use the void *private */ /* groups can define private fields here or use the void *private */
union { union {