fanotify: notify on mount attach and detach

Add notifications for attaching and detaching mounts.  The following new
event masks are added:

  FAN_MNT_ATTACH  - Mount was attached
  FAN_MNT_DETACH  - Mount was detached

If a mount is moved, then the event is reported with (FAN_MNT_ATTACH |
FAN_MNT_DETACH).

These events add an info record of type FAN_EVENT_INFO_TYPE_MNT containing
these fields identifying the affected mounts:

  __u64 mnt_id    - the ID of the mount (see statmount(2))

FAN_REPORT_MNT must be supplied to fanotify_init() to receive these events
and no other type of event can be received with this report type.

Marks are added with FAN_MARK_MNTNS, which records the mount namespace from
an nsfs file (e.g. /proc/self/ns/mnt).

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Link: https://lore.kernel.org/r/20250129165803.72138-3-mszeredi@redhat.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
This commit is contained in:
Miklos Szeredi 2025-01-29 17:58:00 +01:00 committed by Christian Brauner
parent b944249bce
commit 0f46d81f2b
No known key found for this signature in database
GPG key ID: 91C61BC06578DCA2
8 changed files with 164 additions and 24 deletions

View file

@ -181,3 +181,5 @@ static inline struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
{
return container_of(ns, struct mnt_namespace, ns);
}
struct mnt_namespace *mnt_ns_from_dentry(struct dentry *dentry);

View file

@ -2145,16 +2145,24 @@ struct mnt_namespace *get_sequential_mnt_ns(struct mnt_namespace *mntns, bool pr
}
}
struct mnt_namespace *mnt_ns_from_dentry(struct dentry *dentry)
{
if (!is_mnt_ns_file(dentry))
return NULL;
return to_mnt_ns(get_proc_ns(dentry->d_inode));
}
static bool mnt_ns_loop(struct dentry *dentry)
{
/* Could bind mounting the mount namespace inode cause a
* mount namespace loop?
*/
struct mnt_namespace *mnt_ns;
if (!is_mnt_ns_file(dentry))
struct mnt_namespace *mnt_ns = mnt_ns_from_dentry(dentry);
if (!mnt_ns)
return false;
mnt_ns = to_mnt_ns(get_proc_ns(dentry->d_inode));
return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
}

View file

@ -166,6 +166,8 @@ static bool fanotify_should_merge(struct fanotify_event *old,
case FANOTIFY_EVENT_TYPE_FS_ERROR:
return fanotify_error_event_equal(FANOTIFY_EE(old),
FANOTIFY_EE(new));
case FANOTIFY_EVENT_TYPE_MNT:
return false;
default:
WARN_ON_ONCE(1);
}
@ -312,7 +314,10 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n",
__func__, iter_info->report_mask, event_mask, data, data_type);
if (!fid_mode) {
if (FAN_GROUP_FLAG(group, FAN_REPORT_MNT)) {
if (data_type != FSNOTIFY_EVENT_MNT)
return 0;
} else if (!fid_mode) {
/* Do we have path to open a file descriptor? */
if (!path)
return 0;
@ -557,6 +562,20 @@ static struct fanotify_event *fanotify_alloc_path_event(const struct path *path,
return &pevent->fae;
}
static struct fanotify_event *fanotify_alloc_mnt_event(u64 mnt_id, gfp_t gfp)
{
struct fanotify_mnt_event *pevent;
pevent = kmem_cache_alloc(fanotify_mnt_event_cachep, gfp);
if (!pevent)
return NULL;
pevent->fae.type = FANOTIFY_EVENT_TYPE_MNT;
pevent->mnt_id = mnt_id;
return &pevent->fae;
}
static struct fanotify_event *fanotify_alloc_perm_event(const void *data,
int data_type,
gfp_t gfp)
@ -731,6 +750,7 @@ static struct fanotify_event *fanotify_alloc_event(
fid_mode);
struct inode *dirid = fanotify_dfid_inode(mask, data, data_type, dir);
const struct path *path = fsnotify_data_path(data, data_type);
u64 mnt_id = fsnotify_data_mnt_id(data, data_type);
struct mem_cgroup *old_memcg;
struct dentry *moved = NULL;
struct inode *child = NULL;
@ -826,8 +846,12 @@ static struct fanotify_event *fanotify_alloc_event(
moved, &hash, gfp);
} else if (fid_mode) {
event = fanotify_alloc_fid_event(id, fsid, &hash, gfp);
} else {
} else if (path) {
event = fanotify_alloc_path_event(path, &hash, gfp);
} else if (mnt_id) {
event = fanotify_alloc_mnt_event(mnt_id, gfp);
} else {
WARN_ON_ONCE(1);
}
if (!event)
@ -927,7 +951,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
BUILD_BUG_ON(FAN_RENAME != FS_RENAME);
BUILD_BUG_ON(FAN_PRE_ACCESS != FS_PRE_ACCESS);
BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 22);
BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 24);
mask = fanotify_group_event_mask(group, iter_info, &match_mask,
mask, data, data_type, dir);
@ -1028,6 +1052,11 @@ static void fanotify_free_error_event(struct fsnotify_group *group,
mempool_free(fee, &group->fanotify_data.error_events_pool);
}
static void fanotify_free_mnt_event(struct fanotify_event *event)
{
kmem_cache_free(fanotify_mnt_event_cachep, FANOTIFY_ME(event));
}
static void fanotify_free_event(struct fsnotify_group *group,
struct fsnotify_event *fsn_event)
{
@ -1054,6 +1083,9 @@ static void fanotify_free_event(struct fsnotify_group *group,
case FANOTIFY_EVENT_TYPE_FS_ERROR:
fanotify_free_error_event(group, event);
break;
case FANOTIFY_EVENT_TYPE_MNT:
fanotify_free_mnt_event(event);
break;
default:
WARN_ON_ONCE(1);
}

View file

@ -9,6 +9,7 @@ extern struct kmem_cache *fanotify_mark_cache;
extern struct kmem_cache *fanotify_fid_event_cachep;
extern struct kmem_cache *fanotify_path_event_cachep;
extern struct kmem_cache *fanotify_perm_event_cachep;
extern struct kmem_cache *fanotify_mnt_event_cachep;
/* Possible states of the permission event */
enum {
@ -244,6 +245,7 @@ enum fanotify_event_type {
FANOTIFY_EVENT_TYPE_PATH_PERM,
FANOTIFY_EVENT_TYPE_OVERFLOW, /* struct fanotify_event */
FANOTIFY_EVENT_TYPE_FS_ERROR, /* struct fanotify_error_event */
FANOTIFY_EVENT_TYPE_MNT,
__FANOTIFY_EVENT_TYPE_NUM
};
@ -409,12 +411,23 @@ struct fanotify_path_event {
struct path path;
};
struct fanotify_mnt_event {
struct fanotify_event fae;
u64 mnt_id;
};
static inline struct fanotify_path_event *
FANOTIFY_PE(struct fanotify_event *event)
{
return container_of(event, struct fanotify_path_event, fae);
}
static inline struct fanotify_mnt_event *
FANOTIFY_ME(struct fanotify_event *event)
{
return container_of(event, struct fanotify_mnt_event, fae);
}
/*
* Structure for permission fanotify events. It gets allocated and freed in
* fanotify_handle_event() since we wait there for user response. When the
@ -466,6 +479,11 @@ static inline bool fanotify_is_error_event(u32 mask)
return mask & FAN_FS_ERROR;
}
static inline bool fanotify_is_mnt_event(u32 mask)
{
return mask & (FAN_MNT_ATTACH | FAN_MNT_DETACH);
}
static inline const struct path *fanotify_event_path(struct fanotify_event *event)
{
if (event->type == FANOTIFY_EVENT_TYPE_PATH)

View file

@ -113,6 +113,7 @@ struct kmem_cache *fanotify_mark_cache __ro_after_init;
struct kmem_cache *fanotify_fid_event_cachep __ro_after_init;
struct kmem_cache *fanotify_path_event_cachep __ro_after_init;
struct kmem_cache *fanotify_perm_event_cachep __ro_after_init;
struct kmem_cache *fanotify_mnt_event_cachep __ro_after_init;
#define FANOTIFY_EVENT_ALIGN 4
#define FANOTIFY_FID_INFO_HDR_LEN \
@ -123,6 +124,8 @@ struct kmem_cache *fanotify_perm_event_cachep __ro_after_init;
(sizeof(struct fanotify_event_info_error))
#define FANOTIFY_RANGE_INFO_LEN \
(sizeof(struct fanotify_event_info_range))
#define FANOTIFY_MNT_INFO_LEN \
(sizeof(struct fanotify_event_info_mnt))
static int fanotify_fid_info_len(int fh_len, int name_len)
{
@ -178,6 +181,8 @@ static size_t fanotify_event_len(unsigned int info_mode,
fh_len = fanotify_event_object_fh_len(event);
event_len += fanotify_fid_info_len(fh_len, dot_len);
}
if (fanotify_is_mnt_event(event->mask))
event_len += FANOTIFY_MNT_INFO_LEN;
if (info_mode & FAN_REPORT_PIDFD)
event_len += FANOTIFY_PIDFD_INFO_LEN;
@ -405,6 +410,25 @@ static int process_access_response(struct fsnotify_group *group,
return -ENOENT;
}
static size_t copy_mnt_info_to_user(struct fanotify_event *event,
char __user *buf, int count)
{
struct fanotify_event_info_mnt info = { };
info.hdr.info_type = FAN_EVENT_INFO_TYPE_MNT;
info.hdr.len = FANOTIFY_MNT_INFO_LEN;
if (WARN_ON(count < info.hdr.len))
return -EFAULT;
info.mnt_id = FANOTIFY_ME(event)->mnt_id;
if (copy_to_user(buf, &info, sizeof(info)))
return -EFAULT;
return info.hdr.len;
}
static size_t copy_error_info_to_user(struct fanotify_event *event,
char __user *buf, int count)
{
@ -700,6 +724,15 @@ static int copy_info_records_to_user(struct fanotify_event *event,
total_bytes += ret;
}
if (fanotify_is_mnt_event(event->mask)) {
ret = copy_mnt_info_to_user(event, buf, count);
if (ret < 0)
return ret;
buf += ret;
count -= ret;
total_bytes += ret;
}
return total_bytes;
}
@ -1508,6 +1541,14 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
if ((flags & FAN_REPORT_PIDFD) && (flags & FAN_REPORT_TID))
return -EINVAL;
/* Don't allow mixing mnt events with inode events for now */
if (flags & FAN_REPORT_MNT) {
if (class != FAN_CLASS_NOTIF)
return -EINVAL;
if (flags & (FANOTIFY_FID_BITS | FAN_REPORT_FD_ERROR))
return -EINVAL;
}
if (event_f_flags & ~FANOTIFY_INIT_ALL_EVENT_F_BITS)
return -EINVAL;
@ -1767,7 +1808,6 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
int dfd, const char __user *pathname)
{
struct inode *inode = NULL;
struct vfsmount *mnt = NULL;
struct fsnotify_group *group;
struct path path;
struct fan_fsid __fsid, *fsid = NULL;
@ -1776,7 +1816,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS;
unsigned int ignore = flags & FANOTIFY_MARK_IGNORE_BITS;
unsigned int obj_type, fid_mode;
void *obj;
void *obj = NULL;
u32 umask = 0;
int ret;
@ -1800,6 +1840,9 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
case FAN_MARK_FILESYSTEM:
obj_type = FSNOTIFY_OBJ_TYPE_SB;
break;
case FAN_MARK_MNTNS:
obj_type = FSNOTIFY_OBJ_TYPE_MNTNS;
break;
default:
return -EINVAL;
}
@ -1847,6 +1890,19 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
return -EINVAL;
group = fd_file(f)->private_data;
/* Only report mount events on mnt namespace */
if (FAN_GROUP_FLAG(group, FAN_REPORT_MNT)) {
if (mask & ~FANOTIFY_MOUNT_EVENTS)
return -EINVAL;
if (mark_type != FAN_MARK_MNTNS)
return -EINVAL;
} else {
if (mask & FANOTIFY_MOUNT_EVENTS)
return -EINVAL;
if (mark_type == FAN_MARK_MNTNS)
return -EINVAL;
}
/*
* An unprivileged user is not allowed to setup mount nor filesystem
* marks. This also includes setting up such marks by a group that
@ -1888,7 +1944,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
* point.
*/
fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
if (mask & ~(FANOTIFY_FD_EVENTS|FANOTIFY_EVENT_FLAGS) &&
if (mask & ~(FANOTIFY_FD_EVENTS|FANOTIFY_MOUNT_EVENTS|FANOTIFY_EVENT_FLAGS) &&
(!fid_mode || mark_type == FAN_MARK_MOUNT))
return -EINVAL;
@ -1938,17 +1994,21 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
}
/* inode held in place by reference to path; group by fget on fd */
if (mark_type == FAN_MARK_INODE) {
if (obj_type == FSNOTIFY_OBJ_TYPE_INODE) {
inode = path.dentry->d_inode;
obj = inode;
} else {
mnt = path.mnt;
if (mark_type == FAN_MARK_MOUNT)
obj = mnt;
else
obj = mnt->mnt_sb;
} else if (obj_type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
obj = path.mnt;
} else if (obj_type == FSNOTIFY_OBJ_TYPE_SB) {
obj = path.mnt->mnt_sb;
} else if (obj_type == FSNOTIFY_OBJ_TYPE_MNTNS) {
obj = mnt_ns_from_dentry(path.dentry);
}
ret = -EINVAL;
if (!obj)
goto path_put_and_out;
/*
* If some other task has this inode open for write we should not add
* an ignore mask, unless that ignore mask is supposed to survive
@ -1956,10 +2016,10 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
*/
if (mark_cmd == FAN_MARK_ADD && (flags & FANOTIFY_MARK_IGNORE_BITS) &&
!(flags & FAN_MARK_IGNORED_SURV_MODIFY)) {
ret = mnt ? -EINVAL : -EISDIR;
ret = !inode ? -EINVAL : -EISDIR;
/* FAN_MARK_IGNORE requires SURV_MODIFY for sb/mount/dir marks */
if (ignore == FAN_MARK_IGNORE &&
(mnt || S_ISDIR(inode->i_mode)))
(!inode || S_ISDIR(inode->i_mode)))
goto path_put_and_out;
ret = 0;
@ -1968,7 +2028,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
}
/* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */
if (mnt || !S_ISDIR(inode->i_mode)) {
if (!inode || !S_ISDIR(inode->i_mode)) {
mask &= ~FAN_EVENT_ON_CHILD;
umask = FAN_EVENT_ON_CHILD;
/*
@ -2042,7 +2102,7 @@ static int __init fanotify_user_setup(void)
FANOTIFY_DEFAULT_MAX_USER_MARKS);
BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS);
BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 13);
BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 14);
BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 11);
fanotify_mark_cache = KMEM_CACHE(fanotify_mark,
@ -2055,6 +2115,7 @@ static int __init fanotify_user_setup(void)
fanotify_perm_event_cachep =
KMEM_CACHE(fanotify_perm_event, SLAB_PANIC);
}
fanotify_mnt_event_cachep = KMEM_CACHE(fanotify_mnt_event, SLAB_PANIC);
fanotify_max_queued_events = FANOTIFY_DEFAULT_MAX_EVENTS;
init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS] =

View file

@ -121,6 +121,11 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
seq_printf(m, "fanotify sdev:%x mflags:%x mask:%x ignored_mask:%x\n",
sb->s_dev, mflags, mark->mask, mark->ignore_mask);
} else if (mark->connector->type == FSNOTIFY_OBJ_TYPE_MNTNS) {
struct mnt_namespace *mnt_ns = fsnotify_conn_mntns(mark->connector);
seq_printf(m, "fanotify mnt_ns:%u mflags:%x mask:%x ignored_mask:%x\n",
mnt_ns->ns.inum, mflags, mark->mask, mark->ignore_mask);
}
}

View file

@ -25,7 +25,7 @@
#define FANOTIFY_FID_BITS (FAN_REPORT_DFID_NAME_TARGET)
#define FANOTIFY_INFO_MODES (FANOTIFY_FID_BITS | FAN_REPORT_PIDFD)
#define FANOTIFY_INFO_MODES (FANOTIFY_FID_BITS | FAN_REPORT_PIDFD | FAN_REPORT_MNT)
/*
* fanotify_init() flags that require CAP_SYS_ADMIN.
@ -38,7 +38,8 @@
FAN_REPORT_PIDFD | \
FAN_REPORT_FD_ERROR | \
FAN_UNLIMITED_QUEUE | \
FAN_UNLIMITED_MARKS)
FAN_UNLIMITED_MARKS | \
FAN_REPORT_MNT)
/*
* fanotify_init() flags that are allowed for user without CAP_SYS_ADMIN.
@ -58,7 +59,7 @@
#define FANOTIFY_INTERNAL_GROUP_FLAGS (FANOTIFY_UNPRIV)
#define FANOTIFY_MARK_TYPE_BITS (FAN_MARK_INODE | FAN_MARK_MOUNT | \
FAN_MARK_FILESYSTEM)
FAN_MARK_FILESYSTEM | FAN_MARK_MNTNS)
#define FANOTIFY_MARK_CMD_BITS (FAN_MARK_ADD | FAN_MARK_REMOVE | \
FAN_MARK_FLUSH)
@ -109,10 +110,13 @@
/* Events that can only be reported with data type FSNOTIFY_EVENT_ERROR */
#define FANOTIFY_ERROR_EVENTS (FAN_FS_ERROR)
#define FANOTIFY_MOUNT_EVENTS (FAN_MNT_ATTACH | FAN_MNT_DETACH)
/* Events that user can request to be notified on */
#define FANOTIFY_EVENTS (FANOTIFY_PATH_EVENTS | \
FANOTIFY_INODE_EVENTS | \
FANOTIFY_ERROR_EVENTS)
FANOTIFY_ERROR_EVENTS | \
FANOTIFY_MOUNT_EVENTS)
/* Extra flags that may be reported with event or control handling of events */
#define FANOTIFY_EVENT_FLAGS (FAN_EVENT_ON_CHILD | FAN_ONDIR)

View file

@ -28,6 +28,8 @@
/* #define FAN_DIR_MODIFY 0x00080000 */ /* Deprecated (reserved) */
#define FAN_PRE_ACCESS 0x00100000 /* Pre-content access hook */
#define FAN_MNT_ATTACH 0x01000000 /* Mount was attached */
#define FAN_MNT_DETACH 0x02000000 /* Mount was detached */
#define FAN_EVENT_ON_CHILD 0x08000000 /* Interested in child events */
@ -64,6 +66,7 @@
#define FAN_REPORT_NAME 0x00000800 /* Report events with name */
#define FAN_REPORT_TARGET_FID 0x00001000 /* Report dirent target id */
#define FAN_REPORT_FD_ERROR 0x00002000 /* event->fd can report error */
#define FAN_REPORT_MNT 0x00004000 /* Report mount events */
/* Convenience macro - FAN_REPORT_NAME requires FAN_REPORT_DIR_FID */
#define FAN_REPORT_DFID_NAME (FAN_REPORT_DIR_FID | FAN_REPORT_NAME)
@ -94,6 +97,7 @@
#define FAN_MARK_INODE 0x00000000
#define FAN_MARK_MOUNT 0x00000010
#define FAN_MARK_FILESYSTEM 0x00000100
#define FAN_MARK_MNTNS 0x00000110
/*
* Convenience macro - FAN_MARK_IGNORE requires FAN_MARK_IGNORED_SURV_MODIFY
@ -147,6 +151,7 @@ struct fanotify_event_metadata {
#define FAN_EVENT_INFO_TYPE_PIDFD 4
#define FAN_EVENT_INFO_TYPE_ERROR 5
#define FAN_EVENT_INFO_TYPE_RANGE 6
#define FAN_EVENT_INFO_TYPE_MNT 7
/* Special info types for FAN_RENAME */
#define FAN_EVENT_INFO_TYPE_OLD_DFID_NAME 10
@ -200,6 +205,11 @@ struct fanotify_event_info_range {
__u64 count;
};
struct fanotify_event_info_mnt {
struct fanotify_event_info_header hdr;
__u64 mnt_id;
};
/*
* User space may need to record additional information about its decision.
* The extra information type records what kind of information is included.