mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-08-05 16:54:27 +00:00
Merge patch series "net, pidfs: enable handing out pidfds for reaped sk->sk_peer_pid"
Christian Brauner <brauner@kernel.org> says:

SO_PEERPIDFD currently doesn't support handing out pidfds if the
sk->sk_peer_pid thread-group leader has already been reaped. In this case it
currently returns EINVAL. Userspace still wants to get a pidfd for a reaped
process to have a stable handle it can pass on. This is especially useful now
that it is possible to retrieve exit information through a pidfd via the
PIDFD_GET_INFO ioctl()'s PIDFD_INFO_EXIT flag.

Another summary has been provided by David in [1]:

> A pidfd can outlive the task it refers to, and thus user-space must
> already be prepared that the task underlying a pidfd is gone at the time
> they get their hands on the pidfd. For instance, resolving the pidfd to
> a PID via the fdinfo must be prepared to read `-1`.
>
> Despite user-space knowing that a pidfd might be stale, several kernel
> APIs currently add another layer that checks for this. In particular,
> SO_PEERPIDFD returns `EINVAL` if the peer-task was already reaped,
> but returns a stale pidfd if the task is reaped immediately after the
> respective alive-check.
>
> This has the unfortunate effect that user-space now has two ways to
> check for the exact same scenario: A syscall might return
> EINVAL/ESRCH/... *or* the pidfd might be stale, even though there is no
> particular reason to distinguish both cases. This also propagates
> through user-space APIs, which pass on pidfds. They must be prepared to
> pass on `-1` *or* the pidfd, because there is no guaranteed way to get a
> stale pidfd from the kernel.
>
> Userspace must already deal with a pidfd referring to a reaped task as
> the task may exit and get reaped at any time while there are still many
> pidfds referring to it.

In order to allow handing out reaped pidfds, SO_PEERPIDFD needs to ensure
that PIDFD_INFO_EXIT information is available whenever a pidfd for a reaped
task is created. The uapi promises that reaped pidfds are only handed out if
it is guaranteed that the caller sees the exit information:

TEST_F(pidfd_info, success_reaped)
{
	struct pidfd_info info = {
		.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT,
	};

	/*
	 * Process has already been reaped and PIDFD_INFO_EXIT been set.
	 * Verify that we can retrieve the exit status of the process.
	 */
	ASSERT_EQ(ioctl(self->child_pidfd4, PIDFD_GET_INFO, &info), 0);
	ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
	ASSERT_TRUE(WIFEXITED(info.exit_code));
	ASSERT_EQ(WEXITSTATUS(info.exit_code), 0);
}

To hand out pidfds for reaped processes we thus allocate a pidfs entry for
the relevant sk->sk_peer_pid at the time the sk->sk_peer_pid is stashed and
drop it when the socket is destroyed. This guarantees that exit information
will always be recorded for the sk->sk_peer_pid task and we can hand out
pidfds for reaped processes.

* patches from https://lore.kernel.org/20250425-work-pidfs-net-v2-0-450a19461e75@kernel.org:
  net, pidfs: enable handing out pidfds for reaped sk->sk_peer_pid
  pidfs: get rid of __pidfd_prepare()
  net, pidfs: prepare for handing out pidfds for reaped sk->sk_peer_pid
  pidfs: register pid in pidfs

Link: https://lore.kernel.org/20250425-work-pidfs-net-v2-0-450a19461e75@kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
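For reference, a minimal userspace sketch (not part of the patch series) of the
flow this enables: fetch the peer's pidfd via SO_PEERPIDFD and read its exit
information via PIDFD_GET_INFO, which works even if the peer was already
reaped. It assumes a connected AF_UNIX socket `sockfd`, uapi headers new
enough to provide struct pidfd_info / PIDFD_GET_INFO / PIDFD_INFO_EXIT, and
the helper name peer_exit_status is purely illustrative.

	#include <errno.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <sys/socket.h>
	#include <sys/wait.h>
	#include <linux/pidfd.h>

	/* Returns the peer pidfd (caller owns it) or a negative errno. */
	static int peer_exit_status(int sockfd)
	{
		struct pidfd_info info = { .mask = PIDFD_INFO_EXIT };
		socklen_t len = sizeof(int);
		int pidfd;

		/* With this series the call succeeds even for a reaped peer. */
		if (getsockopt(sockfd, SOL_SOCKET, SO_PEERPIDFD, &pidfd, &len) < 0)
			return -errno;

		if (ioctl(pidfd, PIDFD_GET_INFO, &info) < 0)
			return -errno;

		if ((info.mask & PIDFD_INFO_EXIT) && WIFEXITED(info.exit_code))
			printf("peer exited with status %d\n", WEXITSTATUS(info.exit_code));
		else
			printf("peer still running, no exit information yet\n");

		return pidfd;
	}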
This commit is contained in:
commit
923ea4d448
7 changed files with 192 additions and 86 deletions
fs/pidfs.c | 81
@@ -768,7 +768,7 @@ static inline bool pidfs_pid_valid(struct pid *pid, const struct path *path,
 {
 	enum pid_type type;
 
-	if (flags & PIDFD_CLONE)
+	if (flags & PIDFD_STALE)
 		return true;
 
 	/*
@@ -777,10 +777,14 @@ static inline bool pidfs_pid_valid(struct pid *pid, const struct path *path,
 	 * pidfd has been allocated perform another check that the pid
 	 * is still alive. If it is exit information is available even
 	 * if the task gets reaped before the pidfd is returned to
-	 * userspace. The only exception is PIDFD_CLONE where no task
-	 * linkage has been established for @pid yet and the kernel is
-	 * in the middle of process creation so there's nothing for
-	 * pidfs to miss.
+	 * userspace. The only exception are indicated by PIDFD_STALE:
+	 *
+	 * (1) The kernel is in the middle of task creation and thus no
+	 *     task linkage has been established yet.
+	 * (2) The caller knows @pid has been registered in pidfs at a
+	 *     time when the task was still alive.
+	 *
+	 * In both cases exit information will have been reported.
 	 */
 	if (flags & PIDFD_THREAD)
 		type = PIDTYPE_PID;
@@ -874,11 +878,11 @@ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags)
 	int ret;
 
 	/*
-	 * Ensure that PIDFD_CLONE can be passed as a flag without
+	 * Ensure that PIDFD_STALE can be passed as a flag without
 	 * overloading other uapi pidfd flags.
 	 */
-	BUILD_BUG_ON(PIDFD_CLONE == PIDFD_THREAD);
-	BUILD_BUG_ON(PIDFD_CLONE == PIDFD_NONBLOCK);
+	BUILD_BUG_ON(PIDFD_STALE == PIDFD_THREAD);
+	BUILD_BUG_ON(PIDFD_STALE == PIDFD_NONBLOCK);
 
 	ret = path_from_stashed(&pid->stashed, pidfs_mnt, get_pid(pid), &path);
 	if (ret < 0)
@@ -887,7 +891,7 @@ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags)
 	if (!pidfs_pid_valid(pid, &path, flags))
 		return ERR_PTR(-ESRCH);
 
-	flags &= ~PIDFD_CLONE;
+	flags &= ~PIDFD_STALE;
 	pidfd_file = dentry_open(&path, flags, current_cred());
 	/* Raise PIDFD_THREAD explicitly as do_dentry_open() strips it. */
 	if (!IS_ERR(pidfd_file))
@@ -896,6 +900,65 @@ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags)
 	return pidfd_file;
 }
 
+/**
+ * pidfs_register_pid - register a struct pid in pidfs
+ * @pid: pid to pin
+ *
+ * Register a struct pid in pidfs. Needs to be paired with
+ * pidfs_put_pid() to not risk leaking the pidfs dentry and inode.
+ *
+ * Return: On success zero, on error a negative error code is returned.
+ */
+int pidfs_register_pid(struct pid *pid)
+{
+	struct path path __free(path_put) = {};
+	int ret;
+
+	might_sleep();
+
+	if (!pid)
+		return 0;
+
+	ret = path_from_stashed(&pid->stashed, pidfs_mnt, get_pid(pid), &path);
+	if (unlikely(ret))
+		return ret;
+	/* Keep the dentry and only put the reference to the mount. */
+	path.dentry = NULL;
+	return 0;
+}
+
+/**
+ * pidfs_get_pid - pin a struct pid through pidfs
+ * @pid: pid to pin
+ *
+ * Similar to pidfs_register_pid() but only valid if the caller knows
+ * there's a reference to the @pid through a dentry already that can't
+ * go away.
+ */
+void pidfs_get_pid(struct pid *pid)
+{
+	if (!pid)
+		return;
+	WARN_ON_ONCE(!stashed_dentry_get(&pid->stashed));
+}
+
+/**
+ * pidfs_put_pid - drop a pidfs reference
+ * @pid: pid to drop
+ *
+ * Drop a reference to @pid via pidfs. This is only safe if the
+ * reference has been taken via pidfs_get_pid().
+ */
+void pidfs_put_pid(struct pid *pid)
+{
+	might_sleep();
+
+	if (!pid)
+		return;
+	VFS_WARN_ON_ONCE(!pid->stashed);
+	dput(pid->stashed);
+}
+
 static void pidfs_inode_init_once(void *data)
 {
 	struct pidfs_inode *pi = data;

include/linux/pid.h
@@ -77,7 +77,7 @@ struct file;
 struct pid *pidfd_pid(const struct file *file);
 struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags);
 struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags);
-int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret);
+int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret_file);
 void do_notify_pidfd(struct task_struct *task);
 
 static inline struct pid *get_pid(struct pid *pid)

include/linux/pidfs.h
@@ -8,5 +8,8 @@ void pidfs_add_pid(struct pid *pid);
 void pidfs_remove_pid(struct pid *pid);
 void pidfs_exit(struct task_struct *tsk);
 extern const struct dentry_operations pidfs_dentry_operations;
+int pidfs_register_pid(struct pid *pid);
+void pidfs_get_pid(struct pid *pid);
+void pidfs_put_pid(struct pid *pid);
 
 #endif /* _LINUX_PID_FS_H */

include/uapi/linux/pidfd.h
@@ -12,7 +12,7 @@
 #define PIDFD_THREAD	O_EXCL
 #ifdef __KERNEL__
 #include <linux/sched.h>
-#define PIDFD_CLONE	CLONE_PIDFD
+#define PIDFD_STALE	CLONE_PIDFD
 #endif
 
 /* Flags for pidfd_send_signal(). */

kernel/fork.c
@@ -2035,55 +2035,11 @@ static inline void rcu_copy_process(struct task_struct *p)
 #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
 }
 
-/**
- * __pidfd_prepare - allocate a new pidfd_file and reserve a pidfd
- * @pid: the struct pid for which to create a pidfd
- * @flags: flags of the new @pidfd
- * @ret: Where to return the file for the pidfd.
- *
- * Allocate a new file that stashes @pid and reserve a new pidfd number in the
- * caller's file descriptor table. The pidfd is reserved but not installed yet.
- *
- * The helper doesn't perform checks on @pid which makes it useful for pidfds
- * created via CLONE_PIDFD where @pid has no task attached when the pidfd and
- * pidfd file are prepared.
- *
- * If this function returns successfully the caller is responsible to either
- * call fd_install() passing the returned pidfd and pidfd file as arguments in
- * order to install the pidfd into its file descriptor table or they must use
- * put_unused_fd() and fput() on the returned pidfd and pidfd file
- * respectively.
- *
- * This function is useful when a pidfd must already be reserved but there
- * might still be points of failure afterwards and the caller wants to ensure
- * that no pidfd is leaked into its file descriptor table.
- *
- * Return: On success, a reserved pidfd is returned from the function and a new
- *         pidfd file is returned in the last argument to the function. On
- *         error, a negative error code is returned from the function and the
- *         last argument remains unchanged.
- */
-static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret)
-{
-	struct file *pidfd_file;
-
-	CLASS(get_unused_fd, pidfd)(O_CLOEXEC);
-	if (pidfd < 0)
-		return pidfd;
-
-	pidfd_file = pidfs_alloc_file(pid, flags | O_RDWR);
-	if (IS_ERR(pidfd_file))
-		return PTR_ERR(pidfd_file);
-
-	*ret = pidfd_file;
-	return take_fd(pidfd);
-}
-
 /**
  * pidfd_prepare - allocate a new pidfd_file and reserve a pidfd
  * @pid: the struct pid for which to create a pidfd
  * @flags: flags of the new @pidfd
- * @ret: Where to return the pidfd.
+ * @ret_file: return the new pidfs file
  *
  * Allocate a new file that stashes @pid and reserve a new pidfd number in the
  * caller's file descriptor table. The pidfd is reserved but not installed yet.
@@ -2106,16 +2062,26 @@ static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret)
  *         error, a negative error code is returned from the function and the
  *         last argument remains unchanged.
  */
-int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret)
+int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret_file)
 {
+	struct file *pidfs_file;
+
 	/*
-	 * While holding the pidfd waitqueue lock removing the task
-	 * linkage for the thread-group leader pid (PIDTYPE_TGID) isn't
-	 * possible. Thus, if there's still task linkage for PIDTYPE_PID
-	 * not having thread-group leader linkage for the pid means it
-	 * wasn't a thread-group leader in the first place.
+	 * PIDFD_STALE is only allowed to be passed if the caller knows
+	 * that @pid is already registered in pidfs and thus
+	 * PIDFD_INFO_EXIT information is guaranteed to be available.
 	 */
-	scoped_guard(spinlock_irq, &pid->wait_pidfd.lock) {
+	if (!(flags & PIDFD_STALE)) {
+		/*
+		 * While holding the pidfd waitqueue lock removing the
+		 * task linkage for the thread-group leader pid
+		 * (PIDTYPE_TGID) isn't possible. Thus, if there's still
+		 * task linkage for PIDTYPE_PID not having thread-group
+		 * leader linkage for the pid means it wasn't a
+		 * thread-group leader in the first place.
+		 */
+		guard(spinlock_irq)(&pid->wait_pidfd.lock);
+
 		/* Task has already been reaped. */
 		if (!pid_has_task(pid, PIDTYPE_PID))
 			return -ESRCH;
@@ -2128,7 +2094,16 @@ int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret)
 			return -ENOENT;
 	}
 
-	return __pidfd_prepare(pid, flags, ret);
+	CLASS(get_unused_fd, pidfd)(O_CLOEXEC);
+	if (pidfd < 0)
+		return pidfd;
+
+	pidfs_file = pidfs_alloc_file(pid, flags | O_RDWR);
+	if (IS_ERR(pidfs_file))
+		return PTR_ERR(pidfs_file);
+
+	*ret_file = pidfs_file;
+	return take_fd(pidfd);
 }
 
 static void __delayed_free_task(struct rcu_head *rhp)
@@ -2477,7 +2452,7 @@ __latent_entropy struct task_struct *copy_process(
 	 * Note that no task has been attached to @pid yet indicate
 	 * that via CLONE_PIDFD.
 	 */
-	retval = __pidfd_prepare(pid, flags | PIDFD_CLONE, &pidfile);
+	retval = pidfd_prepare(pid, flags | PIDFD_STALE, &pidfile);
 	if (retval < 0)
 		goto bad_fork_free_pid;
 	pidfd = retval;

net/core/sock.c
@@ -148,6 +148,8 @@
 
 #include <linux/ethtool.h>
 
+#include <uapi/linux/pidfd.h>
+
 #include "dev.h"
 
 static DEFINE_MUTEX(proto_list_mutex);
@@ -1879,6 +1881,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
 	{
 		struct pid *peer_pid;
 		struct file *pidfd_file = NULL;
+		unsigned int flags = 0;
 		int pidfd;
 
 		if (len > sizeof(pidfd))
@@ -1891,18 +1894,17 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
 		if (!peer_pid)
 			return -ENODATA;
 
-		pidfd = pidfd_prepare(peer_pid, 0, &pidfd_file);
+		/* The use of PIDFD_STALE requires stashing of struct pid
+		 * on pidfs with pidfs_register_pid() and only AF_UNIX
+		 * were prepared for this.
+		 */
+		if (sk->sk_family == AF_UNIX)
+			flags = PIDFD_STALE;
+
+		pidfd = pidfd_prepare(peer_pid, flags, &pidfd_file);
 		put_pid(peer_pid);
-		if (pidfd < 0) {
-			/*
-			 * dbus-broker relies on -EINVAL being returned
-			 * to indicate ESRCH. Paper over it until this
-			 * is fixed in userspace.
-			 */
-			if (pidfd == -ESRCH)
-				pidfd = -EINVAL;
+		if (pidfd < 0)
 			return pidfd;
-		}
 
 		if (copy_to_sockptr(optval, &pidfd, len) ||
 		    copy_to_sockptr(optlen, &len, sizeof(int))) {

net/unix/af_unix.c
@@ -100,6 +100,7 @@
 #include <linux/splice.h>
 #include <linux/string.h>
 #include <linux/uaccess.h>
+#include <linux/pidfs.h>
 #include <net/af_unix.h>
 #include <net/net_namespace.h>
 #include <net/scm.h>
@@ -643,6 +644,9 @@ static void unix_sock_destructor(struct sock *sk)
 		return;
 	}
 
+	if (sk->sk_peer_pid)
+		pidfs_put_pid(sk->sk_peer_pid);
+
 	if (u->addr)
 		unix_release_addr(u->addr);
 
@@ -734,13 +738,48 @@ static void unix_release_sock(struct sock *sk, int embrion)
 		unix_gc();	/* Garbage collect fds */
 }
 
-static void init_peercred(struct sock *sk)
+struct unix_peercred {
+	struct pid *peer_pid;
+	const struct cred *peer_cred;
+};
+
+static inline int prepare_peercred(struct unix_peercred *peercred)
 {
-	sk->sk_peer_pid = get_pid(task_tgid(current));
-	sk->sk_peer_cred = get_current_cred();
+	struct pid *pid;
+	int err;
+
+	pid = task_tgid(current);
+	err = pidfs_register_pid(pid);
+	if (likely(!err)) {
+		peercred->peer_pid = get_pid(pid);
+		peercred->peer_cred = get_current_cred();
+	}
+	return err;
 }
 
-static void update_peercred(struct sock *sk)
+static void drop_peercred(struct unix_peercred *peercred)
+{
+	const struct cred *cred = NULL;
+	struct pid *pid = NULL;
+
+	might_sleep();
+
+	swap(peercred->peer_pid, pid);
+	swap(peercred->peer_cred, cred);
+
+	pidfs_put_pid(pid);
+	put_pid(pid);
+	put_cred(cred);
+}
+
+static inline void init_peercred(struct sock *sk,
+				 const struct unix_peercred *peercred)
+{
+	sk->sk_peer_pid = peercred->peer_pid;
+	sk->sk_peer_cred = peercred->peer_cred;
+}
+
+static void update_peercred(struct sock *sk, struct unix_peercred *peercred)
 {
 	const struct cred *old_cred;
 	struct pid *old_pid;
@@ -748,11 +787,11 @@ static void update_peercred(struct sock *sk)
 	spin_lock(&sk->sk_peer_lock);
 	old_pid = sk->sk_peer_pid;
 	old_cred = sk->sk_peer_cred;
-	init_peercred(sk);
+	init_peercred(sk, peercred);
 	spin_unlock(&sk->sk_peer_lock);
 
-	put_pid(old_pid);
-	put_cred(old_cred);
+	peercred->peer_pid = old_pid;
+	peercred->peer_cred = old_cred;
 }
 
 static void copy_peercred(struct sock *sk, struct sock *peersk)
@@ -761,6 +800,7 @@ static void copy_peercred(struct sock *sk, struct sock *peersk)
 
 	spin_lock(&sk->sk_peer_lock);
 	sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
+	pidfs_get_pid(sk->sk_peer_pid);
 	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
 	spin_unlock(&sk->sk_peer_lock);
 }
@@ -770,6 +810,7 @@ static int unix_listen(struct socket *sock, int backlog)
 	int err;
 	struct sock *sk = sock->sk;
 	struct unix_sock *u = unix_sk(sk);
+	struct unix_peercred peercred = {};
 
 	err = -EOPNOTSUPP;
 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
@@ -777,6 +818,9 @@ static int unix_listen(struct socket *sock, int backlog)
 	err = -EINVAL;
 	if (!READ_ONCE(u->addr))
 		goto out;	/* No listens on an unbound socket */
+	err = prepare_peercred(&peercred);
+	if (err)
+		goto out;
 	unix_state_lock(sk);
 	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
 		goto out_unlock;
@@ -786,11 +830,12 @@ static int unix_listen(struct socket *sock, int backlog)
 	WRITE_ONCE(sk->sk_state, TCP_LISTEN);
 
 	/* set credentials so connect can copy them */
-	update_peercred(sk);
+	update_peercred(sk, &peercred);
 	err = 0;
 
 out_unlock:
 	unix_state_unlock(sk);
+	drop_peercred(&peercred);
 out:
 	return err;
 }
@@ -1525,6 +1570,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
 	struct sock *sk = sock->sk, *newsk = NULL, *other = NULL;
 	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
+	struct unix_peercred peercred = {};
 	struct net *net = sock_net(sk);
 	struct sk_buff *skb = NULL;
 	unsigned char state;
@@ -1561,6 +1607,10 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
 		goto out;
 	}
 
+	err = prepare_peercred(&peercred);
+	if (err)
+		goto out;
+
 	/* Allocate skb for sending to listening sock */
 	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
 	if (!skb) {
@@ -1636,7 +1686,7 @@ restart:
 	unix_peer(newsk) = sk;
 	newsk->sk_state = TCP_ESTABLISHED;
 	newsk->sk_type = sk->sk_type;
-	init_peercred(newsk);
+	init_peercred(newsk, &peercred);
 	newu = unix_sk(newsk);
 	newu->listener = other;
 	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
@@ -1695,20 +1745,33 @@ out_free_skb:
 out_free_sk:
 	unix_release_sock(newsk, 0);
 out:
+	drop_peercred(&peercred);
 	return err;
 }
 
 static int unix_socketpair(struct socket *socka, struct socket *sockb)
 {
+	struct unix_peercred ska_peercred = {}, skb_peercred = {};
 	struct sock *ska = socka->sk, *skb = sockb->sk;
+	int err;
+
+	err = prepare_peercred(&ska_peercred);
+	if (err)
+		return err;
+
+	err = prepare_peercred(&skb_peercred);
+	if (err) {
+		drop_peercred(&ska_peercred);
+		return err;
+	}
 
 	/* Join our sockets back to back */
 	sock_hold(ska);
 	sock_hold(skb);
 	unix_peer(ska) = skb;
	unix_peer(skb) = ska;
-	init_peercred(ska);
-	init_peercred(skb);
+	init_peercred(ska, &ska_peercred);
+	init_peercred(skb, &skb_peercred);
 
 	ska->sk_state = TCP_ESTABLISHED;
 	skb->sk_state = TCP_ESTABLISHED;