Merge patch series "net, pidfs: enable handing out pidfds for reaped sk->sk_peer_pid"

Christian Brauner <brauner@kernel.org> says:

SO_PEERPIDFD currently doesn't support handing out pidfds if the
sk->sk_peer_pid thread-group leader has already been reaped. In this
case it currently returns EINVAL. Userspace still wants to get a pidfd
for a reaped process to have a stable handle it can pass on.
This is especially useful now that it is possible to retrieve exit
information through a pidfd via the PIDFD_GET_INFO ioctl()'s
PIDFD_INFO_EXIT flag.

Another summary has been provided by David in [1]:

> A pidfd can outlive the task it refers to, and thus user-space must
> already be prepared that the task underlying a pidfd is gone at the time
> they get their hands on the pidfd. For instance, resolving the pidfd to
> a PID via the fdinfo must be prepared to read `-1`.
>
> Despite user-space knowing that a pidfd might be stale, several kernel
> APIs currently add another layer that checks for this. In particular,
> SO_PEERPIDFD returns `EINVAL` if the peer-task was already reaped,
> but returns a stale pidfd if the task is reaped immediately after the
> respective alive-check.
>
> This has the unfortunate effect that user-space now has two ways to
> check for the exact same scenario: A syscall might return
> EINVAL/ESRCH/... *or* the pidfd might be stale, even though there is no
> particular reason to distinguish both cases. This also propagates
> through user-space APIs, which pass on pidfds. They must be prepared to
> pass on `-1` *or* the pidfd, because there is no guaranteed way to get a
> stale pidfd from the kernel.
> Userspace must already deal with a pidfd referring to a reaped task as
> the task may exit and get reaped at any time while there are still many
> pidfds referring to it.

In order to allow handing out reaped pidfds SO_PEERPIDFD needs to ensure
that PIDFD_INFO_EXIT information is available whenever a pidfd for a
reaped task is created. The uapi promises that reaped
pidfds are only handed out if it is guaranteed that the caller sees the
exit information:

TEST_F(pidfd_info, success_reaped)
{
        struct pidfd_info info = {
                .mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT,
        };

        /*
         * The process has already been reaped and PIDFD_INFO_EXIT has
         * been set. Verify that we can still retrieve the exit status
         * of the process through the pidfd.
         */
        ASSERT_EQ(ioctl(self->child_pidfd4, PIDFD_GET_INFO, &info), 0);
        ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
        ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
        ASSERT_TRUE(WIFEXITED(info.exit_code));
        ASSERT_EQ(WEXITSTATUS(info.exit_code), 0);
}

To hand out pidfds for reaped processes we thus allocate a pidfs entry
for the relevant sk->sk_peer_pid at the time the sk->sk_peer_pid is
stashed and drop it when the socket is destroyed. This guarantees that
exit information will always be recorded for the sk->sk_peer_pid task
and we can hand out pidfds for reaped processes.

* patches from https://lore.kernel.org/20250425-work-pidfs-net-v2-0-450a19461e75@kernel.org:
  net, pidfs: enable handing out pidfds for reaped sk->sk_peer_pid
  pidfs: get rid of __pidfd_prepare()
  net, pidfs: prepare for handing out pidfds for reaped sk->sk_peer_pid
  pidfs: register pid in pidfs

Link: https://lore.kernel.org/20250425-work-pidfs-net-v2-0-450a19461e75@kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
This commit is contained in:
Christian Brauner 2025-04-25 10:35:27 +02:00
commit 923ea4d448
No known key found for this signature in database
GPG key ID: 91C61BC06578DCA2
7 changed files with 192 additions and 86 deletions

View file

@ -768,7 +768,7 @@ static inline bool pidfs_pid_valid(struct pid *pid, const struct path *path,
{ {
enum pid_type type; enum pid_type type;
if (flags & PIDFD_CLONE) if (flags & PIDFD_STALE)
return true; return true;
/* /*
@ -777,10 +777,14 @@ static inline bool pidfs_pid_valid(struct pid *pid, const struct path *path,
* pidfd has been allocated perform another check that the pid * pidfd has been allocated perform another check that the pid
* is still alive. If it is exit information is available even * is still alive. If it is exit information is available even
* if the task gets reaped before the pidfd is returned to * if the task gets reaped before the pidfd is returned to
* userspace. The only exception is PIDFD_CLONE where no task * userspace. The only exceptions are indicated by PIDFD_STALE:
* linkage has been established for @pid yet and the kernel is *
* in the middle of process creation so there's nothing for * (1) The kernel is in the middle of task creation and thus no
* pidfs to miss. * task linkage has been established yet.
* (2) The caller knows @pid has been registered in pidfs at a
* time when the task was still alive.
*
* In both cases exit information will have been reported.
*/ */
if (flags & PIDFD_THREAD) if (flags & PIDFD_THREAD)
type = PIDTYPE_PID; type = PIDTYPE_PID;
@ -874,11 +878,11 @@ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags)
int ret; int ret;
/* /*
* Ensure that PIDFD_CLONE can be passed as a flag without * Ensure that PIDFD_STALE can be passed as a flag without
* overloading other uapi pidfd flags. * overloading other uapi pidfd flags.
*/ */
BUILD_BUG_ON(PIDFD_CLONE == PIDFD_THREAD); BUILD_BUG_ON(PIDFD_STALE == PIDFD_THREAD);
BUILD_BUG_ON(PIDFD_CLONE == PIDFD_NONBLOCK); BUILD_BUG_ON(PIDFD_STALE == PIDFD_NONBLOCK);
ret = path_from_stashed(&pid->stashed, pidfs_mnt, get_pid(pid), &path); ret = path_from_stashed(&pid->stashed, pidfs_mnt, get_pid(pid), &path);
if (ret < 0) if (ret < 0)
@ -887,7 +891,7 @@ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags)
if (!pidfs_pid_valid(pid, &path, flags)) if (!pidfs_pid_valid(pid, &path, flags))
return ERR_PTR(-ESRCH); return ERR_PTR(-ESRCH);
flags &= ~PIDFD_CLONE; flags &= ~PIDFD_STALE;
pidfd_file = dentry_open(&path, flags, current_cred()); pidfd_file = dentry_open(&path, flags, current_cred());
/* Raise PIDFD_THREAD explicitly as do_dentry_open() strips it. */ /* Raise PIDFD_THREAD explicitly as do_dentry_open() strips it. */
if (!IS_ERR(pidfd_file)) if (!IS_ERR(pidfd_file))
@ -896,6 +900,65 @@ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags)
return pidfd_file; return pidfd_file;
} }
/**
* pidfs_register_pid - register a struct pid in pidfs
* @pid: pid to pin
*
* Register a struct pid in pidfs. Needs to be paired with
* pidfs_put_pid() to not risk leaking the pidfs dentry and inode.
*
* Return: On success zero, on error a negative error code is returned.
*/
int pidfs_register_pid(struct pid *pid)
{
struct path path __free(path_put) = {};
int ret;
might_sleep();
if (!pid)
return 0;
ret = path_from_stashed(&pid->stashed, pidfs_mnt, get_pid(pid), &path);
if (unlikely(ret))
return ret;
/* Keep the dentry and only put the reference to the mount. */
path.dentry = NULL;
return 0;
}
/**
* pidfs_get_pid - pin a struct pid through pidfs
* @pid: pid to pin
*
* Similar to pidfs_register_pid() but only valid if the caller knows
* there's a reference to the @pid through a dentry already that can't
* go away.
*/
void pidfs_get_pid(struct pid *pid)
{
if (!pid)
return;
WARN_ON_ONCE(!stashed_dentry_get(&pid->stashed));
}
/**
* pidfs_put_pid - drop a pidfs reference
* @pid: pid to drop
*
* Drop a reference to @pid via pidfs. This is only safe if the
* reference has been taken via pidfs_get_pid().
*/
void pidfs_put_pid(struct pid *pid)
{
might_sleep();
if (!pid)
return;
VFS_WARN_ON_ONCE(!pid->stashed);
dput(pid->stashed);
}
static void pidfs_inode_init_once(void *data) static void pidfs_inode_init_once(void *data)
{ {
struct pidfs_inode *pi = data; struct pidfs_inode *pi = data;

View file

@ -77,7 +77,7 @@ struct file;
struct pid *pidfd_pid(const struct file *file); struct pid *pidfd_pid(const struct file *file);
struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags); struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags);
struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags); struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags);
int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret); int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret_file);
void do_notify_pidfd(struct task_struct *task); void do_notify_pidfd(struct task_struct *task);
static inline struct pid *get_pid(struct pid *pid) static inline struct pid *get_pid(struct pid *pid)

View file

@ -8,5 +8,8 @@ void pidfs_add_pid(struct pid *pid);
void pidfs_remove_pid(struct pid *pid); void pidfs_remove_pid(struct pid *pid);
void pidfs_exit(struct task_struct *tsk); void pidfs_exit(struct task_struct *tsk);
extern const struct dentry_operations pidfs_dentry_operations; extern const struct dentry_operations pidfs_dentry_operations;
int pidfs_register_pid(struct pid *pid);
void pidfs_get_pid(struct pid *pid);
void pidfs_put_pid(struct pid *pid);
#endif /* _LINUX_PID_FS_H */ #endif /* _LINUX_PID_FS_H */

View file

@ -12,7 +12,7 @@
#define PIDFD_THREAD O_EXCL #define PIDFD_THREAD O_EXCL
#ifdef __KERNEL__ #ifdef __KERNEL__
#include <linux/sched.h> #include <linux/sched.h>
#define PIDFD_CLONE CLONE_PIDFD #define PIDFD_STALE CLONE_PIDFD
#endif #endif
/* Flags for pidfd_send_signal(). */ /* Flags for pidfd_send_signal(). */

View file

@ -2035,55 +2035,11 @@ static inline void rcu_copy_process(struct task_struct *p)
#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */ #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
} }
/**
* __pidfd_prepare - allocate a new pidfd_file and reserve a pidfd
* @pid: the struct pid for which to create a pidfd
* @flags: flags of the new @pidfd
* @ret: Where to return the file for the pidfd.
*
* Allocate a new file that stashes @pid and reserve a new pidfd number in the
* caller's file descriptor table. The pidfd is reserved but not installed yet.
*
* The helper doesn't perform checks on @pid which makes it useful for pidfds
* created via CLONE_PIDFD where @pid has no task attached when the pidfd and
* pidfd file are prepared.
*
* If this function returns successfully the caller is responsible to either
* call fd_install() passing the returned pidfd and pidfd file as arguments in
* order to install the pidfd into its file descriptor table or they must use
* put_unused_fd() and fput() on the returned pidfd and pidfd file
* respectively.
*
* This function is useful when a pidfd must already be reserved but there
* might still be points of failure afterwards and the caller wants to ensure
* that no pidfd is leaked into its file descriptor table.
*
* Return: On success, a reserved pidfd is returned from the function and a new
* pidfd file is returned in the last argument to the function. On
* error, a negative error code is returned from the function and the
* last argument remains unchanged.
*/
static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret)
{
struct file *pidfd_file;
CLASS(get_unused_fd, pidfd)(O_CLOEXEC);
if (pidfd < 0)
return pidfd;
pidfd_file = pidfs_alloc_file(pid, flags | O_RDWR);
if (IS_ERR(pidfd_file))
return PTR_ERR(pidfd_file);
*ret = pidfd_file;
return take_fd(pidfd);
}
/** /**
* pidfd_prepare - allocate a new pidfd_file and reserve a pidfd * pidfd_prepare - allocate a new pidfd_file and reserve a pidfd
* @pid: the struct pid for which to create a pidfd * @pid: the struct pid for which to create a pidfd
* @flags: flags of the new @pidfd * @flags: flags of the new @pidfd
* @ret: Where to return the pidfd. * @ret_file: return the new pidfs file
* *
* Allocate a new file that stashes @pid and reserve a new pidfd number in the * Allocate a new file that stashes @pid and reserve a new pidfd number in the
* caller's file descriptor table. The pidfd is reserved but not installed yet. * caller's file descriptor table. The pidfd is reserved but not installed yet.
@ -2106,16 +2062,26 @@ static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **re
* error, a negative error code is returned from the function and the * error, a negative error code is returned from the function and the
* last argument remains unchanged. * last argument remains unchanged.
*/ */
int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret) int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret_file)
{ {
struct file *pidfs_file;
/* /*
* While holding the pidfd waitqueue lock removing the task * PIDFD_STALE is only allowed to be passed if the caller knows
* linkage for the thread-group leader pid (PIDTYPE_TGID) isn't * that @pid is already registered in pidfs and thus
* possible. Thus, if there's still task linkage for PIDTYPE_PID * PIDFD_INFO_EXIT information is guaranteed to be available.
* not having thread-group leader linkage for the pid means it
* wasn't a thread-group leader in the first place.
*/ */
scoped_guard(spinlock_irq, &pid->wait_pidfd.lock) { if (!(flags & PIDFD_STALE)) {
/*
* While holding the pidfd waitqueue lock removing the
* task linkage for the thread-group leader pid
* (PIDTYPE_TGID) isn't possible. Thus, if there's still
* task linkage for PIDTYPE_PID not having thread-group
* leader linkage for the pid means it wasn't a
* thread-group leader in the first place.
*/
guard(spinlock_irq)(&pid->wait_pidfd.lock);
/* Task has already been reaped. */ /* Task has already been reaped. */
if (!pid_has_task(pid, PIDTYPE_PID)) if (!pid_has_task(pid, PIDTYPE_PID))
return -ESRCH; return -ESRCH;
@ -2128,7 +2094,16 @@ int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret)
return -ENOENT; return -ENOENT;
} }
return __pidfd_prepare(pid, flags, ret); CLASS(get_unused_fd, pidfd)(O_CLOEXEC);
if (pidfd < 0)
return pidfd;
pidfs_file = pidfs_alloc_file(pid, flags | O_RDWR);
if (IS_ERR(pidfs_file))
return PTR_ERR(pidfs_file);
*ret_file = pidfs_file;
return take_fd(pidfd);
} }
static void __delayed_free_task(struct rcu_head *rhp) static void __delayed_free_task(struct rcu_head *rhp)
@ -2477,7 +2452,7 @@ __latent_entropy struct task_struct *copy_process(
* Note that no task has been attached to @pid yet indicate * Note that no task has been attached to @pid yet indicate
* that via CLONE_PIDFD. * that via CLONE_PIDFD.
*/ */
retval = __pidfd_prepare(pid, flags | PIDFD_CLONE, &pidfile); retval = pidfd_prepare(pid, flags | PIDFD_STALE, &pidfile);
if (retval < 0) if (retval < 0)
goto bad_fork_free_pid; goto bad_fork_free_pid;
pidfd = retval; pidfd = retval;

View file

@ -148,6 +148,8 @@
#include <linux/ethtool.h> #include <linux/ethtool.h>
#include <uapi/linux/pidfd.h>
#include "dev.h" #include "dev.h"
static DEFINE_MUTEX(proto_list_mutex); static DEFINE_MUTEX(proto_list_mutex);
@ -1879,6 +1881,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
{ {
struct pid *peer_pid; struct pid *peer_pid;
struct file *pidfd_file = NULL; struct file *pidfd_file = NULL;
unsigned int flags = 0;
int pidfd; int pidfd;
if (len > sizeof(pidfd)) if (len > sizeof(pidfd))
@ -1891,18 +1894,17 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
if (!peer_pid) if (!peer_pid)
return -ENODATA; return -ENODATA;
pidfd = pidfd_prepare(peer_pid, 0, &pidfd_file); /* The use of PIDFD_STALE requires stashing of struct pid
 * on pidfs with pidfs_register_pid() and only AF_UNIX
 * sockets were prepared for this.
*/
if (sk->sk_family == AF_UNIX)
flags = PIDFD_STALE;
pidfd = pidfd_prepare(peer_pid, flags, &pidfd_file);
put_pid(peer_pid); put_pid(peer_pid);
if (pidfd < 0) { if (pidfd < 0)
/*
* dbus-broker relies on -EINVAL being returned
* to indicate ESRCH. Paper over it until this
* is fixed in userspace.
*/
if (pidfd == -ESRCH)
pidfd = -EINVAL;
return pidfd; return pidfd;
}
if (copy_to_sockptr(optval, &pidfd, len) || if (copy_to_sockptr(optval, &pidfd, len) ||
copy_to_sockptr(optlen, &len, sizeof(int))) { copy_to_sockptr(optlen, &len, sizeof(int))) {

View file

@ -100,6 +100,7 @@
#include <linux/splice.h> #include <linux/splice.h>
#include <linux/string.h> #include <linux/string.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <linux/pidfs.h>
#include <net/af_unix.h> #include <net/af_unix.h>
#include <net/net_namespace.h> #include <net/net_namespace.h>
#include <net/scm.h> #include <net/scm.h>
@ -643,6 +644,9 @@ static void unix_sock_destructor(struct sock *sk)
return; return;
} }
if (sk->sk_peer_pid)
pidfs_put_pid(sk->sk_peer_pid);
if (u->addr) if (u->addr)
unix_release_addr(u->addr); unix_release_addr(u->addr);
@ -734,13 +738,48 @@ static void unix_release_sock(struct sock *sk, int embrion)
unix_gc(); /* Garbage collect fds */ unix_gc(); /* Garbage collect fds */
} }
static void init_peercred(struct sock *sk) struct unix_peercred {
struct pid *peer_pid;
const struct cred *peer_cred;
};
static inline int prepare_peercred(struct unix_peercred *peercred)
{ {
sk->sk_peer_pid = get_pid(task_tgid(current)); struct pid *pid;
sk->sk_peer_cred = get_current_cred(); int err;
pid = task_tgid(current);
err = pidfs_register_pid(pid);
if (likely(!err)) {
peercred->peer_pid = get_pid(pid);
peercred->peer_cred = get_current_cred();
}
return err;
} }
static void update_peercred(struct sock *sk) static void drop_peercred(struct unix_peercred *peercred)
{
const struct cred *cred = NULL;
struct pid *pid = NULL;
might_sleep();
swap(peercred->peer_pid, pid);
swap(peercred->peer_cred, cred);
pidfs_put_pid(pid);
put_pid(pid);
put_cred(cred);
}
static inline void init_peercred(struct sock *sk,
const struct unix_peercred *peercred)
{
sk->sk_peer_pid = peercred->peer_pid;
sk->sk_peer_cred = peercred->peer_cred;
}
static void update_peercred(struct sock *sk, struct unix_peercred *peercred)
{ {
const struct cred *old_cred; const struct cred *old_cred;
struct pid *old_pid; struct pid *old_pid;
@ -748,11 +787,11 @@ static void update_peercred(struct sock *sk)
spin_lock(&sk->sk_peer_lock); spin_lock(&sk->sk_peer_lock);
old_pid = sk->sk_peer_pid; old_pid = sk->sk_peer_pid;
old_cred = sk->sk_peer_cred; old_cred = sk->sk_peer_cred;
init_peercred(sk); init_peercred(sk, peercred);
spin_unlock(&sk->sk_peer_lock); spin_unlock(&sk->sk_peer_lock);
put_pid(old_pid); peercred->peer_pid = old_pid;
put_cred(old_cred); peercred->peer_cred = old_cred;
} }
static void copy_peercred(struct sock *sk, struct sock *peersk) static void copy_peercred(struct sock *sk, struct sock *peersk)
@ -761,6 +800,7 @@ static void copy_peercred(struct sock *sk, struct sock *peersk)
spin_lock(&sk->sk_peer_lock); spin_lock(&sk->sk_peer_lock);
sk->sk_peer_pid = get_pid(peersk->sk_peer_pid); sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
pidfs_get_pid(sk->sk_peer_pid);
sk->sk_peer_cred = get_cred(peersk->sk_peer_cred); sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
spin_unlock(&sk->sk_peer_lock); spin_unlock(&sk->sk_peer_lock);
} }
@ -770,6 +810,7 @@ static int unix_listen(struct socket *sock, int backlog)
int err; int err;
struct sock *sk = sock->sk; struct sock *sk = sock->sk;
struct unix_sock *u = unix_sk(sk); struct unix_sock *u = unix_sk(sk);
struct unix_peercred peercred = {};
err = -EOPNOTSUPP; err = -EOPNOTSUPP;
if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
@ -777,6 +818,9 @@ static int unix_listen(struct socket *sock, int backlog)
err = -EINVAL; err = -EINVAL;
if (!READ_ONCE(u->addr)) if (!READ_ONCE(u->addr))
goto out; /* No listens on an unbound socket */ goto out; /* No listens on an unbound socket */
err = prepare_peercred(&peercred);
if (err)
goto out;
unix_state_lock(sk); unix_state_lock(sk);
if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN) if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
goto out_unlock; goto out_unlock;
@ -786,11 +830,12 @@ static int unix_listen(struct socket *sock, int backlog)
WRITE_ONCE(sk->sk_state, TCP_LISTEN); WRITE_ONCE(sk->sk_state, TCP_LISTEN);
/* set credentials so connect can copy them */ /* set credentials so connect can copy them */
update_peercred(sk); update_peercred(sk, &peercred);
err = 0; err = 0;
out_unlock: out_unlock:
unix_state_unlock(sk); unix_state_unlock(sk);
drop_peercred(&peercred);
out: out:
return err; return err;
} }
@ -1525,6 +1570,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
struct sock *sk = sock->sk, *newsk = NULL, *other = NULL; struct sock *sk = sock->sk, *newsk = NULL, *other = NULL;
struct unix_sock *u = unix_sk(sk), *newu, *otheru; struct unix_sock *u = unix_sk(sk), *newu, *otheru;
struct unix_peercred peercred = {};
struct net *net = sock_net(sk); struct net *net = sock_net(sk);
struct sk_buff *skb = NULL; struct sk_buff *skb = NULL;
unsigned char state; unsigned char state;
@ -1561,6 +1607,10 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
goto out; goto out;
} }
err = prepare_peercred(&peercred);
if (err)
goto out;
/* Allocate skb for sending to listening sock */ /* Allocate skb for sending to listening sock */
skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL); skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
if (!skb) { if (!skb) {
@ -1636,7 +1686,7 @@ restart:
unix_peer(newsk) = sk; unix_peer(newsk) = sk;
newsk->sk_state = TCP_ESTABLISHED; newsk->sk_state = TCP_ESTABLISHED;
newsk->sk_type = sk->sk_type; newsk->sk_type = sk->sk_type;
init_peercred(newsk); init_peercred(newsk, &peercred);
newu = unix_sk(newsk); newu = unix_sk(newsk);
newu->listener = other; newu->listener = other;
RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq); RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
@ -1695,20 +1745,33 @@ out_free_skb:
out_free_sk: out_free_sk:
unix_release_sock(newsk, 0); unix_release_sock(newsk, 0);
out: out:
drop_peercred(&peercred);
return err; return err;
} }
static int unix_socketpair(struct socket *socka, struct socket *sockb) static int unix_socketpair(struct socket *socka, struct socket *sockb)
{ {
struct unix_peercred ska_peercred = {}, skb_peercred = {};
struct sock *ska = socka->sk, *skb = sockb->sk; struct sock *ska = socka->sk, *skb = sockb->sk;
int err;
err = prepare_peercred(&ska_peercred);
if (err)
return err;
err = prepare_peercred(&skb_peercred);
if (err) {
drop_peercred(&ska_peercred);
return err;
}
/* Join our sockets back to back */ /* Join our sockets back to back */
sock_hold(ska); sock_hold(ska);
sock_hold(skb); sock_hold(skb);
unix_peer(ska) = skb; unix_peer(ska) = skb;
unix_peer(skb) = ska; unix_peer(skb) = ska;
init_peercred(ska); init_peercred(ska, &ska_peercred);
init_peercred(skb); init_peercred(skb, &skb_peercred);
ska->sk_state = TCP_ESTABLISHED; ska->sk_state = TCP_ESTABLISHED;
skb->sk_state = TCP_ESTABLISHED; skb->sk_state = TCP_ESTABLISHED;